Source code for ccc.data

'''
Cost-of-Capital-Calculator asset data class.
'''
# CODING-STYLE CHECKS:
# pycodestyle records.py
# pylint --disable=locally-disabled records.py

import os
import pandas as pd
from ccc.utils import read_egg_csv, read_egg_json, json_to_dict
from ccc.utils import ASSET_DATA_CSV_YEAR


[docs]class Assets(): ''' Constructor for the asset-entity type Records class. Args: data (string or Pandas DataFrame): string describes CSV file in which records data reside; DataFrame already contains records data; default value is the string 'asset_data.csv' start_year (integer): specifies calendar year of the input data; default value is ASSET_DATA_CSV_YEAR. Returns: Assets (class instance) Raises: ValueError: * if data is not the appropriate type. * if start_year is not an integer. * if files cannot be found. Notes: Typical usage when using ccc_asset_data.csv input data is as follows:: >>> assets = Assets() which uses all the default parameters of the constructor. ''' # suppress pylint warnings about unrecognized Records variables: # pylint: disable=no-member # suppress pylint warnings about uppercase variable names: # pylint: disable=invalid-name # suppress pylint warnings about too many class instance attributes: # pylint: disable=too-many-instance-attributes ASSET_YEAR = ASSET_DATA_CSV_YEAR CUR_PATH = os.path.abspath(os.path.dirname(__file__)) VAR_INFO_FILENAME = 'records_variables.json' def __init__(self, data=os.path.join(CUR_PATH, 'ccc_asset_data.csv'), start_year=ASSET_DATA_CSV_YEAR): # pylint: disable=too-many-arguments,too-many-locals self.__data_year = start_year # read specified data self._read_data(data) # If have any checks on data, do there here... # specify that variable values do not include behavioral responses self.behavioral_responses_are_included = False @property def data_year(self): ''' Records class original data year property. ''' return self.__data_year @property def array_length(self): ''' Length of arrays in Records class's DataFrame. ''' return self.__dim
[docs] @staticmethod def read_var_info(): ''' Read Assets variables metadata from JSON file; returns dictionary and specifies static varname sets listed below. ''' var_info_path = os.path.join(Assets.CUR_PATH, Assets.VAR_INFO_FILENAME) if os.path.exists(var_info_path): with open(var_info_path) as vfile: json_text = vfile.read() vardict = json_to_dict(json_text) else: # cannot call read_egg_ function in unit tests vardict = read_egg_json( Assets.VAR_INFO_FILENAME) # pragma: no cover Assets.INTEGER_READ_VARS = set(k for k, v in vardict['read'].items() if v['type'] == 'int') FLOAT_READ_VARS = set(k for k, v in vardict['read'].items() if v['type'] == 'float') Assets.MUST_READ_VARS = set(k for k, v in vardict['read'].items() if v.get('required')) Assets.USABLE_READ_VARS = Assets.INTEGER_READ_VARS | FLOAT_READ_VARS Assets.INTEGER_VARS = Assets.INTEGER_READ_VARS return vardict
# specify various sets of variable names INTEGER_READ_VARS = set() MUST_READ_VARS = set() USABLE_READ_VARS = set() INTEGER_VARS = set()
[docs] def _read_data(self, data): ''' Read Records data from file or use specified DataFrame as data. Args: data (string or Pandas DataFrame): data or path to data Returns: None ''' # pylint: disable=too-many-statements,too-many-branches if Assets.INTEGER_VARS == set(): Assets.read_var_info() # read specified data if isinstance(data, pd.DataFrame): assetdf = data elif isinstance(data, str): if os.path.isfile(data): assetdf = pd.read_csv(data) # pragma: no cover else: # cannot call read_egg_ function in unit tests assetdf = read_egg_csv(data) # pragma: no cover else: msg = 'data is neither a string nor a Pandas DataFrame' raise ValueError(msg) self.__dim = len(assetdf.index) self.__index = assetdf.index self.df = assetdf