Source code for ccc.data

"""
Cost-of-Capital-Calculator asset data class.
"""

# CODING-STYLE CHECKS:
# pycodestyle records.py
# pylint --disable=locally-disabled records.py

import os
import pandas as pd
from ccc.utils import read_egg_csv, read_egg_json, json_to_dict
from ccc.utils import ASSET_DATA_CSV_YEAR


[docs] class Assets: """ Constructor for the asset-entity type Records class. Args: data (string or Pandas DataFrame): string describes CSV file in which records data reside; DataFrame already contains records data; default value is the string 'asset_data.csv' start_year (integer): specifies calendar year of the input data; default value is ASSET_DATA_CSV_YEAR. Returns: Assets (class instance) Raises: ValueError: * if data is not the appropriate type. * if start_year is not an integer. * if files cannot be found. Notes: Typical usage when using ccc_asset_data.csv input data is as follows:: >>> assets = Assets() which uses all the default parameters of the constructor. """ # suppress pylint warnings about unrecognized Records variables: # pylint: disable=no-member # suppress pylint warnings about uppercase variable names: # pylint: disable=invalid-name # suppress pylint warnings about too many class instance attributes: # pylint: disable=too-many-instance-attributes ASSET_YEAR = ASSET_DATA_CSV_YEAR CUR_PATH = os.path.abspath(os.path.dirname(__file__)) VAR_INFO_FILENAME = "records_variables.json" def __init__( self, data=os.path.join(CUR_PATH, "ccc_asset_data.csv"), start_year=ASSET_DATA_CSV_YEAR, ): # pylint: disable=too-many-arguments,too-many-locals self.__data_year = start_year # read specified data self._read_data(data) # If have any checks on data, do there here... # specify that variable values do not include behavioral responses self.behavioral_responses_are_included = False @property def data_year(self): """ Records class original data year property. """ return self.__data_year @property def array_length(self): """ Length of arrays in Records class's DataFrame. """ return self.__dim @staticmethod def read_var_info(): """ Read Assets variables metadata from JSON file; returns dictionary and specifies static varname sets listed below. """ var_info_path = os.path.join(Assets.CUR_PATH, Assets.VAR_INFO_FILENAME) if os.path.exists(var_info_path): with open(var_info_path) as vfile: json_text = vfile.read() vardict = json_to_dict(json_text) else: # cannot call read_egg_ function in unit tests vardict = read_egg_json( Assets.VAR_INFO_FILENAME ) # pragma: no cover Assets.INTEGER_READ_VARS = set( k for k, v in vardict["read"].items() if v["type"] == "int" ) FLOAT_READ_VARS = set( k for k, v in vardict["read"].items() if v["type"] == "float" ) Assets.MUST_READ_VARS = set( k for k, v in vardict["read"].items() if v.get("required") ) Assets.USABLE_READ_VARS = Assets.INTEGER_READ_VARS | FLOAT_READ_VARS Assets.INTEGER_VARS = Assets.INTEGER_READ_VARS return vardict # specify various sets of variable names INTEGER_READ_VARS = set() MUST_READ_VARS = set() USABLE_READ_VARS = set() INTEGER_VARS = set()
[docs] def _read_data(self, data): """ Read Records data from file or use specified DataFrame as data. Args: data (string or Pandas DataFrame): data or path to data Returns: None """ # pylint: disable=too-many-statements,too-many-branches if Assets.INTEGER_VARS == set(): Assets.read_var_info() # read specified data if isinstance(data, pd.DataFrame): assetdf = data elif isinstance(data, str): if os.path.isfile(data): assetdf = pd.read_csv(data) # pragma: no cover else: # cannot call read_egg_ function in unit tests assetdf = read_egg_csv(data) # pragma: no cover else: msg = "data is neither a string nor a Pandas DataFrame" raise ValueError(msg) self.__dim = len(assetdf.index) self.__index = assetdf.index self.df = assetdf