# Source code for ccc.data
"""
Cost-of-Capital-Calculator asset data class.
"""
# CODING-STYLE CHECKS:
# pycodestyle data.py
# pylint --disable=locally-disabled data.py
import os
import pandas as pd
from ccc.utils import read_egg_csv, read_egg_json, json_to_dict
from ccc.utils import ASSET_DATA_CSV_YEAR
# [docs]
class Assets:
    """
    Cost-of-Capital-Calculator container for asset records data.

    Args:
        data (string or Pandas DataFrame): string is the path of the CSV
            file in which records data reside (a string that is not an
            existing file is handed to read_egg_csv); DataFrame already
            contains records data; default value is the path of
            'ccc_asset_data.csv' in the directory of this module
        start_year (integer): specifies calendar year of the input data;
            default value is ASSET_DATA_CSV_YEAR.

    Returns:
        Assets (class instance)

    Raises:
        ValueError:
            * if data is not the appropriate type.
            * if start_year is not an integer.
            * possibly if files cannot be found (that would come from
              read_egg_csv / read_egg_json, which are defined elsewhere).

    Notes:
        Typical usage when using ccc_asset_data.csv input data is as
        follows::

            >>> assets = Assets()

        which uses all the default parameters of the constructor.
    """

    # suppress pylint warnings about unrecognized Records variables:
    # pylint: disable=no-member
    # suppress pylint warnings about uppercase variable names:
    # pylint: disable=invalid-name
    # suppress pylint warnings about too many class instance attributes:
    # pylint: disable=too-many-instance-attributes

    ASSET_YEAR = ASSET_DATA_CSV_YEAR
    CUR_PATH = os.path.abspath(os.path.dirname(__file__))
    VAR_INFO_FILENAME = "records_variables.json"

    # sets of variable names; empty until read_var_info() fills them in
    INTEGER_READ_VARS = set()
    MUST_READ_VARS = set()
    USABLE_READ_VARS = set()
    INTEGER_VARS = set()

    def __init__(
        self,
        data=os.path.join(CUR_PATH, "ccc_asset_data.csv"),
        start_year=ASSET_DATA_CSV_YEAR,
    ):
        # pylint: disable=too-many-arguments,too-many-locals
        # enforce the documented contract on start_year; the pandas scalar
        # check accepts both Python and NumPy integers
        if not pd.api.types.is_integer(start_year):
            msg = "start_year is not an integer"
            raise ValueError(msg)
        self.__data_year = start_year
        # read specified data
        self._read_data(data)
        # If have any checks on data, do them here...
        # specify that variable values do not include behavioral responses
        self.behavioral_responses_are_included = False

    @property
    def data_year(self):
        """
        Calendar year of the input data, as given to the constructor.
        """
        return self.__data_year

    @property
    def array_length(self):
        """
        Number of rows in the DataFrame held by this instance.
        """
        return self.__dim

    @staticmethod
    def read_var_info():
        """
        Read Assets variables metadata from JSON file; returns the
        metadata dictionary and, as a side effect, sets the class-level
        INTEGER_READ_VARS, MUST_READ_VARS, USABLE_READ_VARS and
        INTEGER_VARS sets from the dictionary's "read" section.
        """
        var_info_path = os.path.join(
            Assets.CUR_PATH, Assets.VAR_INFO_FILENAME
        )
        if os.path.exists(var_info_path):
            # explicit encoding: do not depend on the platform default
            with open(var_info_path, encoding="utf-8") as vfile:
                json_text = vfile.read()
            vardict = json_to_dict(json_text)
        else:
            # cannot call read_egg_ function in unit tests
            vardict = read_egg_json(
                Assets.VAR_INFO_FILENAME
            )  # pragma: no cover
        read_vars = vardict["read"]
        Assets.INTEGER_READ_VARS = {
            k for k, v in read_vars.items() if v["type"] == "int"
        }
        float_read_vars = {
            k for k, v in read_vars.items() if v["type"] == "float"
        }
        Assets.MUST_READ_VARS = {
            k for k, v in read_vars.items() if v.get("required")
        }
        Assets.USABLE_READ_VARS = Assets.INTEGER_READ_VARS | float_read_vars
        # INTEGER_VARS refers to the same set object as INTEGER_READ_VARS
        Assets.INTEGER_VARS = Assets.INTEGER_READ_VARS
        return vardict

    def _read_data(self, data):
        """
        Read Records data from file or use specified DataFrame as data.

        Args:
            data (string or Pandas DataFrame): data or path to data

        Returns:
            None

        Raises:
            ValueError: if data is neither a string nor a DataFrame
        """
        # pylint: disable=too-many-statements,too-many-branches
        # load the variable metadata once; an empty INTEGER_VARS set is
        # taken to mean read_var_info() has not populated the class yet
        if not Assets.INTEGER_VARS:
            Assets.read_var_info()
        # read specified data
        if isinstance(data, pd.DataFrame):
            # the DataFrame is used as is; no copy is made
            assetdf = data
        elif isinstance(data, str):
            if os.path.isfile(data):
                assetdf = pd.read_csv(data)  # pragma: no cover
            else:
                # cannot call read_egg_ function in unit tests
                assetdf = read_egg_csv(data)  # pragma: no cover
        else:
            msg = "data is neither a string nor a Pandas DataFrame"
            raise ValueError(msg)
        self.__dim = len(assetdf.index)
        self.__index = assetdf.index
        self.df = assetdf