Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/datasets/grunfeld/data.py : 57%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""Grunfeld (1950) Investment Data"""
2import pandas as pd
4from statsmodels.datasets import utils as du
# NOTE(review): the integer fused to the start of every line below appears to
# be the coverage report's line-number gutter, not part of the Python source.
# Gaps in that numbering presumably mark blank lines dropped from the listing.

# Declares the markup used by this module's docstrings.
6__docformat__ = 'restructuredtext'
# Licence statement for the dataset.
8COPYRIGHT = """This is public domain."""
# The title reuses the module docstring.
9TITLE = __doc__
# Provenance of the data, with links to the reference copies.
10SOURCE = """This is the Grunfeld (1950) Investment Data.
12The source for the data was the original 11-firm data set from Grunfeld's Ph.D.
13thesis recreated by Kleiber and Zeileis (2008) "The Grunfeld Data at 50".
14The data can be found here.
15http://statmath.wu-wien.ac.at/~zeileis/grunfeld/
17For a note on the many versions of the Grunfeld data circulating see:
18http://www.stanford.edu/~clint/bench/grunfeld.htm
19"""
# One-line description of the dataset.
21DESCRSHORT = """Grunfeld (1950) Investment Data for 11 U.S. Firms."""
# The long description is the same object as the short one.
23DESCRLONG = DESCRSHORT
# Free-text notes: observation count, variable count and variable definitions.
25NOTE = """::
27 Number of observations - 220 (20 years for 11 firms)
29 Number of variables - 5
31 Variables name definitions::
33 invest - Gross investment in 1947 dollars
34 value - Market value as of Dec. 31 in 1947 dollars
35 capital - Stock of plant and equipment in 1947 dollars
36 firm - General Motors, US Steel, General Electric, Chrysler,
37 Atlantic Refining, IBM, Union Oil, Westinghouse, Goodyear,
38 Diamond Match, American Steel
39 year - 1935 - 1954
41 Note that raw_data has firm expanded to dummy variables, since it is a
42 string categorical variable.
43"""
def load(as_pandas=None):
    """
    Load the Grunfeld data and return a Dataset class.

    The data is first loaded through ``load_pandas`` and the result is then
    passed to ``du.as_numpy_dataset`` together with ``as_pandas``.

    Parameters
    ----------
    as_pandas : bool, optional
        Flag indicating whether to return pandas DataFrames and Series
        or numpy recarrays and arrays.  If True, returns pandas.  The
        default ``None`` is forwarded unchanged; how it is interpreted is
        decided by ``du.as_numpy_dataset``.

    Returns
    -------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.

    Notes
    -----
    raw_data has the firm variable expanded to dummy variables for each
    firm (i.e., there is no reference dummy)
    """
    pandas_dataset = load_pandas()
    return du.as_numpy_dataset(pandas_dataset, as_pandas=as_pandas)
def load_pandas():
    """
    Load the Grunfeld data and return a Dataset class.

    Returns
    -------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.

    Notes
    -----
    raw_data has the firm variable expanded to dummy variables for each
    firm (i.e., there is no reference dummy)
    """
    data = _get_data()
    # Store the year as float; bracket assignment always targets the column,
    # whereas attribute assignment only does so when the column already exists.
    data['year'] = data['year'].astype(float)
    # Dummy-encode before handing ``data`` to ``process_pandas`` (same order
    # as the original), so the encoding sees the frame exactly as prepared.
    dummies = pd.get_dummies(data)
    # Column 0 is used as the endogenous variable
    # (presumably ``invest`` -- confirm against grunfeld.csv).
    ds = du.process_pandas(data, endog_idx=0)
    ds.raw_data = dummies
    return ds
def _get_data():
    """
    Read ``grunfeld.csv`` through ``du.load_csv``.

    ``__file__`` is passed so the helper can locate the CSV relative to this
    module (presumably the same directory -- confirm in ``du.load_csv``).

    Returns
    -------
    The object returned by ``du.load_csv``; ``load_pandas`` treats it as a
    pandas DataFrame with a ``year`` column.
    """
    return du.load_csv(__file__, 'grunfeld.csv')