Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/datasets/nile/data.py : 67%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""Nile River Flows."""
2import pandas as pd
4from statsmodels.datasets import utils as du
__docformat__ = 'restructuredtext'

# Descriptive metadata for the Nile dataset, exposed as module-level strings.
COPYRIGHT = """This is public domain."""
TITLE = """Nile River flows at Ashwan 1871-1970"""
SOURCE = """
This data is first analyzed in:

    Cobb, G. W. 1978. "The Problem of the Nile: Conditional Solution to a
        Changepoint Problem." *Biometrika*. 65.2, 243-51.
"""

DESCRSHORT = """This dataset contains measurements on the annual flow of
the Nile as measured at Ashwan for 100 years from 1871-1970."""

DESCRLONG = DESCRSHORT + " There is an apparent changepoint near 1898."

# suggested notes
# The variable names listed here must match the column labels that
# ``load_pandas`` reads ('year' and 'volume').
NOTE = """::

    Number of observations: 100
    Number of variables: 2
    Variable name definitions:

        year - the year of the observations
        volume - the discharge at Aswan in 10^8, m^3
"""
def load(as_pandas=None):
    """
    Load the Nile data and return a Dataset class instance.

    The dataset is first built by ``load_pandas`` and then handed to
    ``du.as_numpy_dataset`` together with the ``as_pandas`` flag.

    Parameters
    ----------
    as_pandas : bool
        Flag indicating whether to return pandas DataFrames and Series
        or numpy recarrays and arrays.  If True, returns pandas.
        Defaults to None, which is forwarded unchanged.

    Returns
    -------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.
    """
    pandas_dataset = load_pandas()
    return du.as_numpy_dataset(pandas_dataset, as_pandas=as_pandas)
def load_pandas():
    """
    Load the Nile data and return a Dataset class instance.

    Returns
    -------
    Dataset instance:
        ``data`` is the frame returned by ``_get_data``, ``names`` lists its
        column labels, and ``endog`` is the ``volume`` column as a pandas
        Series indexed by the ``year`` column cast to int.
    """
    data = _get_data()
    # TODO: time series
    years = data['year'].astype(int)
    # Pass the raw values, not the Series: when ``pd.Series`` is given a
    # Series together with ``index=``, it aligns on the existing labels
    # (a reindex) rather than relabelling.  With row labels that are not the
    # years themselves, that alignment would leave every entry NaN.
    endog = pd.Series(data['volume'].values, index=years, name='volume')
    dataset = du.Dataset(data=data, names=list(data.columns), endog=endog,
                         endog_name='volume')
    return dataset
def _get_data():
    """Load ``nile.csv`` via ``du.load_csv`` and cast the result to float."""
    raw = du.load_csv(__file__, 'nile.csv')
    return raw.astype(float)