Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/datasets/china_smoking/data.py : 62%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""Smoking and lung cancer in eight cities in China."""
2from statsmodels.datasets import utils as du
# Markup format declared for this module's docstrings.
4__docformat__ = 'restructuredtext'
# Journal and year of the publication; the full citation is in SOURCE.
6COPYRIGHT = """Intern. J. Epidemiol. (1992)"""
# The dataset title reuses the module docstring.
7TITLE = __doc__
# Full citation of the paper the data were transcribed from.
8SOURCE = """
9Transcribed from Z. Liu, Smoking and Lung Cancer Incidence in China,
10Intern. J. Epidemiol., 21:197-201, (1992).
11"""
# One-line summary of the dataset.
13DESCRSHORT = """Co-occurrence of lung cancer and smoking in 8 Chinese cities."""
# Longer description of the dataset.
15DESCRLONG = """This is a series of 8 2x2 contingency tables showing the co-occurrence
16of lung cancer and smoking in 8 Chinese cities.
17"""
# Free-text notes giving the observation count, the variable count and the
# variable names.
# NOTE(review): load_pandas() indexes on a 'Location' column and passes its
# own title string instead of TITLE; neither is reflected in the text
# here -- confirm against china_smoking.csv.
19NOTE = """::
21 Number of Observations - 8
22 Number of Variables - 3
23 Variable name definitions::
25 city_name - name of the city
26 smoking - yes or no, according to a person's smoking behavior
27 lung_cancer - yes or no, according to a person's lung cancer status
28"""
def load_pandas():
    """
    Read the China smoking/lung cancer CSV and wrap it in a Dataset.

    The file ``china_smoking.csv`` is loaded through ``du.load_csv`` relative
    to this module. The result of ``set_index('Location')`` on the loaded
    table is handed to ``du.Dataset`` as ``data``, and the table as loaded is
    attached to the dataset as ``raw_data``.

    Returns
    -------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.
    """
    csv_table = du.load_csv(__file__, 'china_smoking.csv')
    dataset = du.Dataset(
        data=csv_table.set_index('Location'),
        title="Smoking and lung cancer in Chinese regions",
    )
    # Expose the table exactly as it came back from load_csv.
    dataset.raw_data = csv_table
    return dataset
def load(as_pandas=None):
    """
    Load the China smoking/lung cancer data via ``du.as_numpy_dataset``.

    The dataset is built by ``load_pandas()`` and then passed through
    ``du.as_numpy_dataset`` with ``retain_index=True``.

    Parameters
    ----------
    as_pandas : bool
        Flag indicating whether to return pandas DataFrames and Series
        or numpy recarrays and arrays.  If True, returns pandas.  The value
        (including the default ``None``) is forwarded unchanged, so its
        handling is decided by ``du.as_numpy_dataset``.

    Returns
    -------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.
    """
    pandas_dataset = load_pandas()
    return du.as_numpy_dataset(
        pandas_dataset, as_pandas=as_pandas, retain_index=True
    )