Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""Smoking and lung cancer in eight cities in China.""" 

2from statsmodels.datasets import utils as du 

3 

# Declares reStructuredText as the markup of this module's docstrings
# (the conventional meaning of a module-level __docformat__).
4__docformat__ = 'restructuredtext' 

5 

# Journal and year of publication; SOURCE below carries the full citation.
6COPYRIGHT = """Intern. J. Epidemiol. (1992)""" 

# The dataset title is simply the module docstring.
7TITLE = __doc__ 

# Full bibliographic reference for the paper the data were transcribed from.
8SOURCE = """ 

9Transcribed from Z. Liu, Smoking and Lung Cancer Incidence in China, 

10Intern. J. Epidemiol., 21:197-201, (1992). 

11""" 

12 

# One-line summary of the dataset.
13DESCRSHORT = """Co-occurrence of lung cancer and smoking in 8 Chinese cities.""" 

14 

# Longer description: eight 2x2 contingency tables covering eight cities.
15DESCRLONG = """This is a series of 8 2x2 contingency tables showing the co-occurrence 

16of lung cancer and smoking in 8 Chinese cities. 

17""" 

18 

# Observation/variable summary written as reST literal blocks ("::").
# NOTE(review): the variable names listed here (city_name, smoking,
# lung_cancer) do not include the 'Location' column that load_pandas() uses
# as the index -- confirm against the columns of china_smoking.csv.
19NOTE = """:: 

20 

21 Number of Observations - 8 

22 Number of Variables - 3 

23 Variable name definitions:: 

24 

25 city_name - name of the city 

26 smoking - yes or no, according to a person's smoking behavior 

27 lung_cancer - yes or no, according to a person's lung cancer status 

28""" 

29 

30 

def load_pandas():
    """
    Read the China smoking/lung cancer CSV and wrap it in a Dataset.

    The table is read from 'china_smoking.csv', located relative to this
    module through ``du.load_csv``.  A copy indexed by the 'Location'
    column is handed to ``du.Dataset`` as ``data``, and the table exactly
    as loaded is attached to the result as ``raw_data``.

    Returns
    -------
    Dataset
        See DATASET_PROPOSAL.txt for more information.
    """
    frame = du.load_csv(__file__, 'china_smoking.csv')
    dataset = du.Dataset(
        data=frame.set_index('Location'),
        title="Smoking and lung cancer in Chinese regions",
    )
    # Keep the un-indexed table reachable next to the indexed one.
    dataset.raw_data = frame
    return dataset

45 

46 

def load(as_pandas=None):
    """
    Load the China smoking/lung cancer data and return a Dataset.

    The Dataset built by ``load_pandas`` is passed through
    ``du.as_numpy_dataset`` with ``retain_index=True``.

    Parameters
    ----------
    as_pandas : bool, optional
        Flag indicating whether to return pandas DataFrames and Series
        or numpy recarrays and arrays.  If True, returns pandas.  The
        value (including the default ``None``) is forwarded unchanged to
        ``du.as_numpy_dataset``.

    Returns
    -------
    Dataset
        See DATASET_PROPOSAL.txt for more information.
    """
    pandas_dataset = load_pandas()
    return du.as_numpy_dataset(
        pandas_dataset,
        as_pandas=as_pandas,
        retain_index=True,
    )