Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""RAND Health Insurance Experiment Data""" 

2from statsmodels.datasets import utils as du 

3 

__docformat__ = 'restructuredtext'

# Module-level metadata strings describing the dataset.
# Licensing statement for the data.
COPYRIGHT = """This is in the public domain."""
# The title is taken from the module docstring.
TITLE = __doc__
# Provenance of the data: who collected it, where it was published and
# where this copy was obtained.
SOURCE = """
The data was collected by the RAND corporation as part of the Health
Insurance Experiment (HIE).

http://www.rand.org/health/projects/hie.html

This data was used in::

    Cameron, A.C. and Trivedi, P.K. 2005. `Microeconometrics: Methods
    and Applications,` Cambridge: New York.

And was obtained from: <http://cameron.econ.ucdavis.edu/mmabook/mmadata.html>

See randhie/src for the original data and description. The data included
here contains only a subset of the original data. The data varies slightly
compared to that reported in Cameron and Trivedi.
"""

# One-line description of the dataset.
DESCRSHORT = """The RAND Co. Health Insurance Experiment Data"""

# Long description; intentionally left empty in this module.
DESCRLONG = """"""

# Size of the dataset and the meaning of each variable, written as a
# reStructuredText literal block (hence the indentation after ``::``).
NOTE = """::

    Number of observations - 20,190
    Number of variables - 10
    Variable name definitions::

        mdvis   - Number of outpatient visits to an MD
        lncoins - ln(coinsurance + 1), 0 <= coinsurance <= 100
        idp     - 1 if individual deductible plan, 0 otherwise
        lpi     - ln(max(1, annual participation incentive payment))
        fmde    - 0 if idp = 1; ln(max(1, MDE/(0.01 coinsurance))) otherwise
        physlm  - 1 if the person has a physical limitation
        disea   - number of chronic diseases
        hlthg   - 1 if self-rated health is good
        hlthf   - 1 if self-rated health is fair
        hlthp   - 1 if self-rated health is poor
        (Omitted category is excellent self-rated health)
"""

48 

49 

def load(as_pandas=None):
    """
    Load the RAND HIE data and return a Dataset instance.

    The data is first loaded through ``load_pandas`` and the result is then
    handed to ``du.as_numpy_dataset`` together with ``as_pandas``.

    Parameters
    ----------
    as_pandas : bool
        Flag indicating whether to return pandas DataFrames and Series
        or numpy recarrays and arrays. If True, returns pandas.

    Returns
    -------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.

    Notes
    -----
    endog - response variable, mdvis
    exog - design
    """
    pandas_dataset = load_pandas()
    return du.as_numpy_dataset(pandas_dataset, as_pandas=as_pandas)

71 

72 

def load_pandas():
    """
    Load the RAND HIE data and return a Dataset instance.

    The raw data comes from ``_get_data`` and is passed to
    ``du.process_pandas`` with ``endog_idx=0``.

    Returns
    -------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.

    Notes
    -----
    endog - response variable, mdvis
    exog - design
    """
    raw_data = _get_data()
    return du.process_pandas(raw_data, endog_idx=0)

88 

89 

def _get_data():
    """
    Read ``randhie.csv`` via ``du.load_csv`` and return the result.

    This module's ``__file__`` is passed along with the file name;
    presumably it is used to locate the CSV next to this module
    (confirm against ``du.load_csv``).
    """
    csv_name = 'randhie.csv'
    return du.load_csv(__file__, csv_name)