Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""First 100 days of the US House of Representatives 1995""" 

2from statsmodels.datasets import utils as du 

3 

4__docformat__ = 'restructuredtext' 

5 

6COPYRIGHT = """Used with express permission from the original author, 

7who retains all rights.""" 

8TITLE = __doc__ 

9SOURCE = """ 

10Jeff Gill's `Generalized Linear Models: A Unified Approach` 

11 

12http://jgill.wustl.edu/research/books.html 

13""" 

14 

15DESCRSHORT = """Number of bill assignments in the 104th House in 1995""" 

16 

17DESCRLONG = """The example in Gill seeks to explain the number of bill 

18assignments in the first 100 days of the US' 104th House of Representatives. 

19The response variable is the number of bill assignments in the first 100 days 

20over 20 Committees. The explanatory variables in the example are the number of 

21assignments in the first 100 days of the 103rd House, the number of members on 

22the committee, the number of subcommittees, the log of the number of staff 

23assigned to the committee, a dummy variable indicating whether 

24the committee is a high prestige committee, and an interaction term between 

25the number of subcommittees and the log of the staff size. 

26 

27The data returned by load are not cleaned to represent the above example. 

28""" 

29 

30NOTE = """:: 

31 

32 Number of Observations - 20 

33 Number of Variables - 6 

34 Variable name definitions:: 

35 

36 BILLS104 - Number of bill assignments in the first 100 days of the 

37 104th House of Representatives. 

38 SIZE - Number of members on the committee. 

39 SUBS - Number of subcommittees. 

40 STAFF - Number of staff members assigned to the committee. 

41 PRESTIGE - PRESTIGE == 1 is a high prestige committee. 

42 BILLS103 - Number of bill assignments in the first 100 days of the 

43 103rd House of Representatives. 

44 

45 Committee names are included as a variable in the data file though not 

46 returned by load. 

47""" 

48 

49 

def load_pandas():
    """Load the committee data and return it processed for pandas.

    Returns
    -------
    The result of ``du.process_pandas`` applied to the frame produced by
    ``_get_data``, with column 0 of that frame selected as the endogenous
    variable (``endog_idx=0``).
    """
    return du.process_pandas(_get_data(), endog_idx=0)

53 

54 

def load(as_pandas=None):
    """
    Load the committee data and return a Dataset instance.

    Parameters
    ----------
    as_pandas : bool, optional
        Flag indicating whether to return pandas DataFrames and Series
        or numpy recarrays and arrays.  If True, returns pandas.
        Forwarded unchanged to ``du.as_numpy_dataset``.

    Returns
    -------
    Dataset
        See DATASET_PROPOSAL.txt for more information.
    """
    # Build the pandas-backed dataset first; the utility helper decides the
    # final container type based on ``as_pandas``.
    pandas_dataset = load_pandas()
    return du.as_numpy_dataset(pandas_dataset, as_pandas=as_pandas)

70 

71 

def _get_data():
    """Read ``committee.csv`` and return columns 1 through 6 as floats.

    The CSV is located relative to this module via ``du.load_csv``.
    """
    raw = du.load_csv(__file__, 'committee.csv')
    # Column 0 is dropped -- presumably the committee-name column that the
    # module NOTE says is in the file but not returned; verify against the CSV.
    selected = raw.iloc[:, 1:7]
    return selected.astype(float)

75 return data