Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Compatibility tools for various data structure inputs 

3""" 

4import numpy as np 

5import pandas as pd 

6 

7 

def _check_period_index(x, freq="M"):
    """
    Validate that ``x`` is indexed by dates with the expected frequency.

    Parameters
    ----------
    x : object with an ``index`` attribute
        Typically a pandas Series or DataFrame.
    freq : str
        Prefix that the frequency string of ``x.index`` must start with.
        Default is "M".

    Raises
    ------
    ValueError
        If ``x.index`` is not a DatetimeIndex or PeriodIndex, or if its
        frequency (declared, or inferred when none is declared) does not
        start with ``freq``. This includes the case where no frequency
        can be inferred at all.
    """
    if not isinstance(x.index, (pd.DatetimeIndex, pd.PeriodIndex)):
        raise ValueError("The index must be a DatetimeIndex or PeriodIndex")

    if x.index.freq is not None:
        inferred_freq = x.index.freqstr
    else:
        # May be None when the spacing of the index is irregular.
        inferred_freq = pd.infer_freq(x.index)

    if inferred_freq is None or not inferred_freq.startswith(freq):
        # The expected value is the requested ``freq``; the inferred one is
        # what was actually found.
        raise ValueError("Expected frequency {}. Got {}".format(freq,
                                                                inferred_freq))

20 

21 

def is_data_frame(obj):
    """Return True when ``obj`` is a pandas DataFrame (or a subclass)."""
    frame_cls = pd.DataFrame
    return isinstance(obj, frame_cls)

24 

25 

def is_design_matrix(obj):
    """Return True when ``obj`` is a patsy ``DesignMatrix`` instance."""
    # patsy is imported on each call rather than at module import time.
    import patsy
    return isinstance(obj, patsy.DesignMatrix)

29 

30 

def _is_structured_ndarray(obj):
    """Return True when ``obj`` is an ndarray whose dtype has named fields."""
    if not isinstance(obj, np.ndarray):
        return False
    field_names = obj.dtype.names
    return field_names is not None

33 

34 

def interpret_data(data, colnames=None, rownames=None):
    """
    Normalize ``data`` into a plain array plus column and row labels.

    Parameters
    ----------
    data : ndarray, structured ndarray or DataFrame
        For a structured array a FutureWarning is issued and the array is
        converted with ``struct_to_ndarray``. For a DataFrame, rows with
        missing values are dropped first.
    colnames : sequence or None
        Column labels. When None, the field names of a structured array are
        used, or ``Y_0, Y_1, ...`` are generated for a plain array. For a
        DataFrame the passed value is replaced by the frame's columns.
    rownames : sequence or None
        Row labels. For a DataFrame the passed value is replaced by the
        frame's index.

    Returns
    -------
    values : ndarray
    colnames : list
    rownames : sequence or None

    Raises
    ------
    TypeError
        If ``data`` is neither an ndarray nor a DataFrame.
    ValueError
        If the number of column names does not match the number of columns,
        or the number of row names does not match the number of rows.
    """
    if isinstance(data, np.ndarray):
        values = data
        if _is_structured_ndarray(data):
            import warnings
            from statsmodels.tools.sm_exceptions import recarray_warning
            warnings.warn(recarray_warning, FutureWarning)
            if colnames is None:
                colnames = data.dtype.names
            values = struct_to_ndarray(data)

        if colnames is None:
            n_columns = values.shape[1]
            colnames = ['Y_%d' % pos for pos in range(n_columns)]
    elif is_data_frame(data):
        # XXX: hack
        complete_rows = data.dropna()
        values = complete_rows.values
        colnames = complete_rows.columns
        rownames = complete_rows.index
    else:  # pragma: no cover
        raise TypeError('Cannot handle input type {typ}'
                        .format(typ=type(data).__name__))

    # Callers always get a real list of column labels back.
    colnames = colnames if isinstance(colnames, list) else list(colnames)

    # sanity check
    if values.shape[1] != len(colnames):
        raise ValueError('length of colnames does not match number '
                         'of columns in data')

    if rownames is not None:
        if len(rownames) != len(values):
            raise ValueError('length of rownames does not match number '
                             'of rows in data')

    return values, colnames, rownames

86 

87 

def struct_to_ndarray(arr):
    """
    Convert a structured array into a plain float ndarray.

    Parameters
    ----------
    arr : ndarray with a structured dtype
        Each named field becomes one entry along a new trailing axis.

    Returns
    -------
    ndarray
        Float array of shape ``arr.shape + (number_of_fields,)``. When every
        field is a native float and the record has no padding, this is a
        zero-copy view of ``arr``; otherwise it is a converted copy.
    """
    names = arr.dtype.names
    n_fields = len(names)
    float_dtype = np.dtype(float)

    # A raw view reinterprets bytes, so it is only valid when the record is
    # exactly ``n_fields`` native floats laid out back to back.
    is_packed_float = (
        arr.dtype.itemsize == n_fields * float_dtype.itemsize
        and all(arr.dtype[name] == float_dtype for name in names)
    )
    if is_packed_float:
        return arr.view((float, (n_fields,)), type=np.ndarray)

    # Integer, bool, float32, byte-swapped or padded fields: convert the
    # values field by field instead of reinterpreting their memory.
    columns = [np.asarray(arr[name], dtype=float) for name in names]
    return np.stack(columns, axis=-1)

90 

91 

def _is_using_ndarray_type(endog, exog):
    """
    Return True when ``endog`` is exactly an ``np.ndarray`` and ``exog`` is
    exactly an ``np.ndarray`` or None.

    The comparison is on the exact type, so ndarray subclasses do not count.
    """
    if type(endog) is not np.ndarray:
        return False
    return type(exog) is np.ndarray or exog is None

95 

96 

def _is_using_ndarray(endog, exog):
    """
    Return True when ``endog`` is an ndarray (subclasses included) and
    ``exog`` is an ndarray or None.
    """
    if not isinstance(endog, np.ndarray):
        return False
    return isinstance(exog, np.ndarray) or exog is None

100 

101 

def _is_using_pandas(endog, exog):
    """
    Return True when either ``endog`` or ``exog`` is an instance of one of
    the classes in ``statsmodels.compat.pandas.data_klasses``.
    """
    from statsmodels.compat.pandas import data_klasses
    return any(isinstance(candidate, data_klasses)
               for candidate in (endog, exog))

105 

106 

def _is_array_like(endog, exog):
    """
    Return True when both ``endog`` and ``exog`` can be converted with
    ``np.asarray``.

    Each argument is converted separately, so mixed inputs (e.g. an ndarray
    and a list) are accepted.

    Returns
    -------
    bool
        False if either conversion raises an ordinary exception. Interpreter
        level signals such as KeyboardInterrupt and SystemExit are not
        swallowed and propagate to the caller.
    """
    try:  # do it like this in case of mixed types, ie., ndarray and list
        np.asarray(endog)
        np.asarray(exog)
    except Exception:
        return False
    return True

114 

115 

def _is_using_patsy(endog, exog):
    """
    Return True when ``endog`` is a patsy design matrix and ``exog`` is a
    design matrix or None.
    """
    # we get this when a structured array is passed through a formula
    if not is_design_matrix(endog):
        return False
    return is_design_matrix(exog) or exog is None

120 

121 

def _is_recarray(data):
    """
    Returns true if data is a recarray
    """
    # ``np.recarray`` is the public name of the class; going through the
    # private ``np.core`` namespace emits a DeprecationWarning on NumPy 2.
    return isinstance(data, np.recarray)