Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/tools/data.py : 23%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Compatibility tools for various data structure inputs
3"""
4import numpy as np
5import pandas as pd
8def _check_period_index(x, freq="M"):
9 from pandas import PeriodIndex, DatetimeIndex
10 if not isinstance(x.index, (DatetimeIndex, PeriodIndex)):
11 raise ValueError("The index must be a DatetimeIndex or PeriodIndex")
13 if x.index.freq is not None:
14 inferred_freq = x.index.freqstr
15 else:
16 inferred_freq = pd.infer_freq(x.index)
17 if not inferred_freq.startswith(freq):
18 raise ValueError("Expected frequency {}. Got {}".format(inferred_freq,
19 freq))
def is_data_frame(obj):
    """
    Returns True if obj is an instance of pandas.DataFrame
    """
    frame_type = pd.DataFrame
    return isinstance(obj, frame_type)
def is_design_matrix(obj):
    """
    Returns True if obj is an instance of patsy.DesignMatrix
    """
    # patsy is imported lazily, only when this check is actually called
    import patsy
    return isinstance(obj, patsy.DesignMatrix)
31def _is_structured_ndarray(obj):
32 return isinstance(obj, np.ndarray) and obj.dtype.names is not None
def interpret_data(data, colnames=None, rownames=None):
    """
    Convert passed data structure to form required by estimation classes

    Parameters
    ----------
    data : array_like
        Either a numpy ndarray (plain or structured) or a pandas DataFrame.
    colnames : sequence or None
        May be part of data structure. For a structured ndarray the field
        names are used when this is None; for a plain ndarray the names
        ``Y_0, Y_1, ...`` are generated when this is None; for a DataFrame
        the columns of the frame always replace it.
    rownames : sequence or None
        For a DataFrame the index of the frame always replaces it.

    Returns
    -------
    (values, colnames, rownames) : (homogeneous ndarray, list)

    Raises
    ------
    TypeError
        If data is neither an ndarray nor a DataFrame.
    ValueError
        If the number of column names does not match the number of columns,
        or the number of row names does not match the number of rows.
    """
    if isinstance(data, np.ndarray):
        structured = _is_structured_ndarray(data)
        if structured:
            import warnings
            from statsmodels.tools.sm_exceptions import recarray_warning
            warnings.warn(recarray_warning, FutureWarning)
            colnames = data.dtype.names if colnames is None else colnames
        values = struct_to_ndarray(data) if structured else data

        if colnames is None:
            n_columns = values.shape[1]
            colnames = ['Y_%d' % idx for idx in range(n_columns)]
    elif is_data_frame(data):
        # XXX: hack
        frame = data.dropna()
        values, colnames, rownames = frame.values, frame.columns, frame.index
    else:  # pragma: no cover
        type_name = type(data).__name__
        raise TypeError('Cannot handle input type {typ}'.format(typ=type_name))

    if not isinstance(colnames, list):
        colnames = list(colnames)

    # sanity check
    if values.shape[1] != len(colnames):
        raise ValueError('length of colnames does not match number '
                         'of columns in data')

    if rownames is not None and len(values) != len(rownames):
        raise ValueError('length of rownames does not match number '
                         'of rows in data')

    return values, colnames, rownames
def struct_to_ndarray(arr):
    """
    View a structured array as a plain ndarray with one float column per field

    The data are reinterpreted through ``ndarray.view``; no per-field type
    conversion is performed.
    """
    n_fields = len(arr.dtype.names)
    flat_dtype = (float, (n_fields,))
    return arr.view(flat_dtype, type=np.ndarray)
92def _is_using_ndarray_type(endog, exog):
93 return (type(endog) is np.ndarray and
94 (type(exog) is np.ndarray or exog is None))
97def _is_using_ndarray(endog, exog):
98 return (isinstance(endog, np.ndarray) and
99 (isinstance(exog, np.ndarray) or exog is None))
def _is_using_pandas(endog, exog):
    """
    Returns True if either endog or exog is an instance of one of the
    classes in statsmodels.compat.pandas.data_klasses
    """
    from statsmodels.compat.pandas import data_klasses as klasses
    return any(isinstance(candidate, klasses) for candidate in (endog, exog))
107def _is_array_like(endog, exog):
108 try: # do it like this in case of mixed types, ie., ndarray and list
109 endog = np.asarray(endog)
110 exog = np.asarray(exog)
111 return True
112 except:
113 return False
def _is_using_patsy(endog, exog):
    """
    Returns True if endog is a patsy design matrix and exog is either a
    design matrix or None
    """
    # we get this when a structured array is passed through a formula
    if not is_design_matrix(endog):
        return False
    return is_design_matrix(exog) or exog is None
122def _is_recarray(data):
123 """
124 Returns true if data is a recarray
125 """
126 return isinstance(data, np.core.recarray)