Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/patsy/contrasts.py : 18%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of Patsy
2# Copyright (C) 2011-2012 Nathaniel Smith <njs@pobox.com>
3# See file LICENSE.txt for license information.
5# http://www.ats.ucla.edu/stat/r/library/contrast_coding.htm
6# http://www.ats.ucla.edu/stat/sas/webbooks/reg/chapter5/sasreg5.htm
8from __future__ import print_function
10# These are made available in the patsy.* namespace
11__all__ = ["ContrastMatrix", "Treatment", "Poly", "Sum", "Helmert", "Diff"]
13import sys
14import six
15import numpy as np
16from patsy import PatsyError
17from patsy.util import (repr_pretty_delegate, repr_pretty_impl,
18 safe_issubdtype,
19 no_pickling, assert_no_pickling)
21class ContrastMatrix(object):
22 """A simple container for a matrix used for coding categorical factors.
24 Attributes:
26 .. attribute:: matrix
28 A 2d ndarray, where each column corresponds to one column of the
29 resulting design matrix, and each row contains the entries for a single
30 categorical variable level. Usually n-by-n for a full rank coding or
31 n-by-(n-1) for a reduced rank coding, though other options are
32 possible.
34 .. attribute:: column_suffixes
36 A list of strings to be appended to the factor name, to produce the
37 final column names. E.g. for treatment coding the entries will look
38 like ``"[T.level1]"``.
39 """
40 def __init__(self, matrix, column_suffixes):
41 self.matrix = np.asarray(matrix)
42 self.column_suffixes = column_suffixes
43 if self.matrix.shape[1] != len(column_suffixes):
44 raise PatsyError("matrix and column_suffixes don't conform")
46 __repr__ = repr_pretty_delegate
47 def _repr_pretty_(self, p, cycle):
48 repr_pretty_impl(p, self, [self.matrix, self.column_suffixes])
50 __getstate__ = no_pickling
52def test_ContrastMatrix():
53 cm = ContrastMatrix([[1, 0], [0, 1]], ["a", "b"])
54 assert np.array_equal(cm.matrix, np.eye(2))
55 assert cm.column_suffixes == ["a", "b"]
56 # smoke test
57 repr(cm)
59 from nose.tools import assert_raises
60 assert_raises(PatsyError, ContrastMatrix, [[1], [0]], ["a", "b"])
62 assert_no_pickling(cm)
64# This always produces an object of the type that Python calls 'str' (whether
65# that be a Python 2 string-of-bytes or a Python 3 string-of-unicode). It does
66# *not* make any particular guarantees about being reversible or having other
67# such useful programmatic properties -- it just produces something that will
68# be nice for users to look at.
69def _obj_to_readable_str(obj):
70 if isinstance(obj, str):
71 return obj
72 elif sys.version_info >= (3,) and isinstance(obj, bytes):
73 try:
74 return obj.decode("utf-8")
75 except UnicodeDecodeError:
76 return repr(obj)
77 elif sys.version_info < (3,) and isinstance(obj, unicode):
78 try:
79 return obj.encode("ascii")
80 except UnicodeEncodeError:
81 return repr(obj)
82 else:
83 return repr(obj)
85def test__obj_to_readable_str():
86 def t(obj, expected):
87 got = _obj_to_readable_str(obj)
88 assert type(got) is str
89 assert got == expected
90 t(1, "1")
91 t(1.0, "1.0")
92 t("asdf", "asdf")
93 t(six.u("asdf"), "asdf")
94 if sys.version_info >= (3,):
95 # we can use "foo".encode here b/c this is python 3!
96 # a utf-8 encoded euro-sign comes out as a real euro sign.
97 t("\u20ac".encode("utf-8"), six.u("\u20ac"))
98 # but a iso-8859-15 euro sign can't be decoded, and we fall back on
99 # repr()
100 t("\u20ac".encode("iso-8859-15"), "b'\\xa4'")
101 else:
102 t(six.u("\u20ac"), "u'\\u20ac'")
104def _name_levels(prefix, levels):
105 return ["[%s%s]" % (prefix, _obj_to_readable_str(level)) for level in levels]
107def test__name_levels():
108 assert _name_levels("a", ["b", "c"]) == ["[ab]", "[ac]"]
110def _dummy_code(levels):
111 return ContrastMatrix(np.eye(len(levels)), _name_levels("", levels))
113def _get_level(levels, level_ref):
114 if level_ref in levels:
115 return levels.index(level_ref)
116 if isinstance(level_ref, six.integer_types):
117 if level_ref < 0:
118 level_ref += len(levels)
119 if not (0 <= level_ref < len(levels)):
120 raise PatsyError("specified level %r is out of range"
121 % (level_ref,))
122 return level_ref
123 raise PatsyError("specified level %r not found" % (level_ref,))
125def test__get_level():
126 assert _get_level(["a", "b", "c"], 0) == 0
127 assert _get_level(["a", "b", "c"], -1) == 2
128 assert _get_level(["a", "b", "c"], "b") == 1
129 # For integer levels, we check identity before treating it as an index
130 assert _get_level([2, 1, 0], 0) == 2
131 from nose.tools import assert_raises
132 assert_raises(PatsyError, _get_level, ["a", "b"], 2)
133 assert_raises(PatsyError, _get_level, ["a", "b"], -3)
134 assert_raises(PatsyError, _get_level, ["a", "b"], "c")
136 if not six.PY3:
137 assert _get_level(["a", "b", "c"], long(0)) == 0
138 assert _get_level(["a", "b", "c"], long(-1)) == 2
139 assert _get_level([2, 1, 0], long(0)) == 2
142class Treatment(object):
143 """Treatment coding (also known as dummy coding).
145 This is the default coding.
147 For reduced-rank coding, one level is chosen as the "reference", and its
148 mean behaviour is represented by the intercept. Each column of the
149 resulting matrix represents the difference between the mean of one level
150 and this reference level.
152 For full-rank coding, classic "dummy" coding is used, and each column of
153 the resulting matrix represents the mean of the corresponding level.
155 The reference level defaults to the first level, or can be specified
156 explicitly.
158 .. ipython:: python
160 # reduced rank
161 dmatrix("C(a, Treatment)", balanced(a=3))
162 # full rank
163 dmatrix("0 + C(a, Treatment)", balanced(a=3))
164 # Setting a reference level
165 dmatrix("C(a, Treatment(1))", balanced(a=3))
166 dmatrix("C(a, Treatment('a2'))", balanced(a=3))
168 Equivalent to R ``contr.treatment``. The R documentation suggests that
169 using ``Treatment(reference=-1)`` will produce contrasts that are
170 "equivalent to those produced by many (but not all) SAS procedures".
171 """
172 def __init__(self, reference=None):
173 self.reference = reference
175 def code_with_intercept(self, levels):
176 return _dummy_code(levels)
178 def code_without_intercept(self, levels):
179 if self.reference is None:
180 reference = 0
181 else:
182 reference = _get_level(levels, self.reference)
183 eye = np.eye(len(levels) - 1)
184 contrasts = np.vstack((eye[:reference, :],
185 np.zeros((1, len(levels) - 1)),
186 eye[reference:, :]))
187 names = _name_levels("T.", levels[:reference] + levels[reference + 1:])
188 return ContrastMatrix(contrasts, names)
190 __getstate__ = no_pickling
192def test_Treatment():
193 t1 = Treatment()
194 matrix = t1.code_with_intercept(["a", "b", "c"])
195 assert matrix.column_suffixes == ["[a]", "[b]", "[c]"]
196 assert np.allclose(matrix.matrix, [[1, 0, 0], [0, 1, 0], [0, 0, 1]])
197 matrix = t1.code_without_intercept(["a", "b", "c"])
198 assert matrix.column_suffixes == ["[T.b]", "[T.c]"]
199 assert np.allclose(matrix.matrix, [[0, 0], [1, 0], [0, 1]])
200 matrix = Treatment(reference=1).code_without_intercept(["a", "b", "c"])
201 assert matrix.column_suffixes == ["[T.a]", "[T.c]"]
202 assert np.allclose(matrix.matrix, [[1, 0], [0, 0], [0, 1]])
203 matrix = Treatment(reference=-2).code_without_intercept(["a", "b", "c"])
204 assert matrix.column_suffixes == ["[T.a]", "[T.c]"]
205 assert np.allclose(matrix.matrix, [[1, 0], [0, 0], [0, 1]])
206 matrix = Treatment(reference="b").code_without_intercept(["a", "b", "c"])
207 assert matrix.column_suffixes == ["[T.a]", "[T.c]"]
208 assert np.allclose(matrix.matrix, [[1, 0], [0, 0], [0, 1]])
209 # Make sure the default is always the first level, even if there is a
210 # different level called 0.
211 matrix = Treatment().code_without_intercept([2, 1, 0])
212 assert matrix.column_suffixes == ["[T.1]", "[T.0]"]
213 assert np.allclose(matrix.matrix, [[0, 0], [1, 0], [0, 1]])
215class Poly(object):
216 """Orthogonal polynomial contrast coding.
218 This coding scheme treats the levels as ordered samples from an underlying
219 continuous scale, whose effect takes an unknown functional form which is
220 `Taylor-decomposed`__ into the sum of a linear, quadratic, etc. components.
222 .. __: https://en.wikipedia.org/wiki/Taylor_series
224 For reduced-rank coding, you get a linear column, a quadratic column,
225 etc., up to the number of levels provided.
227 For full-rank coding, the same scheme is used, except that the zero-order
228 constant polynomial is also included. I.e., you get an intercept column
229 included as part of your categorical term.
231 By default the levels are treated as equally spaced, but you can override
232 this by providing a value for the `scores` argument.
234 Examples:
236 .. ipython:: python
238 # Reduced rank
239 dmatrix("C(a, Poly)", balanced(a=4))
240 # Full rank
241 dmatrix("0 + C(a, Poly)", balanced(a=3))
242 # Explicit scores
243 dmatrix("C(a, Poly([1, 2, 10]))", balanced(a=3))
245 This is equivalent to R's ``contr.poly``. (But note that in R, reduced
246 rank encodings are always dummy-coded, regardless of what contrast you
247 have set.)
248 """
249 def __init__(self, scores=None):
250 self.scores = scores
252 def _code_either(self, intercept, levels):
253 n = len(levels)
254 scores = self.scores
255 if scores is None:
256 scores = np.arange(n)
257 scores = np.asarray(scores, dtype=float)
258 if len(scores) != n:
259 raise PatsyError("number of levels (%s) does not match"
260 " number of scores (%s)"
261 % (n, len(scores)))
262 # Strategy: just make a matrix whose columns are naive linear,
263 # quadratic, etc., functions of the raw scores, and then use 'qr' to
264 # orthogonalize each column against those to its left.
265 scores -= scores.mean()
266 raw_poly = scores.reshape((-1, 1)) ** np.arange(n).reshape((1, -1))
267 q, r = np.linalg.qr(raw_poly)
268 q *= np.sign(np.diag(r))
269 q /= np.sqrt(np.sum(q ** 2, axis=1))
270 # The constant term is always all 1's -- we don't normalize it.
271 q[:, 0] = 1
272 names = [".Constant", ".Linear", ".Quadratic", ".Cubic"]
273 names += ["^%s" % (i,) for i in range(4, n)]
274 names = names[:n]
275 if intercept:
276 return ContrastMatrix(q, names)
277 else:
278 # We always include the constant/intercept column as something to
279 # orthogonalize against, but we don't always return it:
280 return ContrastMatrix(q[:, 1:], names[1:])
282 def code_with_intercept(self, levels):
283 return self._code_either(True, levels)
285 def code_without_intercept(self, levels):
286 return self._code_either(False, levels)
288 __getstate__ = no_pickling
290def test_Poly():
291 t1 = Poly()
292 matrix = t1.code_with_intercept(["a", "b", "c"])
293 assert matrix.column_suffixes == [".Constant", ".Linear", ".Quadratic"]
294 # Values from R 'options(digits=15); contr.poly(3)'
295 expected = [[1, -7.07106781186548e-01, 0.408248290463863],
296 [1, 0, -0.816496580927726],
297 [1, 7.07106781186547e-01, 0.408248290463863]]
298 print(matrix.matrix)
299 assert np.allclose(matrix.matrix, expected)
300 matrix = t1.code_without_intercept(["a", "b", "c"])
301 assert matrix.column_suffixes == [".Linear", ".Quadratic"]
302 # Values from R 'options(digits=15); contr.poly(3)'
303 print(matrix.matrix)
304 assert np.allclose(matrix.matrix,
305 [[-7.07106781186548e-01, 0.408248290463863],
306 [0, -0.816496580927726],
307 [7.07106781186547e-01, 0.408248290463863]])
309 matrix = Poly(scores=[0, 10, 11]).code_with_intercept(["a", "b", "c"])
310 assert matrix.column_suffixes == [".Constant", ".Linear", ".Quadratic"]
311 # Values from R 'options(digits=15); contr.poly(3, scores=c(0, 10, 11))'
312 print(matrix.matrix)
313 assert np.allclose(matrix.matrix,
314 [[1, -0.813733471206735, 0.0671156055214024],
315 [1, 0.348742916231458, -0.7382716607354268],
316 [1, 0.464990554975277, 0.6711560552140243]])
318 # we had an integer/float handling bug for score vectors whose mean was
319 # non-integer, so check one of those:
320 matrix = Poly(scores=[0, 10, 12]).code_with_intercept(["a", "b", "c"])
321 assert matrix.column_suffixes == [".Constant", ".Linear", ".Quadratic"]
322 # Values from R 'options(digits=15); contr.poly(3, scores=c(0, 10, 12))'
323 print(matrix.matrix)
324 assert np.allclose(matrix.matrix,
325 [[1, -0.806559132617443, 0.127000127000191],
326 [1, 0.293294230042706, -0.762000762001143],
327 [1, 0.513264902574736, 0.635000635000952]])
329 from nose.tools import assert_raises
330 assert_raises(PatsyError,
331 Poly(scores=[0, 1]).code_with_intercept,
332 ["a", "b", "c"])
334 matrix = t1.code_with_intercept(list(range(6)))
335 assert matrix.column_suffixes == [".Constant", ".Linear", ".Quadratic",
336 ".Cubic", "^4", "^5"]
339class Sum(object):
340 """Deviation coding (also known as sum-to-zero coding).
342 Compares the mean of each level to the mean-of-means. (In a balanced
343 design, compares the mean of each level to the overall mean.)
345 For full-rank coding, a standard intercept term is added.
347 One level must be omitted to avoid redundancy; by default this is the last
348 level, but this can be adjusted via the `omit` argument.
350 .. warning:: There are multiple definitions of 'deviation coding' in
351 use. Make sure this is the one you expect before trying to interpret
352 your results!
354 Examples:
356 .. ipython:: python
358 # Reduced rank
359 dmatrix("C(a, Sum)", balanced(a=4))
360 # Full rank
361 dmatrix("0 + C(a, Sum)", balanced(a=4))
362 # Omit a different level
363 dmatrix("C(a, Sum(1))", balanced(a=3))
364 dmatrix("C(a, Sum('a1'))", balanced(a=3))
366 This is equivalent to R's `contr.sum`.
367 """
368 def __init__(self, omit=None):
369 self.omit = omit
371 def _omit_i(self, levels):
372 if self.omit is None:
373 # We assume below that this is positive
374 return len(levels) - 1
375 else:
376 return _get_level(levels, self.omit)
378 def _sum_contrast(self, levels):
379 n = len(levels)
380 omit_i = self._omit_i(levels)
381 eye = np.eye(n - 1)
382 out = np.empty((n, n - 1))
383 out[:omit_i, :] = eye[:omit_i, :]
384 out[omit_i, :] = -1
385 out[omit_i + 1:, :] = eye[omit_i:, :]
386 return out
388 def code_with_intercept(self, levels):
389 contrast = self.code_without_intercept(levels)
390 matrix = np.column_stack((np.ones(len(levels)),
391 contrast.matrix))
392 column_suffixes = ["[mean]"] + contrast.column_suffixes
393 return ContrastMatrix(matrix, column_suffixes)
395 def code_without_intercept(self, levels):
396 matrix = self._sum_contrast(levels)
397 omit_i = self._omit_i(levels)
398 included_levels = levels[:omit_i] + levels[omit_i + 1:]
399 return ContrastMatrix(matrix, _name_levels("S.", included_levels))
401 __getstate__ = no_pickling
403def test_Sum():
404 t1 = Sum()
405 matrix = t1.code_with_intercept(["a", "b", "c"])
406 assert matrix.column_suffixes == ["[mean]", "[S.a]", "[S.b]"]
407 assert np.allclose(matrix.matrix, [[1, 1, 0], [1, 0, 1], [1, -1, -1]])
408 matrix = t1.code_without_intercept(["a", "b", "c"])
409 assert matrix.column_suffixes == ["[S.a]", "[S.b]"]
410 assert np.allclose(matrix.matrix, [[1, 0], [0, 1], [-1, -1]])
411 # Check that it's not thrown off by negative integer term names
412 matrix = t1.code_without_intercept([-1, -2, -3])
413 assert matrix.column_suffixes == ["[S.-1]", "[S.-2]"]
414 assert np.allclose(matrix.matrix, [[1, 0], [0, 1], [-1, -1]])
415 t2 = Sum(omit=1)
416 matrix = t2.code_with_intercept(["a", "b", "c"])
417 assert matrix.column_suffixes == ["[mean]", "[S.a]", "[S.c]"]
418 assert np.allclose(matrix.matrix, [[1, 1, 0], [1, -1, -1], [1, 0, 1]])
419 matrix = t2.code_without_intercept(["a", "b", "c"])
420 assert matrix.column_suffixes == ["[S.a]", "[S.c]"]
421 assert np.allclose(matrix.matrix, [[1, 0], [-1, -1], [0, 1]])
422 matrix = t2.code_without_intercept([1, 0, 2])
423 assert matrix.column_suffixes == ["[S.0]", "[S.2]"]
424 assert np.allclose(matrix.matrix, [[-1, -1], [1, 0], [0, 1]])
425 t3 = Sum(omit=-3)
426 matrix = t3.code_with_intercept(["a", "b", "c"])
427 assert matrix.column_suffixes == ["[mean]", "[S.b]", "[S.c]"]
428 assert np.allclose(matrix.matrix, [[1, -1, -1], [1, 1, 0], [1, 0, 1]])
429 matrix = t3.code_without_intercept(["a", "b", "c"])
430 assert matrix.column_suffixes == ["[S.b]", "[S.c]"]
431 assert np.allclose(matrix.matrix, [[-1, -1], [1, 0], [0, 1]])
432 t4 = Sum(omit="a")
433 matrix = t3.code_with_intercept(["a", "b", "c"])
434 assert matrix.column_suffixes == ["[mean]", "[S.b]", "[S.c]"]
435 assert np.allclose(matrix.matrix, [[1, -1, -1], [1, 1, 0], [1, 0, 1]])
436 matrix = t3.code_without_intercept(["a", "b", "c"])
437 assert matrix.column_suffixes == ["[S.b]", "[S.c]"]
438 assert np.allclose(matrix.matrix, [[-1, -1], [1, 0], [0, 1]])
440class Helmert(object):
441 """Helmert contrasts.
443 Compares the second level with the first, the third with the average of
444 the first two, and so on.
446 For full-rank coding, a standard intercept term is added.
448 .. warning:: There are multiple definitions of 'Helmert coding' in
449 use. Make sure this is the one you expect before trying to interpret
450 your results!
452 Examples:
454 .. ipython:: python
456 # Reduced rank
457 dmatrix("C(a, Helmert)", balanced(a=4))
458 # Full rank
459 dmatrix("0 + C(a, Helmert)", balanced(a=4))
461 This is equivalent to R's `contr.helmert`.
462 """
463 def _helmert_contrast(self, levels):
464 n = len(levels)
465 #http://www.ats.ucla.edu/stat/sas/webbooks/reg/chapter5/sasreg5.htm#HELMERT
466 #contr = np.eye(n - 1)
467 #int_range = np.arange(n - 1., 1, -1)
468 #denom = np.repeat(int_range, np.arange(n - 2, 0, -1))
469 #contr[np.tril_indices(n - 1, -1)] = -1. / denom
471 #http://www.ats.ucla.edu/stat/r/library/contrast_coding.htm#HELMERT
472 #contr = np.zeros((n - 1., n - 1))
473 #int_range = np.arange(n, 1, -1)
474 #denom = np.repeat(int_range[:-1], np.arange(n - 2, 0, -1))
475 #contr[np.diag_indices(n - 1)] = (int_range - 1.) / int_range
476 #contr[np.tril_indices(n - 1, -1)] = -1. / denom
477 #contr = np.vstack((contr, -1./int_range))
479 #r-like
480 contr = np.zeros((n, n - 1))
481 contr[1:][np.diag_indices(n - 1)] = np.arange(1, n)
482 contr[np.triu_indices(n - 1)] = -1
483 return contr
485 def code_with_intercept(self, levels):
486 contrast = np.column_stack((np.ones(len(levels)),
487 self._helmert_contrast(levels)))
488 column_suffixes = _name_levels("H.", ["intercept"] + list(levels[1:]))
489 return ContrastMatrix(contrast, column_suffixes)
491 def code_without_intercept(self, levels):
492 contrast = self._helmert_contrast(levels)
493 return ContrastMatrix(contrast,
494 _name_levels("H.", levels[1:]))
496 __getstate__ = no_pickling
498def test_Helmert():
499 t1 = Helmert()
500 for levels in (["a", "b", "c", "d"], ("a", "b", "c", "d")):
501 matrix = t1.code_with_intercept(levels)
502 assert matrix.column_suffixes == ["[H.intercept]",
503 "[H.b]",
504 "[H.c]",
505 "[H.d]"]
506 assert np.allclose(matrix.matrix, [[1, -1, -1, -1],
507 [1, 1, -1, -1],
508 [1, 0, 2, -1],
509 [1, 0, 0, 3]])
510 matrix = t1.code_without_intercept(levels)
511 assert matrix.column_suffixes == ["[H.b]", "[H.c]", "[H.d]"]
512 assert np.allclose(matrix.matrix, [[-1, -1, -1],
513 [1, -1, -1],
514 [0, 2, -1],
515 [0, 0, 3]])
517class Diff(object):
518 """Backward difference coding.
520 This coding scheme is useful for ordered factors, and compares the mean of
521 each level with the preceding level. So you get the second level minus the
522 first, the third level minus the second, etc.
524 For full-rank coding, a standard intercept term is added (which gives the
525 mean value for the first level).
527 Examples:
529 .. ipython:: python
531 # Reduced rank
532 dmatrix("C(a, Diff)", balanced(a=3))
533 # Full rank
534 dmatrix("0 + C(a, Diff)", balanced(a=3))
535 """
536 def _diff_contrast(self, levels):
537 nlevels = len(levels)
538 contr = np.zeros((nlevels, nlevels-1))
539 int_range = np.arange(1, nlevels)
540 upper_int = np.repeat(int_range, int_range)
541 row_i, col_i = np.triu_indices(nlevels-1)
542 # we want to iterate down the columns not across the rows
543 # it would be nice if the index functions had a row/col order arg
544 col_order = np.argsort(col_i)
545 contr[row_i[col_order],
546 col_i[col_order]] = (upper_int-nlevels)/float(nlevels)
547 lower_int = np.repeat(int_range, int_range[::-1])
548 row_i, col_i = np.tril_indices(nlevels-1)
549 # we want to iterate down the columns not across the rows
550 col_order = np.argsort(col_i)
551 contr[row_i[col_order]+1, col_i[col_order]] = lower_int/float(nlevels)
552 return contr
554 def code_with_intercept(self, levels):
555 contrast = np.column_stack((np.ones(len(levels)),
556 self._diff_contrast(levels)))
557 return ContrastMatrix(contrast, _name_levels("D.", levels))
559 def code_without_intercept(self, levels):
560 contrast = self._diff_contrast(levels)
561 return ContrastMatrix(contrast, _name_levels("D.", levels[:-1]))
563 __getstate__ = no_pickling
565def test_diff():
566 t1 = Diff()
567 matrix = t1.code_with_intercept(["a", "b", "c", "d"])
568 assert matrix.column_suffixes == ["[D.a]", "[D.b]", "[D.c]",
569 "[D.d]"]
570 assert np.allclose(matrix.matrix, [[1, -3/4., -1/2., -1/4.],
571 [1, 1/4., -1/2., -1/4.],
572 [1, 1/4., 1./2, -1/4.],
573 [1, 1/4., 1/2., 3/4.]])
574 matrix = t1.code_without_intercept(["a", "b", "c", "d"])
575 assert matrix.column_suffixes == ["[D.a]", "[D.b]", "[D.c]"]
576 assert np.allclose(matrix.matrix, [[-3/4., -1/2., -1/4.],
577 [1/4., -1/2., -1/4.],
578 [1/4., 2./4, -1/4.],
579 [1/4., 1/2., 3/4.]])
581# contrast can be:
582# -- a ContrastMatrix
583# -- a simple np.ndarray
584# -- an object with code_with_intercept and code_without_intercept methods
585# -- a function returning one of the above
586# -- None, in which case the above rules are applied to 'default'
587# This function always returns a ContrastMatrix.
588def code_contrast_matrix(intercept, levels, contrast, default=None):
589 if contrast is None:
590 contrast = default
591 if callable(contrast):
592 contrast = contrast()
593 if isinstance(contrast, ContrastMatrix):
594 return contrast
595 as_array = np.asarray(contrast)
596 if safe_issubdtype(as_array.dtype, np.number):
597 return ContrastMatrix(as_array,
598 _name_levels("custom", range(as_array.shape[1])))
599 if intercept:
600 return contrast.code_with_intercept(levels)
601 else:
602 return contrast.code_without_intercept(levels)