Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of Patsy 

2# Copyright (C) 2011-2012 Nathaniel Smith <njs@pobox.com> 

3# See file LICENSE.txt for license information. 

4 

5# This file defines the ModelDesc class, which describes a model at a high 

6# level, as a list of interactions of factors. It also has the code to convert 

7# a formula parse tree (from patsy.parse_formula) into a ModelDesc. 

8 

9from __future__ import print_function 

10 

11import six 

12from patsy import PatsyError 

13from patsy.parse_formula import ParseNode, Token, parse_formula 

14from patsy.eval import EvalEnvironment, EvalFactor 

15from patsy.util import uniqueify_list 

16from patsy.util import repr_pretty_delegate, repr_pretty_impl 

17from patsy.util import no_pickling, assert_no_pickling 

18 

19# These are made available in the patsy.* namespace 

20__all__ = ["Term", "ModelDesc", "INTERCEPT"] 

21 

22# One might think it would make more sense for 'factors' to be a set, rather 

23# than a tuple-with-guaranteed-unique-entries-that-compares-like-a-set. The 

24# reason we do it this way is that it preserves the order that the user typed 

25# and is expecting, which then ends up producing nicer names in our final 

26# output, nicer column ordering, etc. (A similar comment applies to the 

27# ordering of terms in ModelDesc objects as a whole.) 

class Term(object):
    """An interaction between zero or more factor objects.

    One of the basic building blocks used to represent formulas; a term
    corresponds to an expression like ``"a:b:c"`` in a formula string.
    For details, see :ref:`formulas` and :ref:`expert-model-specification`.

    Terms are hashable and compare by value. Comparison and hashing look at
    the *set* of factors, so the order of the factors does not matter.

    Attributes:

    .. attribute:: factors

       A tuple of factor objects (duplicates are dropped on construction).
    """
    def __init__(self, factors):
        # De-duplicate first, then freeze the result into a tuple.
        deduplicated = uniqueify_list(factors)
        self.factors = tuple(deduplicated)

    def __eq__(self, other):
        # Anything that is not a Term never compares equal.
        if not isinstance(other, Term):
            return False
        return frozenset(other.factors) == frozenset(self.factors)

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        # Must agree with __eq__, hence the order-insensitive frozenset.
        return hash((Term, frozenset(self.factors)))

    __repr__ = repr_pretty_delegate

    def _repr_pretty_(self, p, cycle):
        assert not cycle
        repr_pretty_impl(p, self, [list(self.factors)])

    def name(self):
        """Return a human-readable name for this term."""
        if not self.factors:
            return "Intercept"
        return ":".join(factor.name() for factor in self.factors)

    __getstate__ = no_pickling

69 

# The term with no factors at all; Term.name() reports it as "Intercept".
INTERCEPT = Term([])

71 

class _MockFactor(object):
    """Minimal stand-in for a factor: an object that only knows its name."""

    def __init__(self, name):
        self._name = name

    def name(self):
        """Return the name this mock factor was constructed with."""
        return self._name

78 

def test_Term():
    """Check de-duplication, order-insensitive equality/hash, and naming."""
    # Duplicate factors collapse, keeping first-seen order.
    deduplicated = Term([1, 2, 1])
    assert deduplicated.factors == (1, 2)

    # Equality and hashing ignore factor order.
    forward, backward = Term([1, 2]), Term([2, 1])
    assert forward == backward
    assert hash(forward) == hash(backward)

    # Names, on the other hand, follow the order the factors were given in.
    f1 = _MockFactor("a")
    f2 = _MockFactor("b")
    assert Term([f1, f2]).name() == "a:b"
    assert Term([f2, f1]).name() == "b:a"
    assert Term([]).name() == "Intercept"

    assert_no_pickling(Term([]))

90 

class ModelDesc(object):
    """A simple container representing the termlists parsed from a formula.

    This container has exactly the same representational power as a formula
    string, but is a Python object instead. You can build one by hand and
    pass it to functions like :func:`dmatrix` or :func:`incr_dbuilder` that
    expect a formula string, without doing any messy string manipulation.
    For details see :ref:`expert-model-specification`.

    Attributes:

    .. attribute:: lhs_termlist
                   rhs_termlist

       Two termlists representing the left- and right-hand sides of a
       formula, suitable for passing to :func:`design_matrix_builders`.
    """
    def __init__(self, lhs_termlist, rhs_termlist):
        self.lhs_termlist = uniqueify_list(lhs_termlist)
        self.rhs_termlist = uniqueify_list(rhs_termlist)

    __repr__ = repr_pretty_delegate

    def _repr_pretty_(self, p, cycle):
        assert not cycle
        keyword_fields = [
            ("lhs_termlist", self.lhs_termlist),
            ("rhs_termlist", self.rhs_termlist),
        ]
        return repr_pretty_impl(p, self, [], keyword_fields)

    def describe(self):
        """Returns a human-readable representation of this :class:`ModelDesc`
        in pseudo-formula notation.

        .. warning:: There is no guarantee that the strings returned by this
           function can be parsed as formulas. They are best-effort
           descriptions intended for human users. However, if this ModelDesc
           was created by parsing a formula, then it should work in
           practice. If you *really* have to.
        """
        def term_code(term):
            # The intercept is spelled "1"; everything else uses its name.
            return "1" if term == INTERCEPT else term.name()

        lhs_text = " + ".join([term_code(term) for term in self.lhs_termlist])
        # An empty left-hand side is rendered as a bare "~ ".
        prefix = lhs_text + " ~ " if lhs_text else "~ "

        # A right-hand side holding only the intercept is just "1".
        if self.rhs_termlist == [INTERCEPT]:
            return prefix + term_code(INTERCEPT)

        rhs_pieces = [term_code(term) for term in self.rhs_termlist
                      if term != INTERCEPT]
        # A missing intercept is made explicit with a leading "0".
        if INTERCEPT not in self.rhs_termlist:
            rhs_pieces.insert(0, "0")
        return prefix + " + ".join(rhs_pieces)

    @classmethod
    def from_formula(cls, tree_or_string):
        """Construct a :class:`ModelDesc` from a formula string.

        :arg tree_or_string: A formula string. (Or an unevaluated formula
          parse tree, but the API for generating those isn't public yet. Shh,
          it can be our secret.)
        :returns: A new :class:`ModelDesc`.
        """
        # Strings are parsed first; parse trees are used as they are.
        tree = (tree_or_string if isinstance(tree_or_string, ParseNode)
                else parse_formula(tree_or_string))
        desc = Evaluator().eval(tree, require_evalexpr=False)
        assert isinstance(desc, cls)
        return desc

    __getstate__ = no_pickling

170 

def test_ModelDesc():
    """Check ModelDesc construction, describe() output and pickling guard."""
    f1 = _MockFactor("a")
    f2 = _MockFactor("b")
    lhs = [INTERCEPT, Term([f1])]
    rhs = [Term([f1]), Term([f1, f2])]
    m = ModelDesc(lhs, rhs)
    assert m.lhs_termlist == [INTERCEPT, Term([f1])]
    assert m.rhs_termlist == [Term([f1]), Term([f1, f2])]
    print(m.describe())
    assert m.describe() == "1 + a ~ 0 + a + a:b"

    assert_no_pickling(m)

    # Edge cases around empty sides and intercept-only sides.
    empty = ModelDesc([], [])
    assert empty.describe() == "~ 0"
    lhs_only = ModelDesc([INTERCEPT], [])
    assert lhs_only.describe() == "1 ~ 0"
    both_intercepts = ModelDesc([INTERCEPT], [INTERCEPT])
    assert both_intercepts.describe() == "1 ~ 1"
    with_factor = ModelDesc([INTERCEPT], [INTERCEPT, Term([f2])])
    assert with_factor.describe() == "1 ~ b"

187 

def test_ModelDesc_from_formula():
    """from_formula must accept both a formula string and a parse tree."""
    expected_lhs = [Term([EvalFactor("y")])]
    expected_rhs = [INTERCEPT, Term([EvalFactor("x")])]
    for formula in ("y ~ x", parse_formula("y ~ x")):
        md = ModelDesc.from_formula(formula)
        assert md.lhs_termlist == expected_lhs
        assert md.rhs_termlist == expected_rhs

193 

class IntermediateExpr(object):
    """This class holds an intermediate result while we're evaluating a tree.

    Attributes:

    * ``intercept`` -- truthy when this sub-expression explicitly asks for an
      intercept.
    * ``intercept_origin`` -- where that intercept came from; it is handed to
      :class:`PatsyError` for error reporting. Must be truthy whenever
      ``intercept`` is.
    * ``intercept_removed`` -- truthy when this sub-expression explicitly
      removes the intercept. Never set together with ``intercept``.
    * ``terms`` -- tuple of the terms collected so far, duplicates dropped.
    """
    def __init__(self, intercept, intercept_origin, intercept_removed, terms):
        self.intercept = intercept
        self.intercept_origin = intercept_origin
        self.intercept_removed = intercept_removed
        self.terms = tuple(uniqueify_list(terms))
        # Internal invariants: an intercept always knows its origin, and an
        # expression cannot both add and remove the intercept.
        if self.intercept:
            assert self.intercept_origin
        assert not (self.intercept and self.intercept_removed)

    __repr__ = repr_pretty_delegate

    # The hook has to be spelled ``_repr_pretty_`` -- the same name Term and
    # ModelDesc pair with ``__repr__ = repr_pretty_delegate``. It used to be
    # misspelled ``_pretty_repr_``, so it was never found.
    def _repr_pretty_(self, p, cycle):  # pragma: no cover
        assert not cycle
        return repr_pretty_impl(p, self,
                                [self.intercept, self.intercept_origin,
                                 self.intercept_removed, self.terms])

    # Backward-compatible alias for the old, misspelled hook name.
    _pretty_repr_ = _repr_pretty_

    __getstate__ = no_pickling

213 

def _maybe_add_intercept(doit, terms):
    """Return ``terms`` with INTERCEPT prepended when ``doit`` is truthy.

    ``terms`` is expected to be a tuple; it is returned unchanged when
    ``doit`` is falsy.
    """
    return (INTERCEPT,) + terms if doit else terms

219 

def _eval_any_tilde(evaluator, tree):
    """Evaluate a unary or binary ``~`` node into a :class:`ModelDesc`."""
    sides = [evaluator.eval(arg) for arg in tree.args]
    if len(sides) == 1:
        # Formula was like: "~ foo"
        # We pretend that instead it was like: "0 ~ foo"
        sides = [IntermediateExpr(False, None, True, [])] + sides
    assert len(sides) == 2
    lhs, rhs = sides
    # Note that only the RHS gets an implicit intercept: the LHS needs an
    # explicit one, the RHS has one unless it was explicitly removed.
    lhs_terms = _maybe_add_intercept(lhs.intercept, lhs.terms)
    rhs_terms = _maybe_add_intercept(not rhs.intercept_removed, rhs.terms)
    return ModelDesc(lhs_terms, rhs_terms)

231 

def _eval_binary_plus(evaluator, tree):
    """Evaluate a binary ``+`` node, tracking the intercept state."""
    lhs = evaluator.eval(tree.args[0])
    rhs_node = tree.args[1]

    # "... + 0" removes the intercept and keeps the left-hand terms.
    if rhs_node.type == "ZERO":
        return IntermediateExpr(False, None, True, lhs.terms)

    rhs = evaluator.eval(rhs_node)
    merged_terms = lhs.terms + rhs.terms
    # An intercept on the right wins; otherwise the left state carries over.
    if rhs.intercept:
        return IntermediateExpr(True, rhs.intercept_origin, False,
                                merged_terms)
    return IntermediateExpr(lhs.intercept,
                            lhs.intercept_origin,
                            lhs.intercept_removed,
                            merged_terms)

246 

247 

def _eval_binary_minus(evaluator, tree):
    """Evaluate a binary ``-`` node, tracking the intercept state."""
    lhs = evaluator.eval(tree.args[0])
    rhs_node = tree.args[1]

    # "... - 0" adds the intercept; the "0" node itself is its origin.
    if rhs_node.type == "ZERO":
        return IntermediateExpr(True, rhs_node, False, lhs.terms)
    # "... - 1" removes the intercept.
    if rhs_node.type == "ONE":
        return IntermediateExpr(False, None, True, lhs.terms)

    rhs = evaluator.eval(rhs_node)
    remaining = [term for term in lhs.terms if term not in rhs.terms]
    # Subtracting something that carries an intercept removes the intercept;
    # otherwise the left-hand intercept state is kept as it is.
    if rhs.intercept:
        return IntermediateExpr(False, None, True, remaining)
    return IntermediateExpr(lhs.intercept,
                            lhs.intercept_origin,
                            lhs.intercept_removed,
                            remaining)

266 

def _check_interactable(expr):
    """Raise :class:`PatsyError` if ``expr`` carries an intercept.

    Expressions without an intercept pass silently.
    """
    if not expr.intercept:
        return
    message = "intercept term cannot interact with anything else"
    raise PatsyError(message, expr.intercept_origin)

271 

def _interaction(left_expr, right_expr):
    """Return the interaction of every left term with every right term.

    Neither side may carry an intercept (checked left first, then right).
    """
    _check_interactable(left_expr)
    _check_interactable(right_expr)
    crossed = [Term(l_term.factors + r_term.factors)
               for l_term in left_expr.terms
               for r_term in right_expr.terms]
    return IntermediateExpr(False, None, False, crossed)

280 

def _eval_binary_prod(evaluator, tree):
    """Evaluate ``a * b``: both operands' terms plus their interaction."""
    operands = [evaluator.eval(arg) for arg in tree.args]
    main_effects = operands[0].terms + operands[1].terms
    interaction_terms = _interaction(*operands).terms
    return IntermediateExpr(False, None, False,
                            main_effects + interaction_terms)

287 

288# Division (nesting) is right-ward distributive: 

289# a / (b + c) -> a/b + a/c -> a + a:b + a:c 

290# But left-ward, in S/R it has a quirky behavior: 

291# (a + b)/c -> a + b + a:b:c 

292# This is because it's meaningless for a factor to be "nested" under two 

293# different factors. (This is documented in Chambers and Hastie (page 30) as a 

294# "Slightly more subtle..." rule, with no further elaboration. Hopefully we 

295# will do better.) 

def _eval_binary_div(evaluator, tree):
    """Evaluate ``a / b`` (nesting).

    The result is the left-hand terms, followed by one combined term made of
    all left-hand factors interacted with each right-hand term.
    """
    lhs = evaluator.eval(tree.args[0])
    rhs = evaluator.eval(tree.args[1])
    result_terms = list(lhs.terms)
    _check_interactable(lhs)
    # Build a single giant combined term for everything on the left:
    pooled_factors = [factor
                      for term in lhs.terms
                      for factor in term.factors]
    pooled_lhs = IntermediateExpr(False, None, False, [Term(pooled_factors)])
    # Then interact it with everything on the right:
    result_terms.extend(_interaction(pooled_lhs, rhs).terms)
    return IntermediateExpr(False, None, False, result_terms)

310 

def _eval_binary_interact(evaluator, tree):
    """Evaluate ``a:b`` as the interaction of the evaluated operands."""
    operands = []
    for arg in tree.args:
        operands.append(evaluator.eval(arg))
    return _interaction(*operands)

314 

def _eval_binary_power(evaluator, tree):
    """Evaluate ``expr ** n``: all interactions of up to ``n`` terms.

    Raises :class:`PatsyError` unless the exponent is a literal positive
    integer.
    """
    base = evaluator.eval(tree.args[0])
    _check_interactable(base)

    exponent_node = tree.args[1]
    # -1 is the "invalid exponent" marker, rejected just below.
    power = -1
    if exponent_node.type in ("ONE", "NUMBER"):
        literal = exponent_node.token.extra
        try:
            power = int(literal)
        except ValueError:
            pass
    if power < 1:
        raise PatsyError("'**' requires a positive integer", exponent_node)

    # Small optimization: (a + b)**100 is just the same as (a + b)**2.
    power = min(len(base.terms), power)
    collected = base.terms
    current = base
    for _ in range(power - 1):
        current = _interaction(base, current)
        collected = collected + current.terms
    return IntermediateExpr(False, None, False, collected)

335 

def _eval_unary_plus(evaluator, tree):
    """Evaluate unary ``+``: simply the value of its single operand."""
    operand = tree.args[0]
    return evaluator.eval(operand)

338 

def _eval_unary_minus(evaluator, tree):
    """Evaluate unary ``-``, which is only meaningful on ``0`` or ``1``.

    ``-0`` adds the intercept and ``-1`` removes it; anything else raises
    :class:`PatsyError`.
    """
    operand_type = tree.args[0].type
    if operand_type == "ZERO":
        return IntermediateExpr(True, tree.origin, False, [])
    if operand_type == "ONE":
        return IntermediateExpr(False, None, True, [])
    raise PatsyError("Unary minus can only be applied to 1 or 0", tree)

346 

def _eval_zero(evaluator, tree):
    """Evaluate a literal ``0``: no terms, intercept explicitly removed."""
    return IntermediateExpr(intercept=False,
                            intercept_origin=None,
                            intercept_removed=True,
                            terms=[])

349 

def _eval_one(evaluator, tree):
    """Evaluate a literal ``1``: no terms, intercept explicitly requested."""
    return IntermediateExpr(intercept=True,
                            intercept_origin=tree.origin,
                            intercept_removed=False,
                            terms=[])

352 

def _eval_number(evaluator, tree):
    """Reject a numeric literal; always raises :class:`PatsyError`."""
    message = "numbers besides '0' and '1' are only allowed with **"
    raise PatsyError(message, tree)

356 

def _eval_python_expr(evaluator, tree):
    """Wrap a Python-expression token as a single one-factor term."""
    code = tree.token.extra
    single_term = Term([EvalFactor(code, origin=tree.origin)])
    return IntermediateExpr(False, None, False, [single_term])

360 

class Evaluator(object):
    """Evaluates a formula parse tree by dispatching on (type, arity)."""

    def __init__(self):
        self._evaluators = {}
        builtin_ops = [
            ("~", 2, _eval_any_tilde),
            ("~", 1, _eval_any_tilde),

            ("+", 2, _eval_binary_plus),
            ("-", 2, _eval_binary_minus),
            ("*", 2, _eval_binary_prod),
            ("/", 2, _eval_binary_div),
            (":", 2, _eval_binary_interact),
            ("**", 2, _eval_binary_power),

            ("+", 1, _eval_unary_plus),
            ("-", 1, _eval_unary_minus),

            ("ZERO", 0, _eval_zero),
            ("ONE", 0, _eval_one),
            ("NUMBER", 0, _eval_number),
            ("PYTHON_EXPR", 0, _eval_python_expr),
        ]
        for op, arity, handler in builtin_ops:
            self.add_op(op, arity, handler)

        # Not used by Patsy -- provided for the convenience of eventual
        # user-defined operators.
        self.stash = {}

    # This should not be considered a public API yet (to use for actually
    # adding new operator semantics) because I wrote in some of the relevant
    # code sort of speculatively, but it isn't actually tested.
    def add_op(self, op, arity, evaluator):
        """Register ``evaluator`` as the handler for ``op`` with ``arity``."""
        self._evaluators[op, arity] = evaluator

    def eval(self, tree, require_evalexpr=True):
        """Evaluate ``tree`` with the handler registered for its node kind.

        :arg tree: A :class:`ParseNode`.
        :arg require_evalexpr: When true, the handler's result must be an
          :class:`IntermediateExpr`; anything else raises
          :class:`PatsyError`.
        :returns: Whatever the handler returned.
        """
        assert isinstance(tree, ParseNode)
        key = (tree.type, len(tree.args))
        if key not in self._evaluators:
            raise PatsyError("I don't know how to evaluate this "
                             "'%s' operator" % (tree.type,),
                             tree.token)
        handler = self._evaluators[key]
        result = handler(self, tree)

        if not require_evalexpr or isinstance(result, IntermediateExpr):
            return result
        # A ModelDesc here means a "~" showed up below the top level.
        if isinstance(result, ModelDesc):
            raise PatsyError("~ can only be used once, and "
                             "only at the top level",
                             tree)
        raise PatsyError("custom operator returned an "
                         "object that I don't know how to "
                         "handle", tree)

411 

412############# 

413 

# Table of formulas and the termlists they should evaluate to, consumed by
# _do_eval_formula_tests. Each value is either
#   (rhs_intercept, rhs_terms)
# or
#   (lhs_intercept, lhs_terms, rhs_intercept, rhs_terms)
# where a two-element value gets (False, []) prepended as its left-hand side.
# Each entry in a terms list is either a single factor's code string or a
# tuple of code strings (an interaction); a true intercept flag means the
# empty term is expected first.
_eval_tests = {
    "": (True, []),
    " ": (True, []),
    " \n ": (True, []),
    "a": (True, ["a"]),

    "1": (True, []),
    "0": (False, []),
    "- 1": (False, []),
    "- 0": (True, []),
    "+ 1": (True, []),
    "+ 0": (False, []),
    "0 + 1": (True, []),
    "1 + 0": (False, []),
    "1 - 0": (True, []),
    "0 - 1": (False, []),

    "1 + a": (True, ["a"]),
    "0 + a": (False, ["a"]),
    "a - 1": (False, ["a"]),
    "a - 0": (True, ["a"]),
    "1 - a": (True, []),

    "a + b": (True, ["a", "b"]),
    "(a + b)": (True, ["a", "b"]),
    "a + ((((b))))": (True, ["a", "b"]),
    "a + ((((+b))))": (True, ["a", "b"]),
    "a + ((((b - a))))": (True, ["a", "b"]),

    "a + a + a": (True, ["a"]),

    "a + (b - a)": (True, ["a", "b"]),

    "a + np.log(a, base=10)": (True, ["a", "np.log(a, base=10)"]),
    # Note different spacing:
    "a + np.log(a, base=10) - np . log(a , base = 10)": (True, ["a"]),

    "a + (I(b) + c)": (True, ["a", "I(b)", "c"]),
    "a + I(b + c)": (True, ["a", "I(b + c)"]),

    "a:b": (True, [("a", "b")]),
    "a:b:a": (True, [("a", "b")]),
    "a:(b + c)": (True, [("a", "b"), ("a", "c")]),
    "(a + b):c": (True, [("a", "c"), ("b", "c")]),
    "a:(b - c)": (True, [("a", "b")]),
    "c + a:c + a:(b - c)": (True, ["c", ("a", "c"), ("a", "b")]),
    "(a - b):c": (True, [("a", "c")]),
    "b + b:c + (a - b):c": (True, ["b", ("b", "c"), ("a", "c")]),

    "a:b - a:b": (True, []),
    "a:b - b:a": (True, []),

    "1 - (a + b)": (True, []),
    "a + b - (a + b)": (True, []),

    "a * b": (True, ["a", "b", ("a", "b")]),
    "a * b * a": (True, ["a", "b", ("a", "b")]),
    "a * (b + c)": (True, ["a", "b", "c", ("a", "b"), ("a", "c")]),
    "(a + b) * c": (True, ["a", "b", "c", ("a", "c"), ("b", "c")]),
    "a * (b - c)": (True, ["a", "b", ("a", "b")]),
    "c + a:c + a * (b - c)": (True, ["c", ("a", "c"), "a", "b", ("a", "b")]),
    "(a - b) * c": (True, ["a", "c", ("a", "c")]),
    "b + b:c + (a - b) * c": (True, ["b", ("b", "c"), "a", "c", ("a", "c")]),

    "a/b": (True, ["a", ("a", "b")]),
    "(a + b)/c": (True, ["a", "b", ("a", "b", "c")]),
    "b + b:c + (a - b)/c": (True, ["b", ("b", "c"), "a", ("a", "c")]),
    "a/(b + c)": (True, ["a", ("a", "b"), ("a", "c")]),

    "a ** 2": (True, ["a"]),
    "(a + b + c + d) ** 2": (True, ["a", "b", "c", "d",
                                    ("a", "b"), ("a", "c"), ("a", "d"),
                                    ("b", "c"), ("b", "d"), ("c", "d")]),
    "(a + b + c + d) ** 3": (True, ["a", "b", "c", "d",
                                    ("a", "b"), ("a", "c"), ("a", "d"),
                                    ("b", "c"), ("b", "d"), ("c", "d"),
                                    ("a", "b", "c"), ("a", "b", "d"),
                                    ("a", "c", "d"), ("b", "c", "d")]),

    "a + +a": (True, ["a"]),

    "~ a + b": (True, ["a", "b"]),
    "~ a*b": (True, ["a", "b", ("a", "b")]),
    "~ a*b + 0": (False, ["a", "b", ("a", "b")]),
    "~ -1": (False, []),

    "0 ~ a + b": (True, ["a", "b"]),
    "1 ~ a + b": (True, [], True, ["a", "b"]),
    "y ~ a + b": (False, ["y"], True, ["a", "b"]),
    "0 + y ~ a + b": (False, ["y"], True, ["a", "b"]),
    "0 + y * z ~ a + b": (False, ["y", "z", ("y", "z")], True, ["a", "b"]),
    "-1 ~ 1": (False, [], True, []),
    "1 + y ~ a + b": (True, ["y"], True, ["a", "b"]),

    # Check precedence:
    "a + b * c": (True, ["a", "b", "c", ("b", "c")]),
    "a * b + c": (True, ["a", "b", ("a", "b"), "c"]),
    "a * b - a": (True, ["b", ("a", "b")]),
    "a + b / c": (True, ["a", "b", ("b", "c")]),
    "a / b + c": (True, ["a", ("a", "b"), "c"]),
    "a*b:c": (True, ["a", ("b", "c"), ("a", "b", "c")]),
    "a:b*c": (True, [("a", "b"), "c", ("a", "b", "c")]),

    # Intercept handling:
    "~ 1 + 1 + 0 + 1": (True, []),
    "~ 0 + 1 + 0": (False, []),
    "~ 0 - 1 - 1 + 0 + 1": (True, []),
    "~ 1 - 1": (False, []),
    "~ 0 + a + 1": (True, ["a"]),
    "~ 1 + (a + 0)": (True, ["a"]), # This is correct, but perhaps surprising!
    "~ 0 + (a + 1)": (True, ["a"]), # Also correct!
    "~ 1 - (a + 1)": (False, []),
}

527 

# Formulas that must fail to evaluate, passed to _parsing_error_test by
# test_eval_formula_error_reporting.
# <> mark off where the error should be reported:
_eval_error_tests = [
    "a <+>",
    "a + <(>",

    "b + <(-a)>",

    "a:<1>",
    "(a + <1>)*b",

    "a + <2>",
    "a + <1.0>",
    # eh, catching this is a hassle, we'll just leave the user some rope if
    # they really want it:
    #"a + <0x1>",

    "a ** <b>",
    "a ** <(1 + 1)>",
    "a ** <1.5>",

    "a + b <# asdf>",

    "<)>",
    "a + <)>",
    "<*> a",
    "a + <*>",

    "a + <foo[bar>",
    "a + <foo{bar>",
    "a + <foo(bar>",

    "a + <[bar>",
    "a + <{bar>",

    "a + <{bar[]>",

    "a + foo<]>bar",
    "a + foo[]<]>bar",
    "a + foo{}<}>bar",
    "a + foo<)>bar",

    "a + b<)>",
    "(a) <.>",

    "<(>a + b",

    "<y ~ a> ~ b",
    "y ~ <(a ~ b)>",
    "<~ a> ~ b",
    "~ <(a ~ b)>",

    "1 + <-(a + b)>",

    "<- a>",
    "a + <-a**2>",
]

584 

def _assert_terms_match(terms, expected_intercept, expecteds): # pragma: no cover
    """Assert that ``terms`` matches the expected description.

    ``expecteds`` lists, per term, either one factor code string or a tuple
    of them; a truthy ``expected_intercept`` means the empty term comes
    first.
    """
    if expected_intercept:
        expecteds = [()] + expecteds
    assert len(terms) == len(expecteds)
    for actual, wanted in zip(terms, expecteds):
        if not isinstance(actual, Term):
            assert actual == wanted
            continue
        # A bare string is shorthand for a one-factor term.
        if isinstance(wanted, str):
            wanted = (wanted,)
        wanted_factors = tuple([EvalFactor(code) for code in wanted])
        assert actual.factors == wanted_factors

596 

def _do_eval_formula_tests(tests): # pragma: no cover
    """Evaluate each formula in ``tests`` and check both of its termlists.

    ``tests`` maps formula strings to a 2- or 4-element expectation; a
    two-element one describes the right-hand side only and is padded with
    an empty, intercept-free left-hand side.
    """
    for code, result in six.iteritems(tests):
        if len(result) == 2:
            result = (False, []) + result
        model_desc = ModelDesc.from_formula(code)
        # Printed for context when an assertion below fails.
        print(repr(code))
        print(result)
        print(model_desc)
        lhs_intercept, lhs_termlist, rhs_intercept, rhs_termlist = result
        sides = (
            (model_desc.lhs_termlist, lhs_intercept, lhs_termlist),
            (model_desc.rhs_termlist, rhs_intercept, rhs_termlist),
        )
        for actual_terms, want_intercept, want_terms in sides:
            _assert_terms_match(actual_terms, want_intercept, want_terms)

610 

def test_eval_formula():
    """Run every entry of the _eval_tests table through the evaluator."""
    _do_eval_formula_tests(tests=_eval_tests)

613 

def test_eval_formula_error_reporting():
    """Check every formula in _eval_error_tests via _parsing_error_test."""
    from patsy.parse_formula import _parsing_error_test

    def parse_fn(formula):
        return ModelDesc.from_formula(formula)

    _parsing_error_test(parse_fn, _eval_error_tests)

618 

def test_formula_factor_origin():
    """Factors must remember where in the formula string they came from."""
    from patsy.origin import Origin
    formula = "a + b"
    desc = ModelDesc.from_formula(formula)
    # rhs_termlist[0] is the implicit intercept; the factors follow it.
    factor_a = desc.rhs_termlist[1].factors[0]
    factor_b = desc.rhs_termlist[2].factors[0]
    assert factor_a.origin == Origin("a + b", 0, 1)
    assert factor_b.origin == Origin("a + b", 4, 5)

626