Package nflgame :: Module game
[frames | no frames]

Source Code for Module nflgame.game

  1  from collections import namedtuple 
  2  import os 
  3  import os.path as path 
  4  import gzip 
  5  import json 
  6  import sys 
  7  import urllib2 
  8   
  9  from nflgame import OrderedDict 
 10  import nflgame.player 
 11  import nflgame.seq 
 12  import nflgame.statmap 
 13   
 14  _jsonf = path.join(path.split(__file__)[0], 'gamecenter-json', '%s.json.gz') 
 15  _json_base_url = "http://www.nfl.com/liveupdate/game-center/%s/%s_gtd.json" 
 16   
 17  GameDiff = namedtuple('GameDiff', ['plays', 'players']) 
 18  """ 
 19  Represents the difference between two points in time of the same game 
 20  in terms of plays and player statistics. 
 21  """ 
 22   
 23  TeamStats = namedtuple('TeamStats', 
 24                         ['first_downs', 'total_yds', 'passing_yds', 
 25                          'rushing_yds', 'penalty_cnt', 'penalty_yds', 
 26                          'turnovers', 'punt_cnt', 'punt_yds', 'punt_avg', 
 27                          'pos_time']) 
 28  """A collection of team statistics for an entire game.""" 
 29   
 30   
class FieldPosition(object):
    """
    A spot on the field, stored as a signed integer offset from midfield.

    The 50 yard line is offset 0. A spot in the possessing team's own
    territory has a negative offset, and a spot in the opponent's territory
    has a positive offset.

    e.g., NE has the ball on the NE 45, the offset is -5.
    e.g., NE has the ball on the NYG 2, the offset is 48.
    """
    def __new__(cls, pos_team, yardline):
        # Without a yard line there is nothing to represent, so the
        # "constructor" evaluates to None rather than to an instance.
        if yardline:
            return object.__new__(cls)
        return None

    def __init__(self, pos_team, yardline):
        """
        pos_team is the team on offense. yardline is either the string '50'
        or a string of the form 'team-territory yard-line', e.g., "NE 32".
        """
        if yardline == '50':
            self.offset = 0
        else:
            territory, yd_str = yardline.split()
            to_midfield = 50 - int(yd_str)
            if territory == pos_team:
                # Own territory: midfield is still ahead of the offense.
                self.offset = -to_midfield
            else:
                self.offset = to_midfield

    def __cmp__(self, other):
        # Positions are ordered by their offsets.
        mine, theirs = self.offset, other.offset
        return cmp(mine, theirs)

    def __str__(self):
        return '%d' % self.offset
69 70
class PossessionTime(object):
    """
    Represents the amount of time a drive lasted in (minutes, seconds).

    Instances expose three attributes: 'clock' (the string form), and the
    integers 'minutes' and 'seconds'.
    """
    def __init__(self, clock):
        """clock is a string formatted like 'minutes:seconds', e.g. "4:15"."""
        self.clock = clock
        self.minutes, self.seconds = map(int, self.clock.split(':'))

    @staticmethod
    def _from_seconds(total_seconds):
        """Builds a PossessionTime from a non-negative number of seconds."""
        new_time = PossessionTime('0:00')
        # divmod keeps both parts integral no matter which division
        # semantics ('/' as floor or as true division) are in effect.
        new_time.minutes, new_time.seconds = divmod(total_seconds, 60)
        new_time.clock = '%.2d:%.2d' % (new_time.minutes, new_time.seconds)
        return new_time

    def total_seconds(self):
        """Returns the possession time as a single number of seconds."""
        return self.seconds + self.minutes * 60

    def __cmp__(self, other):
        a, b = (self.minutes, self.seconds), (other.minutes, other.seconds)
        return cmp(a, b)

    def __add__(self, other):
        """Returns a new PossessionTime holding the sum of both times."""
        return PossessionTime._from_seconds(
            self.total_seconds() + other.total_seconds())

    def __sub__(self, other):
        """
        Returns a new PossessionTime holding self minus other.

        other must not be longer than self.
        """
        # Compare total seconds: that is exactly the condition under which
        # the difference below is non-negative.
        assert self.total_seconds() >= other.total_seconds()
        return PossessionTime._from_seconds(
            self.total_seconds() - other.total_seconds())

    def __str__(self):
        return self.clock
105 106
class GameClock(object):
    """
    The current time in a game: a quarter plus the clock time within it.
    Besides numbered quarters, a GameClock can also say that the game has
    not started yet, that it is half time, or that the game is over.
    """
    def __init__(self, qtr, clock):
        self.qtr = qtr
        self.clock = clock

        # Private, pre-parsed values used only for ordering two clocks.
        self.__minutes, self.__seconds = map(int, self.clock.split(':'))
        try:
            numeric_qtr = int(self.qtr)
        except ValueError:
            # Not a number, so it has to be one of the named game states.
            if self.is_pregame():
                self.__qtr = 0
            elif self.is_halftime():
                self.__qtr = 3
            elif self.is_final():
                self.__qtr = sys.maxint
            else:
                assert False, 'Unknown QTR value: "%s"' % self.qtr
        else:
            # Halftime occupies slot 3, so quarters 3 and up shift by one.
            if numeric_qtr >= 3:
                self.__qtr = numeric_qtr + 1
            else:
                self.__qtr = numeric_qtr

    def is_pregame(self):
        return self.qtr == 'Pregame'

    def is_halftime(self):
        return self.qtr == 'Halftime'

    def is_final(self):
        return self.qtr in ('Final', 'final overtime')

    def __cmp__(self, other):
        # Later quarters sort higher. Within a quarter the clock counts
        # down, so *less* remaining time means a later point in the game;
        # negating minutes and seconds expresses that in one tuple compare.
        mine = (self.__qtr, -self.__minutes, -self.__seconds)
        theirs = (other.__qtr, -other.__minutes, -other.__seconds)
        return cmp(mine, theirs)

    def __str__(self):
        """
        Formats the current game time for display, e.g., "Q1 10:52",
        "Q4 1:25", "Pregame", "Halftime" or "Final".
        """
        try:
            return 'Q%d %s' % (int(self.qtr), self.clock)
        except ValueError:
            return self.qtr
163 164
class Game(object):
    """
    Game represents a single pre- or regular-season game. It provides a window
    into the statistics of every player that played into the game, along with
    the winner of the game, the score and a list of all the scoring plays.

    The 'players' and 'drives' attributes are built lazily from the parsed
    JSON data the first time they are accessed.
    """

    def __new__(cls, eid=None, fpath=None):
        """
        Loads the raw JSON data before an instance is created.

        Evaluates to None instead of a Game when the download raises
        urllib2.URLError, when no data is available, or when the data is
        just an empty JSON object.
        """
        try:
            rawData = _get_json_data(eid, fpath)
        except urllib2.URLError:
            return None
        if rawData is None or rawData.strip() == '{}':
            return None
        game = object.__new__(cls)
        game.rawData = rawData
        return game

    def __init__(self, eid=None, fpath=None):
        """
        Creates a new Game instance given a game identifier.

        The game identifier is used by NFL.com's GameCenter live update web
        pages. It is used to construct a URL to download JSON data for the
        game.

        If eid is None, the data comes from the file at fpath and the game
        identifier is taken from the first key in it that maps to a
        dictionary.

        If the game has been completed, the JSON data will be cached to disk
        so that subsequent accesses will not re-download the data but instead
        read it from disk.

        When the JSON data is written to disk, it is compressed using gzip.
        """
        if eid is not None:
            self.eid = eid
            self.data = json.loads(self.rawData)[self.eid]
        else:
            self.eid = None
            self.data = json.loads(self.rawData)
            for k, v in self.data.iteritems():
                if isinstance(v, dict):
                    self.eid = k
                    self.data = v
                    break
            assert self.eid is not None

        # Home and team cumulative statistics.
        self.home = self.data['home']['abbr']
        self.away = self.data['away']['abbr']
        self.stats_home = _json_team_stats(self.data['home']['stats']['team'])
        self.stats_away = _json_team_stats(self.data['away']['stats']['team'])

        # Load up some simple static values.
        self.time = GameClock(self.data['qtr'], self.data['clock'])
        self.down = _tryint(self.data['down'])
        self.togo = _tryint(self.data['togo'])
        self.score_home = int(self.data['home']['score']['T'])
        self.score_away = int(self.data['away']['score']['T'])
        # Per-period scores become attributes named like 'score_home_q1'.
        for q in (1, 2, 3, 4, 5):
            for team in ('home', 'away'):
                score = self.data[team]['score'][str(q)]
                setattr(self, 'score_%s_q%d' % (team, q), int(score))

        if not self.game_over():
            self.winner = None
            # Set explicitly so that reading 'loser' before the game ends
            # keeps evaluating to None, as it always has.
            self.loser = None
        else:
            if self.score_home > self.score_away:
                self.winner = self.home
                self.loser = self.away
            elif self.score_away > self.score_home:
                self.winner = self.away
                self.loser = self.home
            else:
                # A tie: both teams are reported as 'HOME/AWAY'.
                self.winner = '%s/%s' % (self.home, self.away)
                self.loser = '%s/%s' % (self.home, self.away)

        # Load the scoring summary into a simple list of strings.
        self.scores = []
        for k in sorted(map(int, self.data['scrsummary'])):
            play = self.data['scrsummary'][str(k)]
            s = '%s - Q%d - %s - %s' \
                % (play['team'], play['qtr'], play['type'], play['desc'])
            self.scores.append(s)

        # Check to see if the game is over, and if so, cache the data.
        # Use self.eid rather than the eid argument: the argument is None
        # when the game was loaded from fpath, which would test a bogus
        # cache path.
        if self.game_over() and not os.access(_jsonf % self.eid, os.R_OK):
            self.save()

    def is_home(self, team):
        """Returns true if team (i.e., 'NE') is the home team."""
        return team == self.home

    def game_over(self):
        """game_over returns true if the game is no longer being played."""
        return self.time.is_final()

    def playing(self):
        """playing returns true if the game is currently being played."""
        return not self.time.is_pregame() and not self.time.is_final()

    def save(self, fpath=None):
        """
        Save the JSON data to fpath. This is done automatically if the
        game is over.

        If fpath is None, the cache path for this game's identifier is used.
        An IOError is not raised; a message is written to stderr instead.
        """
        if fpath is None:
            fpath = _jsonf % self.eid
        try:
            fobj = gzip.open(fpath, 'w+')
            try:
                fobj.write(self.rawData)
            finally:
                # Close explicitly so the compressed stream is flushed and
                # the handle released right away.
                fobj.close()
        except IOError:
            sys.stderr.write("Could not cache JSON data. Please "
                             "make '%s' writable.\n"
                             % os.path.dirname(fpath))

    def nice_score(self):
        """
        Returns a string of the score of the game.
        e.g., "NE (32) vs. NYG (0)".
        """
        return '%s (%d) vs. %s (%d)' \
               % (self.home, self.score_home, self.away, self.score_away)

    def __getattr__(self, name):
        """
        Lazily builds the 'players' and 'drives' attributes.

        This is only called when normal attribute lookup fails. Once built,
        the value is stored on the instance so later accesses do not come
        back here. Any other unknown name raises AttributeError.
        """
        if name == 'players':
            self.__players = _json_game_player_stats(self.data)
            self.players = nflgame.seq.GenPlayerStats(self.__players)
            return self.players
        if name == 'drives':
            self.__drives = _json_drives(self, self.home, self.data['drives'])
            self.drives = nflgame.seq.GenDrives(self.__drives)
            return self.drives
        raise AttributeError("'%s' object has no attribute '%s'"
                             % (type(self).__name__, name))

    def __sub__(self, other):
        """Returns diff(other, self): what this game has that other lacks."""
        return diff(other, self)

    def __str__(self):
        return self.nice_score()
303 304
def diff(before, after):
    """
    Computes what changed between two snapshots of the same game.

    The result is a GameDiff namedtuple with two attributes, plays and
    players. Each one holds *only* the data that is present in the after
    game but not in the before game.

    This is useful for sending alerts where you're guaranteed to see each
    play statistic only once (assuming NFL.com behaves itself).

    XXX: An assertion requires after's game clock to be the same as or later
    than before's game clock. It may need to go if NFL.com allows its game
    clock to be rolled back due to corrections from refs.
    """
    assert after.time >= before.time, \
        'When diffing two games, "after" (%s) must be later or the ' \
        'same time as "before" (%s).' % (after.time, before.time)
    assert after.eid == before.eid

    after_plays = list(after.drives.plays())
    before_plays = list(before.drives.plays())
    plays = [play for play in after_plays if play not in before_plays]

    # New plays alone are not enough: a play whose description was updated
    # (late call? play review? etc.) shows up as "new" a second time. So the
    # player statistics are diffed as well.
    changed = OrderedDict()
    after_players = list(after.drives.players())
    before_players = list(before.drives.players())
    for current in after_players:
        earlier = [previous for previous in before_players
                   if current.playerid == previous.playerid]
        if not earlier:
            # The player did not appear in the before snapshot at all.
            changed[current.playerid] = current
            continue
        for previous in earlier:
            delta = current - previous
            if delta is not None:
                changed[current.playerid] = delta

    return GameDiff(plays=plays,
                    players=nflgame.seq.GenPlayerStats(changed))
352 353
class Drive(object):
    """
    Drive represents a single drive in an NFL game. It contains a list
    of all plays that happened in the drive, in chronological order.
    It also contains meta information about the drive such as the start
    and stop times and field position, length of possession, the number
    of first downs and a short descriptive string of the result of the
    drive.
    """
    def __init__(self, game, drive_num, home_team, data):
        """
        game is the Game this drive belongs to, drive_num its number,
        home_team the abbreviation of the home team and data the JSON
        entry of the drive.

        If data is None, nothing is initialized; __add__ uses this to get
        a blank instance that it fills in itself.
        """
        if data is None:
            return
        self.game = game
        self.drive_num = drive_num
        self.team = data['posteam']
        self.home = self.team == home_team
        self.first_downs = int(data['fds'])
        self.result = data['result']
        self.penalty_yds = int(data['penyds'])
        self.total_yds = int(data['ydsgained'])
        self.pos_time = PossessionTime(data['postime'])
        self.play_cnt = int(data['numplays'])
        self.field_start = FieldPosition(self.team, data['start']['yrdln'])
        self.time_start = GameClock(data['start']['qtr'],
                                    data['start']['time'])

        # Play identifiers in ascending numeric order.
        playids = sorted(map(int, data['plays'].keys()))

        # When the game is over, the yardline isn't reported. So find the
        # last play that does report a yardline.
        if data['end']['yrdln'].strip():
            self.field_end = FieldPosition(self.team, data['end']['yrdln'])
        else:
            self.field_end = None
            for pid in reversed(playids):
                yrdln = data['plays'][str(pid)]['yrdln'].strip()
                if yrdln:
                    self.field_end = FieldPosition(self.team, yrdln)
                    break
            if self.field_end is None:
                # No play reports a yardline either; fall back to midfield.
                self.field_end = FieldPosition(self.team, '50')

        # When a drive lasts from Q1 to Q2 or Q3 to Q4, the 'end' doesn't
        # seem to change to the proper quarter. So look at the last play and
        # use that quarter instead.
        lastplayid = str(playids[-1])
        endqtr = data['plays'][lastplayid]['qtr']
        self.time_end = GameClock(endqtr, data['end']['time'])

        self.__plays = _json_plays(self, data['plays'])
        self.plays = nflgame.seq.GenPlays(self.__plays)

    def __add__(self, other):
        """
        Adds the statistics of two drives together.

        Both drives must belong to the same team. The plays of the result
        are the plays of self followed by the plays of other.

        Note that once two drives are added, the following fields
        automatically get None values: game, result, field_start, field_end,
        time_start and time_end. drive_num is 0.
        """
        assert self.team == other.team, \
            'Cannot add drives from different teams "%s" and "%s".' \
            % (self.team, other.team)
        new_drive = Drive(None, 0, '', None)
        # The blank instance above has no attributes at all, so every one
        # of them is assigned here.
        new_drive.game = None
        new_drive.drive_num = 0
        new_drive.team = self.team
        new_drive.home = self.home
        new_drive.first_downs = self.first_downs + other.first_downs
        new_drive.penalty_yds = self.penalty_yds + other.penalty_yds
        new_drive.total_yds = self.total_yds + other.total_yds
        new_drive.pos_time = self.pos_time + other.pos_time
        new_drive.play_cnt = self.play_cnt + other.play_cnt
        new_drive.__plays = self.__plays + other.__plays
        # Expose the combined plays the same way __init__ does.
        new_drive.plays = nflgame.seq.GenPlays(new_drive.__plays)
        new_drive.result = None
        new_drive.field_start = None
        new_drive.field_end = None
        new_drive.time_start = None
        new_drive.time_end = None
        return new_drive

    def __str__(self):
        return '%s (Start: %s, End: %s) %s' \
               % (self.team, self.time_start, self.time_end, self.result)
436 437
class Play(object):
    """
    Play represents a single play. It contains a list of all players
    that participated in the play (including offense, defense and special
    teams). The play also includes meta information about what down it
    is, field position, clock time, etc.

    Play objects also contain team-level statistics, such as whether the
    play was a first down, a fourth down failure, etc.

    Any statistic that was not recorded for a play reads as 0.
    """
    def __init__(self, drive, playid, data):
        """
        drive is the Drive this play belongs to, playid its identifier
        and data the JSON entry of the play.
        """
        self.data = data
        self.drive = drive
        self.playid = playid
        self.team = data['posteam']
        self.home = self.drive.home
        self.desc = data['desc']
        self.note = data['note']
        self.down = int(data['down'])
        self.yards_togo = int(data['ydstogo'])
        self.touchdown = 'touchdown' in self.desc.lower()

        if not self.team:
            self.time, self.yardline = None, None
        else:
            self.time = GameClock(data['qtr'], data['time'])
            self.yardline = FieldPosition(self.team, data['yrdln'])

        # Load team statistics directly into the Play instance.
        # Things like third down attempts, first downs, etc.
        if '0' in data['players']:
            for info in data['players']['0']:
                if info['statId'] not in nflgame.statmap.idmap:
                    continue
                statvals = nflgame.statmap.values(info['statId'],
                                                  info['yards'])
                for k, v in statvals.iteritems():
                    self.__dict__[k] = self.__dict__.get(k, 0) + v

        # Load the sequence of "events" in a play into a list of dictionaries.
        self.events = _json_play_events(data['players'])

        # Now load cumulative player data for this play into
        # a GenPlayerStats generator. We then flatten this data
        # and add it to the play itself so that plays can be
        # filter by these statistics.
        self.__players = _json_play_players(self, data['players'])
        self.players = nflgame.seq.GenPlayerStats(self.__players)
        for p in self.players:
            for k, v in p.stats.iteritems():
                # Sometimes we may see duplicate statistics (like tackle
                # assists). Let's just overwrite in this case, since this
                # data is from the perspective of the play. i.e., there
                # is one assisted tackle rather than two.
                self.__dict__[k] = v

    def has_player(self, playerid):
        """Whether a player with id playerid participated in this play."""
        return playerid in self.__players

    def __str__(self):
        if self.team:
            if self.down != 0:
                return '(%s, %s, %d and %d) %s' \
                       % (self.team, self.data['yrdln'],
                          self.down, self.yards_togo, self.desc)
            else:
                return '(%s, %s) %s' \
                       % (self.team, self.data['yrdln'], self.desc)
        return self.desc

    def __eq__(self, other):
        """
        We use the play description to determine equality because the
        play description can be changed. (Like when a play is reversed.)
        """
        return self.playid == other.playid and self.desc == other.desc

    def __ne__(self, other):
        """The inverse of __eq__, so that '!=' agrees with '=='."""
        return not self.__eq__(other)

    def __getattr__(self, name):
        """
        Makes every statistic that was not recorded for this play read as 0.

        Special '__dunder__' names are excluded and raise AttributeError,
        so that code probing for optional protocol methods does not get
        the integer 0 back in place of a method.
        """
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        return 0
518 519
def _json_team_stats(data):
    """
    Builds a TeamStats namedtuple out of a team stats JSON entry.

    Every counter is converted with int(); the 'top' entry is wrapped in a
    PossessionTime.
    """
    # (TeamStats field, JSON key) pairs of the integer-valued statistics,
    # in TeamStats field order.
    int_fields = (
        ('first_downs', 'totfd'),
        ('total_yds', 'totyds'),
        ('passing_yds', 'pyds'),
        ('rushing_yds', 'ryds'),
        ('penalty_cnt', 'pen'),
        ('penalty_yds', 'penyds'),
        ('turnovers', 'trnovr'),
        ('punt_cnt', 'pt'),
        ('punt_yds', 'ptyds'),
        ('punt_avg', 'ptavg'),
    )
    values = dict((field, int(data[key])) for field, key in int_fields)
    values['pos_time'] = PossessionTime(data['top'])
    return TeamStats(**values)
536 537
def _json_drives(game, home_team, data):
    """
    Takes the 'drives' JSON entry of a game and converts it to a list of
    Drive objects, ordered by drive number.

    Only keys of data that convert to an integer are treated as drives.
    A drive containing a play identifier that was already seen in an earlier
    drive is considered a repeat and is left out. The number given to each
    Drive is its 1-based position among all numbered entries, counting the
    ones that were left out.
    """
    drive_nums = []
    for drive_num in data:
        try:
            drive_nums.append(int(drive_num))
        except (TypeError, ValueError):
            # Not a drive number; this key holds something else.
            continue

    drives = []
    playids = set()  # Plays can be repeated! Ah!
    for i, drive_num in enumerate(sorted(drive_nums), 1):
        drive_data = data[str(drive_num)]
        for playid in drive_data['plays']:
            if playid in playids:
                # Repeated play: skip the whole drive.
                break
            playids.add(playid)
        else:
            drives.append(Drive(game, i, home_team, drive_data))
    return drives
562 563
def _json_plays(drive, data):
    """
    Converts the plays of a single JSON drive entry (data) to a list of
    Play objects, ordered by numeric play identifier.

    drive is the Drive instance the plays belong to.
    """
    ordered_ids = [str(num) for num in sorted(int(key) for key in data)]
    return [Play(drive, playid, data[playid]) for playid in ordered_ids]
573 574
def _json_play_players(play, data):
    """
    Converts the players of a single JSON play entry (data) to an
    OrderedDict mapping player id to that player's statistics for the play.

    play is the instance of Play that this data is part of. It is used
    to determine whether a player belongs to the home team or not.

    The entry keyed '0' is skipped, as are statistics whose id is not in
    nflgame.statmap.idmap.
    """
    players = OrderedDict()
    for playerid, statcats in data.items():
        if playerid == '0':
            continue
        for info in statcats:
            stat_id = info['statId']
            if stat_id in nflgame.statmap.idmap:
                if playerid not in players:
                    # First usable statistic of this player: create the
                    # container all of the player's statistics go into.
                    is_home = play.drive.game.is_home(info['clubcode'])
                    players[playerid] = nflgame.player.PlayPlayerStats(
                        playerid, info['playerName'], is_home)
                players[playerid]._add_stats(
                    nflgame.statmap.values(stat_id, info['yards']))
    return players
599 600
def _json_play_events(data):
    """
    Converts the players of a single JSON play entry (data) to a list of
    events, ordered by their 'sequence' number.

    Each event is the dictionary of statistic values for one statistic,
    extended with the keys 'playerid' (None for the entry keyed '0'),
    'playername' (None when empty) and 'team'. Statistics whose id is not
    in nflgame.statmap.idmap are left out.
    """
    sequenced = []
    for playerid, statcats in data.items():
        for info in statcats:
            if info['statId'] in nflgame.statmap.idmap:
                event = nflgame.statmap.values(info['statId'], info['yards'])
                if playerid == '0':
                    event['playerid'] = None
                else:
                    event['playerid'] = playerid
                event['playername'] = info['playerName'] or None
                event['team'] = info['clubcode']
                sequenced.append((int(info['sequence']), event))
    # Sort on the sequence number only; ties keep their original order.
    sequenced.sort(key=lambda pair: pair[0])
    return [pair[1] for pair in sequenced]
616 617
def _json_game_player_stats(data):
    """
    Collects the total game statistics of every player from the 'home' and
    'away' team stats.

    Returns an OrderedDict mapping player id to an instance of
    nflgame.player.GamePlayerStats. Each statistic is stored under a name
    of the form 'category_key'.
    """
    players = OrderedDict()
    for team in ('home', 'away'):
        for category in nflgame.statmap.categories:
            team_stats = data[team]['stats']
            if category in team_stats:
                for pid, raw in team_stats[category].items():
                    # Everything except the player's name is a statistic.
                    stats = dict(('%s_%s' % (category, key), value)
                                 for key, value in raw.items()
                                 if key != 'name')
                    if pid not in players:
                        players[pid] = nflgame.player.GamePlayerStats(
                            pid, raw['name'], team == 'home')
                    players[pid]._add_stats(stats)
    return players
642 643
def _get_json_data(eid=None, fpath=None):
    """
    Returns the JSON data corresponding to the game represented by eid.

    If the JSON data is already on disk, it is read, decompressed and returned.

    Otherwise, the JSON data is downloaded from the NFL web site. If the
    download fails with a urllib2.HTTPError, _get_json_data returns None.
    Other download errors are not caught here.

    If fpath is given, the JSON data is read from the gzip file at fpath
    and eid is not used.

    At least one of eid and fpath must be given.
    """
    assert eid is not None or fpath is not None

    if fpath is not None:
        return _read_gzip_file(fpath)

    fpath = _jsonf % eid
    if os.access(fpath, os.R_OK):
        return _read_gzip_file(fpath)
    try:
        response = urllib2.urlopen(_json_base_url % (eid, eid))
        try:
            return response.read()
        finally:
            # Release the connection even if reading fails part way.
            response.close()
    except urllib2.HTTPError:
        return None


def _read_gzip_file(fpath):
    """Returns the decompressed contents of the gzip file at fpath."""
    fobj = gzip.open(fpath)
    try:
        return fobj.read()
    finally:
        fobj.close()
668 669
def _tryint(v):
    """
    Tries to convert v to an integer. If it fails, return 0.
    """
    try:
        return int(v)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "no value"; anything else (e.g. a
        # KeyboardInterrupt) must not be swallowed.
        return 0
678