1 from collections import namedtuple
2 import os
3 import os.path as path
4 import gzip
5 import json
6 import sys
7 import urllib2
8
9 from nflgame import OrderedDict
10 import nflgame.player
11 import nflgame.seq
12 import nflgame.statmap
13
14 _MAX_INT = sys.maxint
15
16 _jsonf = path.join(path.split(__file__)[0], 'gamecenter-json', '%s.json.gz')
17 _json_base_url = "http://www.nfl.com/liveupdate/game-center/%s/%s_gtd.json"
18
GameDiff = namedtuple('GameDiff', 'before after plays players')
"""
Captures what changed between two snapshots of the same game: the
earlier and later snapshots themselves, plus the plays and player
statistics that separate them.
"""

TeamStats = namedtuple(
    'TeamStats',
    'first_downs total_yds passing_yds rushing_yds '
    'penalty_cnt penalty_yds turnovers '
    'punt_cnt punt_yds punt_avg pos_time')
"""Team-level statistics covering one whole game."""
31
32
34 """
35 Represents field position.
36
37 The representation here is an integer offset where the 50 yard line
38 corresponds to '0'. Being in the own territory corresponds to a negative
39 offset while being in the opponent's territory corresponds to a positive
40 offset.
41
42 e.g., NE has the ball on the NE 45, the offset is -5.
43 e.g., NE has the ball on the NYG 2, the offset is 48.
44 """
def __new__(cls, pos_team, yardline):
    """
    Allocates an instance only when a yard line is supplied.

    A missing or empty yardline makes the constructor evaluate to None
    instead of an object. pos_team is accepted but not inspected here.
    """
    return object.__new__(cls) if yardline else None
49
51 """
52 pos_team is the team on offense, and yardline is a string formatted
53 like 'team-territory yard-line'. e.g., "NE 32".
54 """
55 if yardline == '50':
56 self.offset = 0
57 return
58
59 territory, yd_str = yardline.split()
60 yd = int(yd_str)
61 if territory == pos_team:
62 self.offset = -(50 - yd)
63 else:
64 self.offset = 50 - yd
65
67 return cmp(self.offset, other.offset)
68
70 return '%d' % self.offset
71
72
74 """
75 Represents the amount of time a drive lasted in (minutes, seconds).
76 """
78 self.clock = clock
79 self.minutes, self.seconds = map(int, self.clock.split(':'))
80
82 """
83 Returns the total number of seconds that this possession lasted for.
84 """
85 return self.seconds + self.minutes * 60
86
88 a, b = (self.minutes, self.seconds), (other.minutes, other.seconds)
89 return cmp(a, b)
90
98
107
110
111
113 """
114 Represents the current time in a game. Namely, it keeps track of the
115 quarter and clock time. Also, GameClock can represent whether
116 the game hasn't started yet, is half time or if it's over.
117 """
119 self.qtr = qtr
120 self.clock = clock
121
122 try:
123 self.__minutes, self.__seconds = map(int, self.clock.split(':'))
124 except ValueError:
125 self.__minutes, self.__seconds = 0, 0
126 try:
127 self.__qtr = int(self.qtr)
128 if self.__qtr >= 3:
129 self.__qtr += 1
130 except ValueError:
131 if self.is_pregame():
132 self.__qtr = 0
133 elif self.is_halftime():
134 self.__qtr = 3
135 elif self.is_final():
136 self.__qtr = sys.maxint
137 else:
138 assert False, 'Unknown QTR value: "%s"' % self.qtr
139
141 return self.qtr == 'Pregame'
142
144 return self.qtr == 'Halftime'
145
147 return self.qtr == 'Final' or self.qtr == 'final overtime'
148
150 if self.__qtr != other.__qtr:
151 return cmp(self.__qtr, other.__qtr)
152 elif self.__minutes != other.__minutes:
153 return cmp(other.__minutes, self.__minutes)
154 return cmp(other.__seconds, self.__seconds)
155
157 """
158 Returns a nicely formatted string indicating the current time of the
159 game. Examples include "Q1 10:52", "Q4 1:25", "Pregame", "Halftime"
160 and "Final".
161 """
162 try:
163 q = int(self.qtr)
164 return 'Q%d %s' % (q, self.clock)
165 except ValueError:
166 return self.qtr
167
168
169 -class Game (object):
170 """
171 Game represents a single pre- or regular-season game. It provides a window
172 into the statistics of every player that played into the game, along with
173 the winner of the game, the score and a list of all the scoring plays.
174 """
175
def __new__(cls, eid=None, fpath=None):
    """
    Allocates a Game and attaches its raw and decoded JSON to it.

    The raw payload is stored as rawData, the per-game dictionary as
    data and the game identifier as eid. When eid is not given, the
    first top-level JSON value that is a dictionary supplies both the
    identifier (its key) and the game data.

    None is returned instead of a Game when the download raises
    urllib2.URLError, when no payload (or only '{}') is available, or
    when the payload is not decodable JSON.
    """
    try:
        payload = _get_json_data(eid, fpath)
    except urllib2.URLError:
        return None
    if payload is None or payload.strip() == '{}':
        return None

    game = object.__new__(cls)
    game.rawData = payload
    game.eid = eid

    try:
        decoded = json.loads(game.rawData)
    except ValueError:
        return None

    if eid is not None:
        game.data = decoded[eid]
        return game

    # Only a file path was given: discover the identifier from the data.
    game.data = decoded
    for key, value in decoded.iteritems():
        if isinstance(value, dict):
            game.eid, game.data = key, value
            break
    assert game.eid is not None
    return game
204
def __init__(self, eid=None, fpath=None):
    """
    Creates a new Game instance given a game identifier.

    The game identifier is used by NFL.com's GameCenter live update web
    pages. It is used to construct a URL to download JSON data for the
    game.

    If the game has been completed, the JSON data will be cached to disk
    so that subsequent accesses will not re-download the data but instead
    read it from disk.

    When the JSON data is written to disk, it is compressed using gzip.

    By the time this runs, __new__ has already stored eid, data and
    rawData on the instance; everything below is derived from self.data.
    The eid and fpath arguments are only consumed by __new__.
    """
    data = self.data

    # Team abbreviations and cumulative team statistics.
    self.home = data['home']['abbr']
    self.away = data['away']['abbr']
    self.stats_home = _json_team_stats(data['home']['stats']['team'])
    self.stats_away = _json_team_stats(data['away']['stats']['team'])

    # Simple scalar state: clock, down and distance, total scores.
    self.time = GameClock(data['qtr'], data['clock'])
    self.down = _tryint(data['down'])
    self.togo = _tryint(data['togo'])
    self.score_home = int(data['home']['score']['T'])
    self.score_away = int(data['away']['score']['T'])

    # Per-period scores become attributes such as score_home_q1.
    for q in (1, 2, 3, 4, 5):
        for team in ('home', 'away'):
            score = data[team]['score'][str(q)]
            self.__dict__['score_%s_q%d' % (team, q)] = int(score)

    if not self.game_over():
        self.winner = None
        # Assigned explicitly so that loser does not depend on an
        # attribute fallback while the game is still in progress.
        self.loser = None
    elif self.score_home > self.score_away:
        self.winner = self.home
        self.loser = self.away
    elif self.score_away > self.score_home:
        self.winner = self.away
        self.loser = self.home
    else:
        # A tie: both teams are reported as winner and as loser.
        self.winner = '%s/%s' % (self.home, self.away)
        self.loser = '%s/%s' % (self.home, self.away)

    # Scoring summary as a list of strings, ordered by numeric key.
    self.scores = []
    for k in sorted(map(int, data['scrsummary'])):
        play = data['scrsummary'][str(k)]
        s = '%s - Q%d - %s - %s' \
            % (play['team'], play['qtr'], play['type'], play['desc'])
        self.scores.append(s)

    # Cache finished games. self.eid is used rather than the eid
    # argument, which is None when the game was loaded from fpath and
    # would make this check look at the wrong file.
    if self.game_over() and not os.access(_jsonf % self.eid, os.R_OK):
        self.save()
261
263 """Returns true if team (i.e., 'NE') is the home team."""
264 return team == self.home
265
267 """game_over returns true if the game is no longer being played."""
268 return self.time.is_final()
269
271 """playing returns true if the game is currently being played."""
272 return not self.time.is_pregame() and not self.time.is_final()
273
def save(self, fpath=None):
    """
    Save the raw JSON data, gzip-compressed, to fpath. This is done
    automatically if the game is over.

    fpath defaults to the cache location derived from self.eid.

    Failure to write is not fatal: an IOError is reported on stderr
    and the method returns normally.
    """
    if fpath is None:
        fpath = _jsonf % self.eid
    try:
        cache = gzip.open(fpath, 'wb')
        try:
            cache.write(self.rawData)
        finally:
            # Close explicitly so the gzip trailer is flushed now rather
            # than whenever the file object is garbage collected.
            cache.close()
    except IOError:
        sys.stderr.write("Could not cache JSON data. Please "
                         "make '%s' writable.\n"
                         % os.path.dirname(fpath))
287
289 """
290 Returns a string of the score of the game.
291 e.g., "NE (32) vs. NYG (0)".
292 """
293 return '%s (%d) vs. %s (%d)' \
294 % (self.home, self.score_home, self.away, self.score_away)
295
297 """
298 Returns a GenPlayers sequence of player statistics that combines
299 game statistics and play statistics by taking the max value of
300 each corresponding statistic.
301
302 This is useful when accuracy is desirable. Namely, using only
303 play-by-play data or using only game statistics can be unreliable.
304 That is, both are inconsistently correct.
305
306 Taking the max values of each statistic reduces the chance of being
307 wrong (particularly for stats that are in both play-by-play data
308 and game statistics), but does not eliminate them.
309 """
310 game_players = list(self.players)
311 play_players = list(self.drives.plays().players())
312 max_players = OrderedDict()
313
314
315
316
317
318
319 for pplay in play_players:
320 newp = nflgame.player.GamePlayerStats(pplay.playerid,
321 pplay.name, pplay.home,
322 pplay.team)
323 maxstats = {}
324 for stat, val in pplay._stats.iteritems():
325 maxstats[stat] = val
326
327 newp._overwrite_stats(maxstats)
328 max_players[pplay.playerid] = newp
329
330 for newp in max_players.itervalues():
331 for pgame in game_players:
332 if pgame.playerid != newp.playerid:
333 continue
334
335 maxstats = {}
336 for stat, val in pgame._stats.iteritems():
337 maxstats[stat] = max([val,
338 newp._stats.get(stat, -_MAX_INT)])
339
340 newp._overwrite_stats(maxstats)
341 break
342 return nflgame.seq.GenPlayerStats(max_players)
343
345 if name == 'players':
346 self.__players = _json_game_player_stats(self, self.data)
347 self.players = nflgame.seq.GenPlayerStats(self.__players)
348 return self.players
349 if name == 'drives':
350 self.__drives = _json_drives(self, self.home, self.data['drives'])
351 self.drives = nflgame.seq.GenDrives(self.__drives)
352 return self.drives
353
355 return diff(other, self)
356
359
360
def diff(before, after):
    """
    Computes what happened between two snapshots of the same game.

    The result is a GameDiff namedtuple. Its before and after fields
    are the two snapshots as given; plays holds the plays of after that
    are not equal to any play of before; players holds, per player id,
    the statistics of after minus those of before.

    A player who is absent from before is carried over unchanged. A
    player present in both snapshots is included only when the
    subtraction yields something other than None.

    This is useful for sending alerts where you're guaranteed to see each
    play statistic only once (assuming NFL.com behaves itself).
    """
    assert after.eid == before.eid

    later_plays = list(after.drives.plays())
    earlier_plays = list(before.drives.plays())
    new_plays = [p for p in later_plays if p not in earlier_plays]

    changed = OrderedDict()
    later_stats = list(after.max_player_stats())
    earlier_stats = list(before.max_player_stats())
    for current in later_stats:
        matches = [prev for prev in earlier_stats
                   if prev.playerid == current.playerid]
        if not matches:
            changed[current.playerid] = current
            continue
        for prev in matches:
            delta = current - prev
            if delta is not None:
                changed[current.playerid] = delta

    return GameDiff(before=before, after=after, plays=new_plays,
                    players=nflgame.seq.GenPlayerStats(changed))
401
402
404 """
405 Drive represents a single drive in an NFL game. It contains a list
406 of all plays that happened in the drive, in chronological order.
407 It also contains meta information about the drive such as the start
408 and stop times and field position, length of possession, the number
409 of first downs and a short descriptive string of the result of the
410 drive.
411
412 """
def __init__(self, game, drive_num, home_team, data):
    """
    Populates the drive from its JSON entry.

    game is the owning game, drive_num the number assigned to this
    drive, home_team the abbreviation of the home team and data the
    drive's JSON dictionary. When data is None nothing is set at all,
    leaving an empty shell for the caller to fill in.
    """
    if data is None:
        return

    self.game = game
    self.drive_num = drive_num
    self.team = data['posteam']
    self.home = self.team == home_team
    self.first_downs = int(data['fds'])
    self.result = data['result']
    self.penalty_yds = int(data['penyds'])
    self.total_yds = int(data['ydsgained'])
    self.pos_time = PossessionTime(data['postime'])
    self.play_cnt = int(data['numplays'])

    start = data['start']
    self.field_start = FieldPosition(self.team, start['yrdln'])
    self.time_start = GameClock(start['qtr'], start['time'])

    # The end yard line may be blank. In that case fall back to the
    # latest play that reports one, and to midfield if none does.
    end_yardline = data['end']['yrdln']
    if end_yardline.strip():
        self.field_end = FieldPosition(self.team, end_yardline)
    else:
        last_known = None
        for pid in sorted(map(int, data['plays']), reverse=True):
            candidate = data['plays'][str(pid)]['yrdln'].strip()
            if candidate:
                last_known = candidate
                break
        self.field_end = FieldPosition(self.team, last_known or '50')

    # The ending quarter is taken from the highest-numbered play, while
    # the ending clock time still comes from the drive's 'end' entry.
    final_pid = str(sorted(map(int, data['plays']))[-1])
    self.time_end = GameClock(data['plays'][final_pid]['qtr'],
                              data['end']['time'])

    self.__plays = _json_plays(self, data['plays'])
    self.plays = nflgame.seq.GenPlays(self.__plays)
454
456 """
457 Adds the statistics of two drives together.
458
459 Note that once two drives are added, the following fields
460 automatically get None values: result, field_start, field_end,
461 time_start and time_end.
462 """
463 assert self.team == other.team, \
464 'Cannot add drives from different teams "%s" and "%s".' \
465 % (self.team, other.team)
466 new_drive = Drive(None, 0, '', None)
467 new_drive.team = self.team
468 new_drive.home = self.home
469 new_drive.first_downs = self.first_downs + other.first_downs
470 new_drive.penalty_yds = self.penalty_yds + other.penalty_yds
471 new_drive.total_yds = self.total_yds + other.total_yds
472 new_drive.pos_time = self.pos_time + other.pos_time
473 new_drive.play_cnt = self.play_cnt + other.play_cnt
474 new_drive.__plays = self.__plays + other.__plays
475 new_drive.result = None
476 new_drive.field_start = None
477 new_drive.field_end = None
478 new_drive.time_start = None
479 new_drive.time_end = None
480 return new_drive
481
483 return '%s (Start: %s, End: %s) %s' \
484 % (self.team, self.time_start, self.time_end, self.result)
485
486
487 -class Play (object):
488 """
489 Play represents a single play. It contains a list of all players
490 that participated in the play (including offense, defense and special
491 teams). The play also includes meta information about what down it
492 is, field position, clock time, etc.
493
494 Play objects also contain team-level statistics, such as whether the
495 play was a first down, a fourth down failure, etc.
496 """
def __init__(self, drive, playid, data):
    """
    Populates the play from its JSON entry.

    drive is the owning drive, playid the play's identifier and data
    the play's JSON dictionary. Besides the fixed attributes, every
    statistic found for the play is exposed both as an attribute of
    the play and as an entry of self._stats.
    """
    self.data = data
    self.drive = drive
    self.playid = playid
    self.team = data['posteam']
    self.home = self.drive.home
    self.desc = data['desc']
    self.note = data['note']
    self.down = int(data['down'])
    self.yards_togo = int(data['ydstogo'])
    self.touchdown = 'touchdown' in self.desc.lower()
    self._stats = {}

    # Without a team in possession there is no clock or field position.
    if self.team:
        self.time = GameClock(data['qtr'], data['time'])
        self.yardline = FieldPosition(self.team, data['yrdln'])
    else:
        self.time = self.yardline = None

    participants = data['players']
    attrs = self.__dict__

    # Statistics filed under the pseudo player id '0' are accumulated
    # onto the play itself, summing repeated stat names.
    for info in participants.get('0', ()):
        if info['statId'] not in nflgame.statmap.idmap:
            continue
        team_vals = nflgame.statmap.values(info['statId'], info['yards'])
        for stat, amount in team_vals.iteritems():
            total = attrs.get(stat, 0) + amount
            attrs[stat] = total
            self._stats[stat] = total

    self.events = _json_play_events(participants)

    # Per-player statistics are assigned rather than summed: a later
    # player's value replaces an earlier one for the same stat name.
    self.__players = _json_play_players(self, participants)
    self.players = nflgame.seq.GenPlayerStats(self.__players)
    for player in self.players:
        for stat, amount in player.stats.iteritems():
            attrs[stat] = amount
            self._stats[stat] = amount
546
548 """Whether a player with id playerid participated in this play."""
549 return playerid in self.__players
550
552 if self.team:
553 if self.down != 0:
554 return '(%s, %s, %d and %d) %s' \
555 % (self.team, self.data['yrdln'],
556 self.down, self.yards_togo, self.desc)
557 else:
558 return '(%s, %s) %s' \
559 % (self.team, self.data['yrdln'], self.desc)
560 return self.desc
561
563 """
564 We use the play description to determine equality because the
565 play description can be changed. (Like when a play is reversed.)
566 """
567 return self.playid == other.playid and self.desc == other.desc
568
571
572
def _json_team_stats(data):
    """
    Takes a team stats JSON entry and converts it to a TeamStats namedtuple.

    Every statistic is coerced with int() except time of possession
    ('top'), which is wrapped in a PossessionTime.
    """
    # (TeamStats field, JSON key) pairs for the integer statistics.
    int_fields = (
        ('first_downs', 'totfd'),
        ('total_yds', 'totyds'),
        ('passing_yds', 'pyds'),
        ('rushing_yds', 'ryds'),
        ('penalty_cnt', 'pen'),
        ('penalty_yds', 'penyds'),
        ('turnovers', 'trnovr'),
        ('punt_cnt', 'pt'),
        ('punt_yds', 'ptyds'),
        ('punt_avg', 'ptavg'),
    )
    values = {}
    for field, key in int_fields:
        values[field] = int(data[key])
    values['pos_time'] = PossessionTime(data['top'])
    return TeamStats(**values)
589
590
def _json_drives(game, home_team, data):
    """
    Takes a home or away JSON entry and converts it to a list of Drive
    objects.

    Only keys of data that parse as integers are treated as drives;
    they are processed in ascending numeric order. A drive containing a
    play id already seen in an earlier drive is dropped as a repeat.

    The number handed to each Drive is its 1-based position among the
    sorted numeric keys, so dropped repeats leave gaps in the numbering.
    """
    drive_nums = []
    for key in data:
        try:
            drive_nums.append(int(key))
        except (TypeError, ValueError):
            # Not a drive number; ignore this entry. Only conversion
            # failures are swallowed, not every possible exception.
            continue

    drives = []
    seen_playids = set()
    for i, drive_num in enumerate(sorted(drive_nums), 1):
        drive_data = data[str(drive_num)]
        repeat_drive = False
        for playid in drive_data['plays']:
            if playid in seen_playids:
                repeat_drive = True
                break
            seen_playids.add(playid)
        if not repeat_drive:
            drives.append(Drive(game, i, home_team, drive_data))
    return drives
615
616
def _json_plays(drive, data):
    """
    Takes a single JSON drive entry (data) and converts it to a list
    of Play objects.

    Plays are ordered by the integer value of their ids. Each Play
    receives the id as a string, along with the owning drive.
    """
    ordered_ids = [str(pid) for pid in sorted(map(int, data))]
    return [Play(drive, playid, data[playid]) for playid in ordered_ids]
626
627
def _json_play_players(play, data):
    """
    Takes a single JSON play entry (data) and converts it to an OrderedDict
    of player statistics.

    play is the instance of Play that this data is part of. It is used
    to determine whether the player belongs to the home team or not.

    Entries under player id '0' and stat ids missing from
    nflgame.statmap.idmap are skipped. A player's name and team are
    taken from the first recognized stat seen for that player.
    """
    players = OrderedDict()
    for playerid, statcats in data.iteritems():
        if playerid == '0':
            continue
        for info in statcats:
            stat_id = info['statId']
            if stat_id not in nflgame.statmap.idmap:
                continue
            if playerid not in players:
                game = play.drive.game
                home = game.is_home(info['clubcode'])
                team_name = game.home if home else game.away
                players[playerid] = nflgame.player.PlayPlayerStats(
                    playerid, info['playerName'], home, team_name)
            statvals = nflgame.statmap.values(stat_id, info['yards'])
            players[playerid]._add_stats(statvals)
    return players
656
657
def _json_play_events(data):
    """
    Takes a single JSON play entry (data) and converts it to a list of events.

    Each event is the dictionary produced by nflgame.statmap.values for
    one recognized stat, extended with 'playerid' (None for the pseudo
    player id '0'), 'playername' (None when empty) and 'team'. Events
    are returned in ascending order of their integer 'sequence' value.
    """
    sequenced = []
    for playerid, statcats in data.iteritems():
        for info in statcats:
            if info['statId'] not in nflgame.statmap.idmap:
                continue
            event = nflgame.statmap.values(info['statId'], info['yards'])
            event['playerid'] = None if playerid == '0' else playerid
            event['playername'] = info['playerName'] or None
            event['team'] = info['clubcode']
            sequenced.append((int(info['sequence']), event))
    # Sort on the sequence number only; the sort is stable, so events
    # sharing a sequence number keep their encounter order.
    sequenced.sort(key=lambda pair: pair[0])
    return [event for _, event in sequenced]
673
674
def _json_game_player_stats(game, data):
    """
    Parses the 'home' and 'away' team stats and returns an OrderedDict
    mapping player id to their total game statistics as instances of
    nflgame.player.GamePlayerStats.

    Each raw statistic is stored under the name '<category>_<key>';
    the 'name' entry is used only to label the player. Categories
    absent from a team's stats are skipped.
    """
    players = OrderedDict()
    for team in ('home', 'away'):
        for category in nflgame.statmap.categories:
            team_stats = data[team]['stats']
            if category not in team_stats:
                continue
            for pid, raw in team_stats[category].iteritems():
                stats = {}
                for key, value in raw.iteritems():
                    if key != 'name':
                        stats['%s_%s' % (category, key)] = value
                if pid not in players:
                    home = team == 'home'
                    team_name = game.home if home else game.away
                    players[pid] = nflgame.player.GamePlayerStats(
                        pid, raw['name'], home, team_name)
                players[pid]._add_stats(stats)
    return players
704
705
def _get_json_data(eid=None, fpath=None):
    """
    Returns the JSON data corresponding to the game represented by eid.

    If the JSON data is already on disk, it is read, decompressed and returned.

    Otherwise, the JSON data is downloaded from the NFL web site. If the
    download fails with an HTTP error, _get_json_data returns None.

    If fpath is given, the JSON data is read from the gzip file at fpath
    and eid is not consulted. At least one of eid and fpath is required.

    Every file and HTTP handle opened here is closed before returning.
    """
    assert eid is not None or fpath is not None

    if fpath is None:
        # No explicit file: use the on-disk cache when it is readable.
        cached = _jsonf % eid
        if os.access(cached, os.R_OK):
            fpath = cached

    if fpath is not None:
        gz = gzip.open(fpath)
        try:
            return gz.read()
        finally:
            gz.close()

    try:
        response = urllib2.urlopen(_json_base_url % (eid, eid))
        try:
            return response.read()
        finally:
            response.close()
    except urllib2.HTTPError:
        pass
    return None
730
731
def _tryint(v):
    """
    Tries to convert v to an integer. If it fails, return 0.

    Only conversion failures (TypeError, ValueError, OverflowError) are
    turned into 0; any other exception propagates.
    """
    try:
        return int(v)
    except (TypeError, ValueError, OverflowError):
        return 0
740