1 from collections import namedtuple
2 import os
3 import os.path as path
4 import gzip
5 import json
6 import sys
7 import urllib2
8
9 from nflgame import OrderedDict
10 import nflgame.player
11 import nflgame.seq
12 import nflgame.statmap
13
# Largest native integer of the running interpreter. Its negation serves as
# the "no value" floor when the maximum of two statistics is taken.
_MAX_INT = sys.maxint

# Template for the on-disk cache path of one game's gzip-compressed JSON,
# kept in the 'gamecenter-json' directory next to this module. The '%s' slot
# is filled with the game identifier (eid).
_jsonf = path.join(path.split(__file__)[0], 'gamecenter-json', '%s.json.gz')
# URL template for a game's live-update JSON feed. Both '%s' slots are filled
# with the game identifier (eid).
_json_base_url = "http://www.nfl.com/liveupdate/game-center/%s/%s_gtd.json"
18
# Field names are given as one whitespace-separated string, which namedtuple
# accepts interchangeably with a list of names.
GameDiff = namedtuple('GameDiff', 'before after plays players')
"""
Represents the difference between two points in time of the same game
in terms of plays and player statistics.
"""

TeamStats = namedtuple(
    'TeamStats',
    'first_downs total_yds passing_yds '
    'rushing_yds penalty_cnt penalty_yds '
    'turnovers punt_cnt punt_yds punt_avg '
    'pos_time')
"""A collection of team statistics for an entire game."""
31
32
34 """
35 Represents field position.
36
37 The representation here is an integer offset where the 50 yard line
38 corresponds to '0'. Being in the own territory corresponds to a negative
39 offset while being in the opponent's territory corresponds to a positive
40 offset.
41
42 e.g., NE has the ball on the NE 45, the offset is -5.
43 e.g., NE has the ball on the NYG 2, the offset is 48.
44 """
def __new__(cls, pos_team, yardline):
    """
    Allocates an instance only when a yard line is supplied.

    A false yardline (e.g., None or the empty string) makes the
    constructor call evaluate to None instead of an object. pos_team is
    not inspected here.
    """
    return object.__new__(cls) if yardline else None
49
51 """
52 pos_team is the team on offense, and yardline is a string formatted
53 like 'team-territory yard-line'. e.g., "NE 32".
54 """
55 if yardline == '50':
56 self.offset = 0
57 return
58
59 territory, yd_str = yardline.split()
60 yd = int(yd_str)
61 if territory == pos_team:
62 self.offset = -(50 - yd)
63 else:
64 self.offset = 50 - yd
65
67 return cmp(self.offset, other.offset)
68
70 return '%d' % self.offset
71
72
74 """
75 Represents the amount of time a drive lasted in (minutes, seconds).
76 """
78 self.clock = clock
79
80 try:
81 self.minutes, self.seconds = map(int, self.clock.split(':'))
82 except ValueError:
83 self.minutes, self.seconds = 0, 0
84
86 """
87 Returns the total number of seconds that this possession lasted for.
88 """
89 return self.seconds + self.minutes * 60
90
92 a, b = (self.minutes, self.seconds), (other.minutes, other.seconds)
93 return cmp(a, b)
94
102
111
114
115
117 """
118 Represents the current time in a game. Namely, it keeps track of the
119 quarter and clock time. Also, GameClock can represent whether
120 the game hasn't started yet, is half time or if it's over.
121 """
123 self.qtr = qtr
124 self.clock = clock
125
126 try:
127 self.__minutes, self.__seconds = map(int, self.clock.split(':'))
128 except ValueError:
129 self.__minutes, self.__seconds = 0, 0
130 except AttributeError:
131 self.__minutes, self.__seconds = 0, 0
132 try:
133 self.__qtr = int(self.qtr)
134 if self.__qtr >= 3:
135 self.__qtr += 1
136 except ValueError:
137 if self.is_pregame():
138 self.__qtr = 0
139 elif self.is_halftime():
140 self.__qtr = 3
141 elif self.is_final():
142 self.__qtr = sys.maxint
143 else:
144 assert False, 'Unknown QTR value: "%s"' % self.qtr
145
147 return self.qtr == 'Pregame'
148
150 return self.qtr == 'Halftime'
151
153 return self.qtr == 'Final' or self.qtr == 'final overtime'
154
156 if self.__qtr != other.__qtr:
157 return cmp(self.__qtr, other.__qtr)
158 elif self.__minutes != other.__minutes:
159 return cmp(other.__minutes, self.__minutes)
160 return cmp(other.__seconds, self.__seconds)
161
163 """
164 Returns a nicely formatted string indicating the current time of the
165 game. Examples include "Q1 10:52", "Q4 1:25", "Pregame", "Halftime"
166 and "Final".
167 """
168 try:
169 q = int(self.qtr)
170 return 'Q%d %s' % (q, self.clock)
171 except ValueError:
172 return self.qtr
173
174
175 -class Game (object):
"""
Game represents a single pre- or regular-season game. It provides a window
into the statistics of every player that played in the game, along with
the winner of the game, the score and a list of all the scoring plays.
"""
181
def __new__(cls, eid=None, fpath=None):
    """
    Loads the raw JSON for a game and allocates the Game object.

    The raw text is fetched with _get_json_data(eid, fpath). None is
    returned instead of a Game when the fetch raises urllib2.URLError,
    when no data (or only '{}') comes back, or when the text is not
    valid JSON.

    On success the new object carries three attributes:
    rawData (the undecoded JSON text), eid and data (the decoded
    per-game dictionary). When eid is not given, it is taken from the
    first top-level key whose value is a dictionary.
    """
    try:
        raw = _get_json_data(eid, fpath)
    except urllib2.URLError:
        return None
    if raw is None or raw.strip() == '{}':
        return None

    game = object.__new__(cls)
    game.rawData = raw

    try:
        decoded = json.loads(raw)
        if eid is not None:
            game.eid, game.data = eid, decoded[eid]
        else:
            game.eid, game.data = None, decoded
            # The game's own entry is the first dictionary-valued item;
            # its key doubles as the game identifier.
            entry = next((item for item in decoded.iteritems()
                          if isinstance(item[1], dict)), None)
            if entry is not None:
                game.eid, game.data = entry
            assert game.eid is not None
    except ValueError:
        return None

    return game
210
def __init__(self, eid=None, fpath=None):
    """
    Creates a new Game instance given a game identifier.

    The game identifier is used by NFL.com's GameCenter live update web
    pages. It is used to construct a URL to download JSON data for the
    game.

    If the game has been completed, the JSON data will be cached to disk
    so that subsequent accesses will not re-download the data but instead
    read it from disk.

    When the JSON data is written to disk, it is compressed using gzip.

    eid is the game identifier and fpath an optional path to a gzipped
    JSON file to load instead. Neither is read here beyond the cache
    check: __new__ has already stored the decoded data in self.data and
    the resolved identifier in self.eid.

    Attributes set: home, away, stats_home, stats_away, time, down, togo,
    score_home, score_away, score_{home,away}_q{1..5}, winner, scores and,
    once the game is over, loser.
    """
    # Team abbreviations and whole-game team statistics.
    self.home = self.data['home']['abbr']
    self.away = self.data['away']['abbr']
    self.stats_home = _json_team_stats(self.data['home']['stats']['team'])
    self.stats_away = _json_team_stats(self.data['away']['stats']['team'])

    # Current state of the game: clock, down and distance, total scores.
    self.time = GameClock(self.data['qtr'], self.data['clock'])
    self.down = _tryint(self.data['down'])
    self.togo = _tryint(self.data['togo'])
    self.score_home = int(self.data['home']['score']['T'])
    self.score_away = int(self.data['away']['score']['T'])
    # Per-period scores are keyed '1'..'5' in the feed (presumably four
    # quarters plus overtime -- confirm against the feed) and are exposed
    # as score_home_q1 ... score_away_q5.
    for q in (1, 2, 3, 4, 5):
        for team in ('home', 'away'):
            score = self.data[team]['score'][str(q)]
            self.__dict__['score_%s_q%d' % (team, q)] = int(score)

    # A winner (and loser) is only decided once the game is final. A tie
    # is reported as 'HOME/AWAY' for both.
    if not self.game_over():
        self.winner = None
    else:
        if self.score_home > self.score_away:
            self.winner = self.home
            self.loser = self.away
        elif self.score_away > self.score_home:
            self.winner = self.away
            self.loser = self.home
        else:
            self.winner = '%s/%s' % (self.home, self.away)
            self.loser = '%s/%s' % (self.home, self.away)

    # One summary line per scoring play, ordered by numeric play key.
    self.scores = []
    for k in sorted(map(int, self.data['scrsummary'])):
        play = self.data['scrsummary'][str(k)]
        s = '%s - Q%d - %s - %s' \
            % (play['team'], play['qtr'], play['type'], play['desc'])
        self.scores.append(s)

    # Cache finished games that are not on disk yet. The check must use
    # self.eid rather than the eid argument: the argument is None when the
    # game was loaded from fpath, while save() always writes to the path
    # built from self.eid.
    if self.game_over() and not os.access(_jsonf % self.eid, os.R_OK):
        self.save()
267
269 """Returns true if team (i.e., 'NE') is the home team."""
270 return team == self.home
271
273 """game_over returns true if the game is no longer being played."""
274 return self.time.is_final()
275
277 """playing returns true if the game is currently being played."""
278 return not self.time.is_pregame() and not self.time.is_final()
279
def save(self, fpath=None):
    """
    Save the JSON data to fpath. This is done automatically if the
    game is over.

    The raw JSON text (self.rawData) is written gzip-compressed, with
    nothing appended. When fpath is None, the cache path built from
    self.eid is used.

    An IOError while opening or writing the file is not propagated;
    a hint naming the target directory is written to stderr instead.
    """
    if fpath is None:
        fpath = _jsonf % self.eid
    try:
        out = gzip.open(fpath, 'wb')
        # Close explicitly so the compressed stream is flushed and the
        # handle released here rather than whenever the object happens
        # to be garbage collected.
        try:
            out.write(self.rawData)
        finally:
            out.close()
    except IOError:
        sys.stderr.write("Could not cache JSON data. Please "
                         "make '%s' writable.\n"
                         % os.path.dirname(fpath))
293
295 """
296 Returns a string of the score of the game.
297 e.g., "NE (32) vs. NYG (0)".
298 """
299 return '%s (%d) vs. %s (%d)' \
300 % (self.home, self.score_home, self.away, self.score_away)
301
303 """
304 Returns a GenPlayers sequence of player statistics that combines
305 game statistics and play statistics by taking the max value of
306 each corresponding statistic.
307
308 This is useful when accuracy is desirable. Namely, using only
309 play-by-play data or using only game statistics can be unreliable.
310 That is, both are inconsistently correct.
311
312 Taking the max values of each statistic reduces the chance of being
313 wrong (particularly for stats that are in both play-by-play data
314 and game statistics), but does not eliminate them.
315 """
316 game_players = list(self.players)
317 play_players = list(self.drives.plays().players())
318 max_players = OrderedDict()
319
320
321
322
323
324
325 for pplay in play_players:
326 newp = nflgame.player.GamePlayerStats(pplay.playerid,
327 pplay.name, pplay.home,
328 pplay.team)
329 maxstats = {}
330 for stat, val in pplay._stats.iteritems():
331 maxstats[stat] = val
332
333 newp._overwrite_stats(maxstats)
334 max_players[pplay.playerid] = newp
335
336 for newp in max_players.itervalues():
337 for pgame in game_players:
338 if pgame.playerid != newp.playerid:
339 continue
340
341 maxstats = {}
342 for stat, val in pgame._stats.iteritems():
343 maxstats[stat] = max([val,
344 newp._stats.get(stat, -_MAX_INT)])
345
346 newp._overwrite_stats(maxstats)
347 break
348 return nflgame.seq.GenPlayerStats(max_players)
349
351 if name == 'players':
352 self.__players = _json_game_player_stats(self, self.data)
353 self.players = nflgame.seq.GenPlayerStats(self.__players)
354 return self.players
355 if name == 'drives':
356 self.__drives = _json_drives(self, self.home, self.data['drives'])
357 self.drives = nflgame.seq.GenDrives(self.__drives)
358 return self.drives
359
361 return diff(other, self)
362
365
366
def diff(before, after):
    """
    Computes what changed in a game between two snapshots of it.

    before and after are two game objects with the same eid (this is
    asserted). The return value is a GameDiff namedtuple holding both
    snapshots (before, after) plus plays and players, each of which
    contains *only* the data that is in the after game but not in the
    before game:

    plays is a list of the plays in after for which no equal play exists
    in before.

    players is a player statistics sequence. A player present in both
    snapshots is represented by the result of subtracting the earlier
    statistics from the later ones (and left out when that result is
    None); a player present only in after is included unchanged.

    This is useful for sending alerts where you're guaranteed to see each
    play statistic only once (assuming NFL.com behaves itself).
    """
    assert after.eid == before.eid

    later_plays = list(after.drives.plays())
    earlier_plays = list(before.drives.plays())
    plays = [play for play in later_plays if play not in earlier_plays]

    changed = OrderedDict()
    later_stats = list(after.max_player_stats())
    earlier_stats = list(before.max_player_stats())
    for current in later_stats:
        pid = current.playerid
        previous = [old for old in earlier_stats if pid == old.playerid]
        if not previous:
            # Not seen in the earlier snapshot: everything is new.
            changed[pid] = current
            continue
        for old in previous:
            delta = current - old
            if delta is not None:
                changed[pid] = delta
    players = nflgame.seq.GenPlayerStats(changed)

    return GameDiff(before=before, after=after, plays=plays, players=players)
407
408
410 """
411 Drive represents a single drive in an NFL game. It contains a list
412 of all plays that happened in the drive, in chronological order.
413 It also contains meta information about the drive such as the start
414 and stop times and field position, length of possession, the number
415 of first downs and a short descriptive string of the result of the
416 drive.
417
418 """
def __init__(self, game, drive_num, home_team, data):
    """
    Builds a drive from its JSON entry.

    game is the game the drive belongs to, drive_num the number assigned
    to the drive, home_team the abbreviation of the home team (used to
    set self.home) and data the decoded JSON dictionary of the drive.

    When data is None, nothing is initialized and the instance is left
    without attributes for the caller to fill in.
    """
    if data is None:
        return

    self.game = game
    self.drive_num = drive_num
    self.team = data['posteam']
    self.home = self.team == home_team
    self.first_downs = int(data['fds'])
    self.result = data['result']
    self.penalty_yds = int(data['penyds'])
    self.total_yds = int(data['ydsgained'])
    self.pos_time = PossessionTime(data['postime'])
    self.play_cnt = int(data['numplays'])

    start = data['start']
    self.field_start = FieldPosition(self.team, start['yrdln'])
    self.time_start = GameClock(start['qtr'], start['time'])

    # The drive's own end yard line may be blank. In that case use the
    # yard line of the latest play that has one, and fall back to
    # midfield when no play does.
    end_yardline = data['end']['yrdln']
    if end_yardline.strip():
        field_end = FieldPosition(self.team, end_yardline)
    else:
        field_end = None
        for pid in sorted(map(int, data['plays'].keys()), reverse=True):
            play_yardline = data['plays'][str(pid)]['yrdln'].strip()
            if play_yardline:
                field_end = FieldPosition(self.team, play_yardline)
                break
        if field_end is None:
            field_end = FieldPosition(self.team, '50')
    self.field_end = field_end

    # The end entry supplies only the clock time here; the quarter is
    # taken from the play with the highest numeric id.
    last_pid = str(sorted(map(int, data['plays'].keys()))[-1])
    self.time_end = GameClock(data['plays'][last_pid]['qtr'],
                              data['end']['time'])

    self.__plays = _json_plays(self, data['plays'])
    self.plays = nflgame.seq.GenPlays(self.__plays)
460
462 """
463 Adds the statistics of two drives together.
464
465 Note that once two drives are added, the following fields
466 automatically get None values: result, field_start, field_end,
467 time_start and time_end.
468 """
469 assert self.team == other.team, \
470 'Cannot add drives from different teams "%s" and "%s".' \
471 % (self.team, other.team)
472 new_drive = Drive(None, 0, '', None)
473 new_drive.team = self.team
474 new_drive.home = self.home
475 new_drive.first_downs = self.first_downs + other.first_downs
476 new_drive.penalty_yds = self.penalty_yds + other.penalty_yds
477 new_drive.total_yds = self.total_yds + other.total_yds
478 new_drive.pos_time = self.pos_time + other.pos_time
479 new_drive.play_cnt = self.play_cnt + other.play_cnt
480 new_drive.__plays = self.__plays + other.__plays
481 new_drive.result = None
482 new_drive.field_start = None
483 new_drive.field_end = None
484 new_drive.time_start = None
485 new_drive.time_end = None
486 return new_drive
487
489 return '%s (Start: %s, End: %s) %s' \
490 % (self.team, self.time_start, self.time_end, self.result)
491
492
493 -class Play (object):
494 """
495 Play represents a single play. It contains a list of all players
496 that participated in the play (including offense, defense and special
497 teams). The play also includes meta information about what down it
498 is, field position, clock time, etc.
499
500 Play objects also contain team-level statistics, such as whether the
501 play was a first down, a fourth down failure, etc.
502 """
def __init__(self, drive, playid, data):
    """
    Builds a play from its JSON entry.

    drive is the drive the play belongs to, playid the identifier of the
    play and data the decoded JSON dictionary of the play.

    Besides the meta information (team, desc, note, down, yards_togo,
    time, yardline, ...), every statistic found for the play is stored
    both as an attribute of the play and in the self._stats dictionary.
    """
    self.data = data
    self.drive = drive
    self.playid = playid
    self.team = data['posteam']
    self.home = self.drive.home
    self.desc = data['desc']
    self.note = data['note']
    self.down = int(data['down'])
    self.yards_togo = int(data['ydstogo'])
    self.touchdown = 'touchdown' in self.desc.lower()
    self._stats = {}

    # Without a team in possession there is no clock or field position.
    if self.team:
        self.time = GameClock(data['qtr'], data['time'])
        self.yardline = FieldPosition(self.team, data['yrdln'])
    else:
        self.time, self.yardline = None, None

    attrs = self.__dict__

    # Entries filed under player id '0' belong to no individual player.
    # Their values are added onto whatever the play already holds under
    # the same name.
    unattributed = data['players']['0'] if '0' in data['players'] else []
    for info in unattributed:
        stat_id = info['statId']
        if stat_id in nflgame.statmap.idmap:
            values = nflgame.statmap.values(stat_id, info['yards'])
            for k, v in values.iteritems():
                running = attrs.get(k, 0) + v
                attrs[k] = running
                self._stats[k] = running

    self.events = _json_play_events(data['players'])

    # Player statistics are copied onto the play as well. A value copied
    # here replaces any value already stored under the same name, so the
    # last player iterated wins.
    self.__players = _json_play_players(self, data['players'])
    self.players = nflgame.seq.GenPlayerStats(self.__players)
    for p in self.players:
        for k, v in p.stats.iteritems():
            attrs[k] = v
            self._stats[k] = v
552
554 """Whether a player with id playerid participated in this play."""
555 return playerid in self.__players
556
558 if self.team:
559 if self.down != 0:
560 return '(%s, %s, %d and %d) %s' \
561 % (self.team, self.data['yrdln'],
562 self.down, self.yards_togo, self.desc)
563 else:
564 return '(%s, %s) %s' \
565 % (self.team, self.data['yrdln'], self.desc)
566 return self.desc
567
569 """
570 We use the play description to determine equality because the
571 play description can be changed. (Like when a play is reversed.)
572 """
573 return self.playid == other.playid and self.desc == other.desc
574
577
578
580 """
581 Takes a team stats JSON entry and converts it to a TeamStats namedtuple.
582 """
583 return TeamStats(
584 first_downs=int(data['totfd']),
585 total_yds=int(data['totyds']),
586 passing_yds=int(data['pyds']),
587 rushing_yds=int(data['ryds']),
588 penalty_cnt=int(data['pen']),
589 penalty_yds=int(data['penyds']),
590 turnovers=int(data['trnovr']),
591 punt_cnt=int(data['pt']),
592 punt_yds=int(data['ptyds']),
593 punt_avg=int(data['ptavg']),
594 pos_time=PossessionTime(data['top']))
595
596
598 """
599 Takes a home or away JSON entry and converts it to a list of Drive
600 objects.
601 """
602 drive_nums = []
603 for drive_num in data:
604 try:
605 drive_nums.append(int(drive_num))
606 except:
607 pass
608 drives = []
609 playids = set()
610 for i, drive_num in enumerate(sorted(drive_nums), 1):
611 repeat_drive = False
612 for playid in data[str(drive_num)]['plays']:
613 if playid in playids:
614 repeat_drive = True
615 break
616 playids.add(playid)
617 if repeat_drive:
618 continue
619 drives.append(Drive(game, i, home_team, data[str(drive_num)]))
620 return drives
621
622
624 """
625 Takes a single JSON drive entry (data) and converts it to a list
626 of Play objects.
627 """
628 plays = []
629 for playid in map(str, sorted(map(int, data))):
630 plays.append(Play(drive, playid, data[playid]))
631 return plays
632
633
635 """
636 Takes a single JSON play entry (data) and converts it to an OrderedDict
637 of player statistics.
638
639 play is the instance of Play that this data is part of. It is used
640 to determine whether the player belong to the home team or not.
641 """
642 players = OrderedDict()
643 for playerid, statcats in data.iteritems():
644 if playerid == '0':
645 continue
646 for info in statcats:
647 if info['statId'] not in nflgame.statmap.idmap:
648 continue
649 if playerid not in players:
650 home = play.drive.game.is_home(info['clubcode'])
651 if home:
652 team_name = play.drive.game.home
653 else:
654 team_name = play.drive.game.away
655 stats = nflgame.player.PlayPlayerStats(playerid,
656 info['playerName'],
657 home, team_name)
658 players[playerid] = stats
659 statvals = nflgame.statmap.values(info['statId'], info['yards'])
660 players[playerid]._add_stats(statvals)
661 return players
662
663
665 """
666 Takes a single JSON play entry (data) and converts it to a list of events.
667 """
668 temp = list()
669 for playerid, statcats in data.iteritems():
670 for info in statcats:
671 if info['statId'] not in nflgame.statmap.idmap:
672 continue
673 statvals = nflgame.statmap.values(info['statId'], info['yards'])
674 statvals['playerid'] = None if playerid == '0' else playerid
675 statvals['playername'] = info['playerName'] or None
676 statvals['team'] = info['clubcode']
677 temp.append((int(info['sequence']), statvals))
678 return [t[1] for t in sorted(temp, key=lambda t: t[0])]
679
680
682 """
683 Parses the 'home' and 'away' team stats and returns an OrderedDict
684 mapping player id to their total game statistics as instances of
685 nflgame.player.GamePlayerStats.
686 """
687 players = OrderedDict()
688 for team in ('home', 'away'):
689 for category in nflgame.statmap.categories:
690 if category not in data[team]['stats']:
691 continue
692 for pid, raw in data[team]['stats'][category].iteritems():
693 stats = {}
694 for k, v in raw.iteritems():
695 if k == 'name':
696 continue
697 stats['%s_%s' % (category, k)] = v
698 if pid not in players:
699 home = team == 'home'
700 if home:
701 team_name = game.home
702 else:
703 team_name = game.away
704 players[pid] = nflgame.player.GamePlayerStats(pid,
705 raw['name'],
706 home,
707 team_name)
708 players[pid]._add_stats(stats)
709 return players
710
711
713 """
714 Returns the JSON data corresponding to the game represented by eid.
715
716 If the JSON data is already on disk, it is read, decompressed and returned.
717
718 Otherwise, the JSON data is downloaded from the NFL web site. If the data
719 doesn't exist yet or there was an error, _get_json_data returns None.
720
721 If eid is None, then the JSON data is read from the file at fpath.
722 """
723 assert eid is not None or fpath is not None
724
725 if fpath is not None:
726 return gzip.open(fpath).read()
727
728 fpath = _jsonf % eid
729 if os.access(fpath, os.R_OK):
730 return gzip.open(fpath).read()
731 try:
732 return urllib2.urlopen(_json_base_url % (eid, eid), timeout=5).read()
733 except urllib2.HTTPError:
734 pass
735 return None
736
737
739 """
740 Tries to convert v to an integer. If it fails, return 0.
741 """
742 try:
743 return int(v)
744 except:
745 return 0
746