1 """
2 Introduction
3 ============
4 A simple library to download, slice and search NFL game footage on a
5 play-by-play basis.
6
7 This library comes with preloaded play-by-play meta data, which describes the
8 start time of each play in the game footage. However, the actual footage does
9 not come with this library and is not released by me. This package therefore
10 provides utilities to batch download NFL Game Footage from the original source.
11
12 Once game footage is downloaded, you can use this library to search plays and
13 construct a playlist to play in any video player.
14 """
15
16 import gzip
17 import os
18 import os.path as path
19 import socket
20 import sys
21 import urllib2
22
23 import bs4
24
25 import eventlet
26 httplib2 = eventlet.import_patched('httplib2')
27 import eventlet.green.subprocess as subprocess
28
29 from nflgame import OrderedDict
30
31 _xmlf = path.join(path.split(__file__)[0], 'pbp-xml', '%s-%s.xml.gz')
32 _xml_base_url = 'http://e2.cdnl3.neulion.com/nfl/edl/nflgr/%d/%s.xml'
33
34 _footage_url = 'http://nlds82.cdnl3nl.neulion.com/nlds_vod/nfl/vod/' \
35 '%s/%s/%s/%s/2_%s_%s_%s_%s_h_whole_1_%s.mp4.m3u8'
36
37 __play_cache = {}
38
39
45
46
58
59
73
74
76 return path.join(footage_dir, '%s-%s.mp4' % (g.eid, g.gamekey))
77
78
80 return path.join(footage_play_dir, '%s-%s' % (g.eid, g.gamekey))
81
82
84 return '(Season: %s, Week: %s, %s)' \
85 % (gobj.schedule['year'], gobj.schedule['week'], gobj)
86
87
89 """
90 Scans the game directory inside footage_play_dir and returns a list
91 of plays that haven't been sliced yet. In particular, a play is only
92 considered sliced if the following file is readable, assuming {playid}
93 is its play id::
94
95 {footage_play_dir}/{eid}-{gamekey}/{playid}.mp4
96
97 All plays for the game given that don't fit this criteria will be
98 returned in the list.
99
100 If the list is empty, then all plays for the game have been sliced.
101 Alternatively, None can be returned if there was a problem retrieving
102 the play-by-play meta data.
103
104 If dry_run is true, then only the first 10 plays of the game are
105 sliced.
106 """
107 ps = plays(gobj)
108 outdir = _play_path(footage_play_dir, gobj)
109
110 unsliced = []
111 if ps is None:
112 return None
113 for i, p in enumerate(ps.values()):
114 if dry_run and i >= 10:
115 break
116 pid = p.idstr()
117 if not os.access(path.join(outdir, '%s.mp4' % pid), os.R_OK):
118 unsliced.append(p)
119 return unsliced
120
121
def slice(footage_play_dir, full_footage_file, gobj, threads=4, dry_run=False):
    """
    Uses ffmpeg to slice the given footage file into play-by-play pieces.
    The full_footage_file should point to a full game downloaded with
    nflvid-footage and gobj should be the corresponding nflgame.game.Game
    object.

    The footage_play_dir is where the pieces will be saved::

        {footage_play_dir}/{eid}-{gamekey}/{playid}.mp4

    This function will not duplicate work. If a video file exists for
    a particular play, then slice will not regenerate it.

    Note that this function uses an eventlet green pool to run multiple
    ffmpeg instances simultaneously. The maximum number of threads to
    use is specified by threads. This function only terminates when all
    threads have finished processing.

    If dry_run is true, then only the first 10 plays of the game are
    sliced.
    """
    # NOTE: this intentionally shadows the builtin `slice`; the name is
    # part of this module's public API and cannot be changed.
    game_dir = _play_path(footage_play_dir, gobj)
    if not os.access(game_dir, os.R_OK):
        os.makedirs(game_dir)

    # Only plays without an existing video file are handed to workers,
    # so re-running after an interruption resumes where it left off.
    remaining = unsliced_plays(footage_play_dir, gobj, dry_run) or []

    pool = eventlet.greenpool.GreenPool(threads)
    for play_meta in remaining:
        pool.spawn_n(slice_play, footage_play_dir, full_footage_file,
                     gobj, play_meta)
    pool.waitall()
152
153
def slice_play(footage_play_dir, full_footage_file, gobj, play,
               max_duration=15):
    """
    This is just like slice, but it only slices the play provided.
    In typical cases, slice should be used since it makes sure not
    to duplicate work.

    This function will not check if the play-by-play directory for
    gobj has been created.

    max_duration is used to cap the length of a play. This drastically
    cuts down on the time required to slice a game and the storage
    requirements of a game at the cost of potentially missing bigger
    plays. This may get smarter in the future. Set max_duration to 0
    to impose no artificial cap.
    """
    outdir = _play_path(footage_play_dir, gobj)
    st = play.start
    # BUG FIX: st.milli holds milliseconds (PlayTime multiplies the raw
    # two-digit field by 10), so it must be zero-padded to three digits.
    # '%d' rendered 50ms as '.50', which ffmpeg parses as 500ms, shifting
    # the slice start point. '%03d' yields the correct '.050'.
    start_time = '%02d:%02d:%02d.%03d' % (st.hh, st.mm, st.ss, st.milli)
    outpath = path.join(outdir, '%s.mp4' % play.idstr())

    # Cap the clip length unless the cap is disabled (0) or the real
    # duration is already shorter. A missing duration (last play of the
    # game) falls back to 40 seconds.
    duration = max_duration
    if duration == 0 or play.duration < max_duration:
        duration = play.duration or 40

    cmd = ['ffmpeg',
           '-ss', start_time,
           '-i', full_footage_file,
           '-t', '%d' % duration,
           '-map', '0',          # copy every stream, not just the default
           '-strict', '-2',      # allow the experimental AAC encoder
           outpath,
           ]
    _run_command(cmd)
188
189
def download(footage_dir, gobj, quality='1600', dry_run=False):
    """
    Starts an ffmpeg process to download the full footage of the given
    game with the quality provided. The qualities available are:
    400, 800, 1200, 1600, 2400, 3000, 4500 with 4500 being the best.

    The footage will be saved to the following path::

        footage_dir/{eid}-{gamekey}.mp4

    If footage is already at that path, then a LookupError is raised.

    A full game's worth of footage at a quality of 1600 is about 2GB.
    """
    save_path = _full_path(footage_dir, gobj)
    if os.access(save_path, os.R_OK):
        raise LookupError('Footage path "%s" already exists.' % save_path)

    url = footage_url(gobj, quality)

    # Issue a cheap HEAD request first so a dead URL produces a clear
    # error message instead of an opaque ffmpeg failure.
    resp, _ = httplib2.Http().request(url, 'HEAD')
    status = resp['status']
    if status != '200':
        print >> sys.stderr, 'BAD URL (http status %s) for game %s: %s' \
            % (status, _nice_game(gobj), url)
        print >> sys.stderr, 'FAILED to download game %s' % _nice_game(gobj)
        return

    cmd = ['ffmpeg', '-timeout', '60', '-i', url]
    if dry_run:
        # A dry run grabs only the first 30 seconds of footage.
        cmd += ['-t', '30']
    cmd += ['-strict', '-2', save_path]

    print >> sys.stderr, 'Downloading game %s %s' \
        % (gobj.eid, _nice_game(gobj))
    if _run_command(cmd):
        print >> sys.stderr, 'DONE with game %s' % _nice_game(gobj)
    else:
        print >> sys.stderr, 'FAILED to download game %s' % _nice_game(gobj)
233
234
236 try:
237 p = subprocess.Popen(cmd,
238 stdout=subprocess.PIPE,
239 stderr=subprocess.STDOUT)
240 output = p.communicate()[0].strip()
241
242 if p.returncode > 0:
243 err = subprocess.CalledProcessError(p.returncode, cmd)
244 err.output = output
245 raise err
246 except subprocess.CalledProcessError, e:
247 indent = lambda s: '\n'.join(map(lambda l: ' %s' % l, s.split('\n')))
248 print >> sys.stderr, "Could not run '%s' (exit code %d):\n%s" \
249 % (' '.join(cmd), e.returncode, indent(e.output))
250 return False
251 except OSError, e:
252 print >> sys.stderr, "Could not run '%s' (errno: %d): %s" \
253 % (' '.join(cmd), e.errno, e.strerror)
254 return False
255 return True
256
257
259 """
260 Returns an ordered dictionary of all plays for a particular game.
261
262 The game must be a nflgame.game.Game object.
263
264 If there is a problem retrieving the data, None is returned.
265
266 If the game is over, then the XML data is saved to disk.
267 """
268 if gobj.game_over() and gobj.eid in __play_cache:
269 return __play_cache[gobj.eid]
270
271 rawxml = _get_xml_data((gobj.eid, gobj.gamekey))
272 ps = _xml_play_data(rawxml)
273 if ps is None:
274 return None
275 if len(ps) == 0:
276 print >> sys.stderr, 'Could not find ArchiveTCIN field in XML data. ' \
277 'This field provides the start time of each play.'
278 return None
279 __play_cache[gobj.eid] = ps
280
281
282 if gobj.game_over():
283 fp = _xmlf % (gobj.eid, gobj.gamekey)
284 try:
285 print >> gzip.open(fp, 'w+'), rawxml,
286 except IOError:
287 print >> sys.stderr, 'Could not cache XML data. Please make ' \
288 '"%s" writable.' % path.dirname(fp)
289 return ps
290
291
def play(gobj, playid):
    """
    Returns a Play object given a game and a play id. The game must be
    a nflgame.game.Game object.

    If a play with the given id does not exist, None is returned.
    None is also returned when the play-by-play meta data for the game
    could not be retrieved at all.
    """
    ps = plays(gobj)
    if ps is None:
        # plays() returns None when the XML meta data is unavailable;
        # propagate that instead of crashing with AttributeError on .get.
        return None
    return ps.get(playid, None)
300
301
class Play (object):
    """
    Represents a single play with meta data that ties it to game footage.
    The footage_start corresponds to the 'ArchiveTCIN', which is when
    the play starts. Since there is no record of when a play stops, the
    duration is computed by subtracting the start time from the start
    time of the next play. If it's the last play recorded, then the
    duration is None.

    The play id is the foreign key that maps to play data stored in nflgame.
    """
    def __init__(self, start, duration, playid):
        # start: PlayTime marking the play's ArchiveTCIN in the footage.
        # duration: seconds until the next play, or None for the last play.
        # playid: play id; foreign key into nflgame play data.
        self.start, self.duration, self.playid = start, duration, playid

    def idstr(self):
        """Returns a string play id padded with zeroes."""
        return '%04d' % int(self.playid)

    def __str__(self):
        return '(%s, %s, %s)' % (self.playid, self.start, self.duration)
322
323
325 """
326 Represents a footage time point, in the format HH:MM:SS:MM
327 """
329 self.point = point
330
331 try:
332 parts = map(int, self.point.split(':'))
333 except ValueError:
334 assert False, 'Bad play time format: %s' % self.point
335
336 if len(parts) != 4:
337 assert False, 'Expected 4 parts but got %d in: %s' \
338 % (len(parts), self.point)
339
340 self.hh, self.mm, self.ss, self.milli = parts
341
342
343 self.milli *= 10
344
346 """
347 Returns this time point rounded to the nearest second.
348 """
349 secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss
350 if self.milli >= 50:
351 secs += 1
352 return secs
353
355 """
356 Returns this time point as fractional seconds based on milliseconds.
357 """
358 secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss
359 secs = (1000 * secs) + self.milli
360 return float(secs) / 1000.0
361
364
366 """
367 Returns the difference rounded to nearest second between
368 two time points. The 'other' time point must take place before the
369 current time point.
370 """
371 assert other <= self, '%s is not <= than %s' % (other, self)
372 return int(round(self.fractional() - other.fractional()))
373
376
377
379 """
380 Parses the XML raw data given into an ordered dictionary of Play
381 objects. The dictionary is keyed by play id.
382 """
383 if data is None:
384 return None
385
386
387
388 rows = []
389 for row in bs4.BeautifulSoup(data).find_all('row'):
390 playid = row.find('id')
391 if not playid:
392 continue
393 playid = playid.get_text().strip()
394
395 start = row.find('archivetcin')
396 if not start:
397 continue
398 start = PlayTime(start.get_text().strip())
399
400
401 if len(rows) > 0 and start < rows[-1][1]:
402 continue
403 rows.append((playid, start, row))
404
405
406
407
408 def ignore(row):
409 if 'playdescription' in row.attrs:
410 if row['playdescription'].lower().startswith('timeout'):
411 return True
412 if row['playdescription'].lower().startswith('two-minute'):
413 return True
414
415
416 if 'preplaybyplay' in row.attrs:
417 if row['preplaybyplay'].lower().startswith('timeout'):
418 return True
419 return False
420
421 d = OrderedDict()
422 for i, (playid, start, row) in enumerate(rows):
423 if ignore(row):
424 continue
425 duration = None
426 if i < len(rows) - 1:
427 duration = rows[i+1][1] - start
428 d[playid] = Play(start, duration, playid)
429 return d
430
431
433 """
434 Returns the XML play data corresponding to the game given. A game must
435 be specified as a tuple: the first element should be an eid and the second
436 element should be a game key. For example, ('2012102108', '55604').
437
438 If the XML data is already on disk, it is read, decompressed and returned.
439
440 Otherwise, the XML data is downloaded from the NFL web site. If the data
441 doesn't exist yet or there was an error, _get_xml_data returns None.
442
443 If game is None, then the XML data is read from the file at fpath.
444 """
445 assert game is not None or fpath is not None
446
447 if fpath is not None:
448 return gzip.open(fpath).read()
449
450 fpath = _xmlf % (game[0], game[1])
451 if os.access(fpath, os.R_OK):
452 return gzip.open(fpath).read()
453 try:
454 year = int(game[0][0:4])
455 month = int(game[0][4:6])
456 if month <= 3:
457 year -= 1
458 u = _xml_base_url % (year, game[1])
459 return urllib2.urlopen(u, timeout=10).read()
460 except urllib2.HTTPError, e:
461 print >> sys.stderr, e
462 except socket.timeout, e:
463 print >> sys.stderr, e
464 return None
465