simple_ffmpeg_batch_io.AudioIO
Read/write audio frames or batches of audio frames from (compressed) file, including video file with audio stream(s), using FFmpeg backend.
This module defines the main AudioIO class used to open audio streams,
read audio frames or batches of frames, and write processed outputs.
Authors
Dominique Vaufreydaz (inspired from original C++ code: https://github.com/Vaufreyd/ReadWriteVideosWithOpenCV)
1""" 2Read/write audio frames or batches of audio frames from (compressed) file, including video file with audio stream(s), using FFmpeg backend. 3 4This module defines the main `AudioIO` class used to open audio streams, 5read audio frames or batches of frames, and write processed outputs. 6 7Authors 8------- 9Dominique Vaufreydaz (inspired from original C++ code: https://github.com/Vaufreyd/ReadWriteVideosWithOpenCV) 10 11""" 12 13__authors__ = ("Dominique Vaufreydaz") 14 15import sys 16import subprocess as sp 17import re 18from enum import Enum 19 20import numpy as np 21 22from .FrameCounter import FrameCounter 23from .FrameContainer import FrameContainer 24from .PipeMode import PipeMode 25 26# init static_ffmpeg at import time, first time it will download ffmpeg executables 27import static_ffmpeg 28static_ffmpeg.add_paths() 29 30class AudioIO: 31 # "static" variables to ffmpeg, ffprobe executables 32 audioProgram, paramProgram = static_ffmpeg.run.get_or_fetch_platform_executables_else_raise() 33 34 class AudioIOException(Exception): 35 """ 36 Dedicated exception class for AudioIO class. 37 """ 38 def __init__(self, message="Error while reading/writing video occurs"): 39 self.message = message 40 super().__init__(self.message) 41 42 class AudioFormat(Enum): 43 """ 44 Enum class for supported input video type: 32-bit float is the only supported type for the moment. 45 """ 46 PCM32LE = 'pcm_f32le' # default format (unique mode for the moment) 47 48 @classmethod 49 def reader(cls, filename, **kwargs): 50 """ 51 Create and open an AudioIO object in reader mode 52 53 See ``AudioIO.open`` for the full list of accepted parameters. 54 """ 55 reader = cls() 56 reader.open(filename, **kwargs) 57 return reader 58 59 @classmethod 60 def writer(cls, filename, **kwargs): 61 """ 62 Create and open an AudioIO object in writer mode 63 64 See ``AudioIO.create`` for the full list of accepted parameters. 65 """ 66 writer = cls() 67 writer.create(filename, **kwargs) 68 return writer 69 70 # To use with context manager "with AudioIO.reader(...) as f:' for instance 71 def __enter__(self): 72 """ 73 Method call at initialisation of a context manager like "with AudioIO.reader/writer(...) as f:' for instance 74 """ 75 # simply return myself 76 return self 77 78 def __exit__(self, exc_type, exc_val, exc_tb): 79 """ 80 Method call when existing of a context manager like "with AudioIO.reader/writer(...) as f:' for instance 81 """ 82 # close AudioIO 83 self.close() 84 return False 85 86 @staticmethod 87 def get_time_in_sec(filename, *, debug=False, logLevel=16): 88 """ 89 Static method to get length of an audio file (or video file containing audio) in seconds including milliseconds as decimal part (3 decimals). 90 91 Parameters 92 ---------- 93 filename : str or path. 94 Raw audio waveform as a 1D array. 95 96 debug : bool (default False). 97 Show debug info. 98 99 log_level: int (default 16). 100 Log level to pass to the underlying ffmpeg/ffprobe command. 101 102 Returns 103 ---------- 104 float 105 Length in seconds of video file (including milliseconds as decimal part with 3 decimals) 106 """ 107 108 cmd = [AudioIO.paramProgram, # ffprobe 109 '-hide_banner', 110 '-loglevel', str(logLevel), 111 '-show_entries', 'format=duration', 112 '-of', 'default=noprint_wrappers=1:nokey=1', 113 filename 114 ] 115 116 if debug == True: 117 print(' '.join(cmd)) 118 119 # call ffprobe and get params in one single line 120 lpipe = sp.Popen(cmd, stdout=sp.PIPE) 121 output = lpipe.stdout.readlines() 122 lpipe.terminate() 123 # transform Bytes output to one single string 124 output = ''.join( [element.decode('utf-8') for element in output]) 125 126 try: 127 return float(output) 128 except (ValueError, TypeError): 129 return None 130 131 @staticmethod 132 def get_params(filename, *, debug=False, logLevel=16): 133 """ 134 Static method to get params (channels,sample_rate) of a (video containing) audio file in seconds. 135 136 Parameters 137 ---------- 138 filename : str or path. 139 Raw audio waveform as a 1D array. 140 141 debug : bool (default (False). 142 Show debug info. 143 144 log_level: int (default 16). 145 Log level to pass to the underlying ffmpeg/ffprobe command. 146 147 Returns 148 ---------- 149 tuple 150 Tuple containing (channels,sample_rate) of the file 151 """ 152 cmd = [AudioIO.paramProgram, # ffprobe 153 '-hide_banner', 154 '-loglevel', str(logLevel), 155 '-show_entries', 'stream=channels,sample_rate', 156 filename 157 ] 158 159 if debug == True: 160 print(' '.join(cmd)) 161 162 # call ffprobe and get params in one single line 163 lpipe = sp.Popen(cmd, stdout=sp.PIPE) 164 output = lpipe.stdout.readlines() 165 lpipe.terminate() 166 # transform Bytes output to one single string 167 output = ''.join( [element.decode('utf-8') for element in output]) 168 169 pattern_sample_rate = r'sample_rate=(\d+)' 170 pattern_channels = r'channels=(\d+)' 171 172 # Search for values in the ffprobe output 173 match_sample_rate = re.search(pattern_sample_rate, output, flags=re.MULTILINE) 174 match_channels = re.search(pattern_channels, output, flags=re.MULTILINE) 175 176 # Extraction des valeurs 177 if match_sample_rate: 178 sample_rate = int(match_sample_rate.group(1)) 179 else: 180 raise AudioIO.AudioIOException("Unable to get audio sample_rate of '" + str(filename) + "'") 181 182 if match_channels: 183 channels = int(match_channels.group(1)) 184 else: 185 raise AudioIO.AudioIOException("Unable to get audio channels of '" + str(filename) + "'") 186 187 return (channels,sample_rate) 188 189 # Attributes 190 mode: PipeMode 191 """ Pipemode of the current object (default PipeMode.UNK_MODE)""" 192 193 loglevel: int 194 """ loglevel of the underlying ffmpeg backend for this object (default 16)""" 195 196 debugModel: bool 197 """ debutMode flag for this object (print debut info, default False)""" 198 199 channels: int 200 """ Number of channels of images (default -1) """ 201 202 sample_rate: int 203 """ sample_rate of images (default -1) """ 204 205 plannar: bool 206 """ Read/write data as plannar, i.e. not interleaved (default True) """ 207 208 pipe: sp.Popen 209 """ pipe object to ffmpeg/ffprobe (default None)""" 210 211 frameSize: int 212 """ Weight in bytes of one image (default -1)""" 213 214 filename: str 215 """ Filename of the file (default None)""" 216 217 frame_counter: FrameCounter 218 """ `Framecounter` object to count ellapsed time (default None)""" 219 220 def __init__(self, *, logLevel = 16, debugMode = False): 221 """ 222 Create a VideoIO object giving ffmpeg/ffrobe loglevel and defining debug mode 223 224 Parameters 225 ---------- 226 log_level: int (default 16) 227 Log level to pass to the underlying ffmpeg/ffprobe command. 228 229 debugMode: bool (default (False) 230 Show debug info. while processing video 231 """ 232 233 self.mode = PipeMode.UNK_MODE 234 self.logLevel = logLevel 235 self.debugMode = debugMode 236 237 # Call init() method 238 self.init() 239 240 def init(self): 241 """ 242 Init or reinit a VideoIO object. 243 """ 244 self.channels = -1 245 self.sample_rate = -1 246 self.plannar = True 247 self.pipe = None 248 self.frameSize = -1 249 self.filename = None 250 self.frame_counter = None 251 252 _repr_exclude = {"pipe"} 253 """ List of excluded attribute for string conversion. """ 254 255 # converting the object to a string representation 256 def __repr__(self): 257 """ 258 Convert object (excluding attributes in _repr_exclude) to string representation. 259 """ 260 attrs = ", ".join( 261 f"{k}={v!r}" 262 for k, v in self.__dict__.items() 263 if k not in self._repr_exclude 264 ) 265 return f"{self.__class__.__name__}({attrs})" 266 267 __str__ = __repr__ 268 """ String representation """ 269 270 def get_elapsed_time_as_str(self) -> str: 271 """ 272 Method to get elapsed time (float value represented) as str. 273 274 Returns 275 ---------- 276 str or None 277 Elapsed time (float value) as str, "15.500" for instance for 15 secondes and 500 milliseconds 278 None if no frame counter are available. 279 """ 280 if self.frame_counter is None: 281 return None 282 return self.frame_counter.get_elapsed_time_as_str() 283 284 def get_formated_elapsed_time_as_str(self,show_ms=True) -> str: 285 """ 286 Method to get elapsed time (hour format) as str. 287 288 Returns 289 ---------- 290 str or None 291 Elapsed time (float value) as str, "00:00:15.500" for instance for 15 secondes and 500 milliseconds 292 None if no frame counter are available. 293 """ 294 if self.frame_counter is None: 295 return None 296 return self.frame_counter.get_formated_elapsed_time_as_str() 297 298 def get_elapsed_time(self) -> float: 299 """ 300 Method to get elapsed time as float value rounded to 3 decimals. 301 302 Returns 303 ---------- 304 float or None 305 Elapsed time (float value) as str, 15.500 for instance for 15 secondes and 500 milliseconds 306 None if no frame counter are available. 307 """ 308 if self.frame_counter is None: 309 return None 310 return self.frame_counter.get_elapsed_time() 311 312 def is_opened(self) -> bool: 313 """ 314 Method to get status of the underlying pipe to ffmpeg. 315 316 Returns 317 ---------- 318 bool 319 True if pipe is opened (reading or writing mode), False if not. 320 """ 321 # is the pip opened? 322 if self.pipe is not None and self.pipe.poll() is None: 323 return True 324 325 return False 326 327 def close(self): 328 """ 329 Method to close current pipe to ffmpeg (if any). Ffmpeg/ffprobe will be terminated. Object can be reused using open or create methods. 330 """ 331 if self.pipe is not None: 332 if self.mode == PipeMode.WRITE_MODE: 333 # killing will make ffmpeg not finish properly the job, close the pipe 334 # to let it know that no more data are comming 335 self.pipe.stdin.close() 336 else: # self.mode == PipeMode.READ_MODE 337 # in read mode, no need to be nice, send SIGTERM on Linux,/Kill it on windows 338 self.pipe.kill() 339 340 # wait for subprocess to end 341 self.pipe.wait() 342 343 # reinit object for later use 344 self.init() 345 346 def create( self, filename, sample_rate, channels, *, writeOverExistingFile = False, 347 outputEncoding = AudioFormat.PCM32LE, encodingParams = None, plannar = True ): 348 """ 349 Method to create a audio file using parametrized access through ffmpeg. Importante note: calling create 350 on a AudioIO will close any former open video. 351 352 Parameters 353 ---------- 354 filename: str or path 355 filename of path to the file (mp4, avi, ...) 356 357 sample_rate: int 358 If defined as a positive value, sample_rates of the output file will be set to this value. 359 360 channels: int 361 If defined as a positive value, number of channels of output file will be set to this value. 362 363 fps: 364 If defined as a positive value, fps of input video will be set to this value. 365 366 outputEncoding: AudioFormat optional (default AudioFormat.PCM32LE) 367 Define audio format for samples. Possible value is AudioFormat.PCM32LE. 368 369 encodingParams: str optional (default None) 370 Parameter to pass to ffmpeg to encode video like audio filters. 371 372 plannar : bool optionnal (default True) 373 Input data to write are grouped by channel if True, interleaved instead. 374 375 Returns 376 ---------- 377 bool 378 Was the creation successfull 379 """ 380 381 # Close if already opened 382 self.close() 383 384 # Set geometry/fps of the video stream from params 385 self.sample_rate = int(sample_rate) 386 self.channels = int(channels) 387 self.plannar = plannar 388 389 # Check params 390 if self.sample_rate <= 0 or self.channels <= 0: 391 raise self.AudioIOException("Bad parameters: sample_rate={}, channels={}".format(self.sample_rate,self.channels)) 392 393 # To write audio, we do not need to know in advance frame size, we will write x values of n bytes 394 self.frameSize = None 395 396 # Video params are set, open the video 397 cmd = [self.audioProgram] # ffmpeg 398 399 if writeOverExistingFile == True: 400 cmd.extend(['-y']) 401 402 cmd.extend(['-hide_banner', 403 '-nostats', 404 '-loglevel', str(self.logLevel), 405 '-f', 'f32le', '-acodec', outputEncoding.value, # input expected coding 406 '-ar', f"{self.sample_rate}", 407 '-ac', f"{self.channels}", 408 '-i', '-']) 409 410 if encodingParams is not None: 411 cmd.extend(encodingParams.split()) 412 413 # remove video 414 cmd.extend( ['-vn', filename ] ) 415 416 if self.debugMode == True: 417 print( ' '.join(cmd), file=sys.stderr ) 418 419 # store filename and set mode 420 self.filename = filename 421 self.mode = PipeMode.WRITE_MODE 422 423 # try call ffmpeg and write frames directly to pipe 424 try: 425 self.pipe = sp.Popen(cmd, stdin=sp.PIPE) 426 self.frame_counter = FrameCounter(self.sample_rate) 427 except Exception as e: 428 # if pipe failed, reinit object and raise exception 429 self.init() 430 raise 431 432 return True 433 434 def open( self, filename, *, sample_rate = -1, channels = -1, inputEncoding = AudioFormat.PCM32LE, 435 decodingParams = None, frame_size = 1.0, plannar = True, start_time = 0.0 ): 436 """ 437 Method to read (video file containing) audio using parametrized access through ffmpeg. Importante note: calling open 438 on a AudioIO will close any former open file. 439 440 Parameters 441 ---------- 442 filename: str or path 443 filename of path to the file (mp4, avi, ...) 444 445 sample_rate: int optional (default -1) 446 If defined as a positive value, sample rate of the input audio will be converted to this value. 447 448 channels: int optional (default -1) 449 If defined as a positive value, number of channels of the input audio will converted to this value. 450 451 inputEncoding: AudioFormat optional (default AudioFormat.PCM32LE) 452 Define audio format for samples. Possible value is AudioFormat.PCM32LE. 453 454 decodingParams: str optional (default None) 455 Parameter to pass to ffmpeg to decode video like audio filters. 456 457 plannar: bool optionnal (default True) 458 Group audio samples per channel if True. Else, samples are interleaved. 459 460 frame_size: int or float (default 1.0) 461 If frame_size is an int, it is the number of expected samples in each frame, for instance 8000 for 8000 samples. 462 if frame_size is a float, it is considered as a time in seconds for each audio frame, for instance 1.0 for 1 second, 0.010 for 10 ms. 463 Number of samples in this case is computed using frame_size and sample_rate as int(frame_size * sample_rate) 464 465 start_time: float optional (default 0.0) 466 Define the reading start time. If not set, reading at beginning of the file. 467 468 Returns 469 ---------- 470 bool 471 Was the opening successfull 472 """ 473 474 # Close if already opened 475 self.close() 476 477 # Force conversion of parameters 478 channels = int(channels) 479 sample_rate = float(sample_rate) 480 481 self.plannar = plannar 482 483 # get parameters from file if needed: 484 if sample_rate <= 0 or channels <= 0: 485 self.channels, self.sample_rate = self.getAudioParams(filename) 486 487 # check if parameters ask to overide video parameters 488 if channels > 0: 489 self.channels = channels 490 if sample_rate > 0: 491 self.sample_rate = sample_rate 492 493 # check parameters 494 495 if isinstance(frame_size,float): 496 # time in seconds 497 self.frame_size = int(frame_size*self.sample_rate) 498 elif isinstance(frame_size,int): 499 # number of samples 500 self.frame_size = frame_size 501 else: 502 # to do 503 pass 504 505 # Video params are set, open the video 506 cmd = [self.audioProgram, # ffmpeg 507 '-hide_banner', 508 '-nostats', 509 '-loglevel', str(self.logLevel)] 510 511 if decodingParams is not None: 512 cmd.extend([decodingParams.split()]) 513 514 if start_time < 0.0: 515 pass 516 elif start_time > 0.0: 517 cmd.extend(["-ss", f"{start_time}"]) 518 519 cmd.extend( ['-i', filename, 520 '-f', 'f32le', '-acodec', inputEncoding.value, # input expected coding 521 '-ar', f"{self.sample_rate}", 522 '-ac', f"{self.channels}", 523 '-' # output to stdout 524 ] 525 ) 526 527 if self.debugMode == True: 528 print( ' '.join(cmd) ) 529 530 # store filename and set mode to READ_MODE 531 self.filename = filename 532 self.mode = PipeMode.READ_MODE 533 534 # try to call ffmpeg to get frames directly from pipe 535 try: 536 self.pipe = sp.Popen(cmd, stdout=sp.PIPE) 537 self.frame_counter = FrameCounter(self.sample_rate) 538 if start_time > 0.0: 539 self.frame_counter += start_time # adding with float means adding time 540 except Exception as e: 541 # if pipe failed, reinit object and raise exception 542 self.init() 543 raise 544 545 return True 546 547 def read_frame(self, with_timestamps = False): 548 """ 549 Read next frame from the audio file 550 551 Parameters 552 ---------- 553 with_timestamps: bool optional (default False) 554 If set to True, the method returns a ``FrameContainer`` with the audio and an array containing the associated timestamp(s) 555 556 Returns 557 ---------- 558 nparray or FrameContainer 559 A frame of shape (self.channels,self.frameSize) as defined in the reader/open call if self.plannar is True. A frame 560 of shape (self.channels*self.frameSize) with interleaved data if self.plannar is False. 561 if with_timestamps is True, the return object is a FrameContainer with the audio data in ``FrameContainer.data`` and 562 the associated timestamp in ``FrameContainer.timestamps`` as an array (one element). 563 """ 564 565 if self.pipe is None: 566 raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading a frame.".format(self.audioProgram)) 567 # - pipe is in write mode 568 if self.mode != PipeMode.READ_MODE: 569 raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename)) 570 571 if with_timestamps: 572 # get elapsed time in video, it is time of next frame(s) 573 current_elapsed_time = self.get_elapsed_time() 574 575 # read rgb image from pipe 576 toread = self.frame_size*4 577 buffer = self.pipe.stdout.read(toread) 578 if len(buffer) != toread: 579 # not considered as an error, no more frame, no exception 580 return None 581 582 # get numpy UINT8 array from buffer 583 audio = np.frombuffer(buffer, dtype = np.float32).reshape(self.frame_size, self.channels) 584 585 # make it plannar (or not) 586 if self.plannar: 587 #transpose it 588 audio = audio.T 589 590 # increase frame_counter 591 self.frame_counter.frame_count += (self.frame_size * self.channels) 592 593 # say to gc that this buffer is no longer needed 594 del buffer 595 596 if with_timestamps: 597 return FrameContainer(1, audio, self.frame_size/self.sample_rate, current_elapsed_time) 598 599 return audio 600 601 def read_batch(self, numberOfFrames, with_timestamps = False): 602 """ 603 Read next batch of audio from the file 604 605 Parameters 606 ---------- 607 number_of_frames: int 608 Number of desired images within the batch. The last batch from the file may have less images. 609 610 with_timestamps: bool optional (default False) 611 If set to True, the method returns a FrameContainer with the batch and the an array containing the associated timestamps to frames 612 613 Returns 614 ---------- 615 nparray or FrameContainer 616 A batch of shape (n, self.channels,self.frameSize) as defined in the reader/open call if self.plannar is True. A batch 617 of shape (n, self.channels*self.frameSize) with interleaved data if self.plannar is False. 618 if with_timestamps is True, the return object is a FrameContainer with the audio batch in ``FrameContainer.data`` and 619 the associated timestamp in ``FrameContainer.timestamps`` as an array (one element for each audio frame). 620 """ 621 622 if self.pipe is None: 623 raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading frames.".format(self.audioProgram)) 624 # - pipe is in write mode 625 if self.mode != PipeMode.READ_MODE: 626 raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename)) 627 628 if with_timestamps: 629 # get elapsed time in video, it is time of next frame(s) 630 current_elapsed_time = self.get_elapsed_time() 631 632 # try to read complete batch 633 toread = self.frame_size*4*self.channels*numberOfFrames 634 buffer = self.pipe.stdout.read(toread) 635 636 # check if we have at least 1 Frame 637 if len(buffer) < toread: 638 # not considered as an error, no more frame, no exception 639 return None 640 641 # compute actual number of Frames 642 actualNbFrames = len(buffer)//(self.frame_size*4*self.channels) 643 644 # get and reshape batch from buffer 645 batch = np.frombuffer(buffer, dtype = np.float32).reshape((actualNbFrames, self.frame_size, self.channels,)) 646 647 if self.plannar: 648 batch = batch.transpose(0, 2, 1) 649 650 # increase frame_counter 651 self.frame_counter.frame_count += (actualNbFrames * self.frame_size * self.channels) 652 653 # say to gc that this buffer is no longer needed 654 del buffer 655 656 if with_timestamps: 657 return FrameContainer( actualNbFrames, batch, self.frame_size/self.sample_rate, current_elapsed_time) 658 659 return batch 660 661 def write_frame(self, audio) -> bool: 662 """ 663 Write an audio frame to the file 664 665 Parameters 666 ---------- 667 audio: nparray 668 The audio frame to write to the video file of shape (self.channels,self.frameSize) if plannar is True else (self.channels*self.frameSize). 669 670 Returns 671 ---------- 672 bool 673 Writing was successful or not. 674 """ 675 # Check params 676 # - pipe exists 677 if self.pipe is None: 678 raise self.AudioIOException("No pipe opened to {}. Call create(...) before writing frames.".format(self.audioProgram)) 679 # - pipe is in write mode 680 if self.mode != PipeMode.WRITE_MODE: 681 raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename)) 682 # - shape of image is fine, thus we have pixels for a full compatible frame 683 if audio.shape[0] != self.channels: 684 raise self.AudioIOException("Wong audio shape: {} expected ({},{}).".format(audio.shape,self.channels,self.frame_size)) 685 # - type of data is Float32 686 if audio.dtype != np.float32: 687 raise self.AudioIOException("Wong audio type: {} expected np.float32.".format(audio.dtype)) 688 689 # array must have a shape (channels, samples), reshape it it to (samples, channels) if plannar 690 if not self.plannar: 691 audio = audio.reshape(-1) 692 693 # garantee to have a C continuous array 694 if not audio.flags['C_CONTIGUOUS']: 695 a = np.ascontiguousarray(a) 696 697 # write frame 698 buffer = audio.tobytes() 699 if self.pipe.stdin.write( buffer ) < len(buffer): 700 print( f"Error writing frame to {self.filename}" ) 701 return False 702 703 # increase frame_counter 704 self.frame_counter.frame_count += (self.frame_size * self.channels) 705 706 # say to gc that this buffer is no longer needed 707 del buffer 708 709 return True 710 711 def write_batch(self, batch): 712 """ 713 Write a batch of audio frame to the file 714 715 Parameters 716 ---------- 717 batch: nparray 718 The batch of audio frames to write to the video file of shape (n,self.channels,self.frameSize) if plannar is True else (n,self.channels*self.frameSize) of interleaved audio data. 719 720 Returns 721 ---------- 722 bool 723 Writing was successful or not. 724 """ 725 # Check params 726 # - pipe exists 727 if self.pipe is None: 728 raise self.AudioIOException("No pipe opened to {}. Call create(...) before writing frames.".format(self.audioProgram)) 729 # - pipe is in write mode 730 if self.mode != PipeMode.WRITE_MODE: 731 raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename)) 732 # batch is 3D (n, channels, nb samples) 733 if batch.ndim !=3: 734 raise self.AudioIOException("Wrong batch shape: {} expected 3 dimensions (n, n_channels, n_samples_per_channel).".format(batch.shape)) 735 # - shape of images in batch is fine 736 if batch.shape[1] != self.channels: 737 raise self.AudioIOException("Wrong audio channels in batch: {} expected {}.".format(batch.shape[2], self.channels)) 738 739 # array must have a shape (n * n_channels * n_samples_per_channel) before writing them to pipe 740 # reshape it it to (n * n_channels * n_samples_per_channel) if plannar is False 741 if not self.plannar: 742 # goes from (n, n_channels, n_samples_per_channel) to (n * n_channels * n_samples_per_channel) 743 batch = batch.transpose(0, 2, 1) # first go to (n, n_samples_per_channel, n_channels) 744 batch = batch.reshape(-1) # then to 1D array (n * n_channels * n_samples_per_channel) 745 746 # garantee to have a C continuous array 747 if not batch.flags['C_CONTIGUOUS']: 748 batch = np.ascontiguousarray(batch) 749 750 # write frame 751 buffer = batch.tobytes() 752 if self.pipe.stdin.write( buffer ) < len(buffer): 753 # say to gc that this buffer is no longer needed 754 del buffer 755 raise self.AudioIOException("Error writing batch to '{}'.".format(self.filename)) 756 757 # increase frame_counter 758 self.frame_counter.frame_count += (batch.shape[0] * self.frame_size * self.channels) 759 760 # say to gc that this buffer is no longer needed 761 del buffer 762 763 return True 764 765 def iter_frames(self, with_timestamps = False): 766 """ 767 Method to iterate on audio frames using AudioIO obj. 768 for audio_frame in obj.iter_frames(): 769 .... 770 771 Parameters 772 ---------- 773 with_timestamps: bool optional (default False) 774 If set to True, the method returns a FrameContainer object with the batch and an array containing the associated timestamps to frames 775 776 Returns 777 ---------- 778 nparray or FrameContainer 779 A batch of images of shape () 780 """ 781 782 try: 783 if self.mode == PipeMode.READ_MODE: 784 while self.isOpened(): 785 frame = self.readFrame(with_timestamps) 786 if frame is not None: 787 yield frame 788 finally: 789 self.close() 790 791 def iter_batches(self, batch_size : int, with_timestamps = False ): 792 """ 793 Method to iterate on batch ofaudio frames using VideoIO obj. 794 for audio_batch in obj.iter_batches(): 795 .... 796 797 Parameters 798 ---------- 799 with_timestamps: bool optional (default False) 800 If set to True, the method returns a FrameContainer with the batch and the an array containing the associated timestamps to frames 801 """ 802 try: 803 if self.mode == PipeMode.READ_MODE: 804 while self.isOpened(): 805 batch = self.readBatch(batch_size, with_timestamps) 806 if batch is not None: 807 yield batch 808 finally: 809 self.close() 810 811 # function aliases to be compliant with original C++ version 812 getAudioTimeInSec = get_time_in_sec 813 getAudioParams = get_params 814 isOpened = is_opened 815 readFrame = read_frame 816 readBatch = read_batch 817 writeFrame = write_frame 818 writeBatch = write_batch 819 get_audio_time_in_sec = get_time_in_sec 820 get_audio_params = get_params
31class AudioIO: 32 # "static" variables to ffmpeg, ffprobe executables 33 audioProgram, paramProgram = static_ffmpeg.run.get_or_fetch_platform_executables_else_raise() 34 35 class AudioIOException(Exception): 36 """ 37 Dedicated exception class for AudioIO class. 38 """ 39 def __init__(self, message="Error while reading/writing video occurs"): 40 self.message = message 41 super().__init__(self.message) 42 43 class AudioFormat(Enum): 44 """ 45 Enum class for supported input video type: 32-bit float is the only supported type for the moment. 46 """ 47 PCM32LE = 'pcm_f32le' # default format (unique mode for the moment) 48 49 @classmethod 50 def reader(cls, filename, **kwargs): 51 """ 52 Create and open an AudioIO object in reader mode 53 54 See ``AudioIO.open`` for the full list of accepted parameters. 55 """ 56 reader = cls() 57 reader.open(filename, **kwargs) 58 return reader 59 60 @classmethod 61 def writer(cls, filename, **kwargs): 62 """ 63 Create and open an AudioIO object in writer mode 64 65 See ``AudioIO.create`` for the full list of accepted parameters. 66 """ 67 writer = cls() 68 writer.create(filename, **kwargs) 69 return writer 70 71 # To use with context manager "with AudioIO.reader(...) as f:' for instance 72 def __enter__(self): 73 """ 74 Method call at initialisation of a context manager like "with AudioIO.reader/writer(...) as f:' for instance 75 """ 76 # simply return myself 77 return self 78 79 def __exit__(self, exc_type, exc_val, exc_tb): 80 """ 81 Method call when existing of a context manager like "with AudioIO.reader/writer(...) as f:' for instance 82 """ 83 # close AudioIO 84 self.close() 85 return False 86 87 @staticmethod 88 def get_time_in_sec(filename, *, debug=False, logLevel=16): 89 """ 90 Static method to get length of an audio file (or video file containing audio) in seconds including milliseconds as decimal part (3 decimals). 91 92 Parameters 93 ---------- 94 filename : str or path. 95 Raw audio waveform as a 1D array. 96 97 debug : bool (default False). 98 Show debug info. 99 100 log_level: int (default 16). 101 Log level to pass to the underlying ffmpeg/ffprobe command. 102 103 Returns 104 ---------- 105 float 106 Length in seconds of video file (including milliseconds as decimal part with 3 decimals) 107 """ 108 109 cmd = [AudioIO.paramProgram, # ffprobe 110 '-hide_banner', 111 '-loglevel', str(logLevel), 112 '-show_entries', 'format=duration', 113 '-of', 'default=noprint_wrappers=1:nokey=1', 114 filename 115 ] 116 117 if debug == True: 118 print(' '.join(cmd)) 119 120 # call ffprobe and get params in one single line 121 lpipe = sp.Popen(cmd, stdout=sp.PIPE) 122 output = lpipe.stdout.readlines() 123 lpipe.terminate() 124 # transform Bytes output to one single string 125 output = ''.join( [element.decode('utf-8') for element in output]) 126 127 try: 128 return float(output) 129 except (ValueError, TypeError): 130 return None 131 132 @staticmethod 133 def get_params(filename, *, debug=False, logLevel=16): 134 """ 135 Static method to get params (channels,sample_rate) of a (video containing) audio file in seconds. 136 137 Parameters 138 ---------- 139 filename : str or path. 140 Raw audio waveform as a 1D array. 141 142 debug : bool (default (False). 143 Show debug info. 144 145 log_level: int (default 16). 146 Log level to pass to the underlying ffmpeg/ffprobe command. 147 148 Returns 149 ---------- 150 tuple 151 Tuple containing (channels,sample_rate) of the file 152 """ 153 cmd = [AudioIO.paramProgram, # ffprobe 154 '-hide_banner', 155 '-loglevel', str(logLevel), 156 '-show_entries', 'stream=channels,sample_rate', 157 filename 158 ] 159 160 if debug == True: 161 print(' '.join(cmd)) 162 163 # call ffprobe and get params in one single line 164 lpipe = sp.Popen(cmd, stdout=sp.PIPE) 165 output = lpipe.stdout.readlines() 166 lpipe.terminate() 167 # transform Bytes output to one single string 168 output = ''.join( [element.decode('utf-8') for element in output]) 169 170 pattern_sample_rate = r'sample_rate=(\d+)' 171 pattern_channels = r'channels=(\d+)' 172 173 # Search for values in the ffprobe output 174 match_sample_rate = re.search(pattern_sample_rate, output, flags=re.MULTILINE) 175 match_channels = re.search(pattern_channels, output, flags=re.MULTILINE) 176 177 # Extraction des valeurs 178 if match_sample_rate: 179 sample_rate = int(match_sample_rate.group(1)) 180 else: 181 raise AudioIO.AudioIOException("Unable to get audio sample_rate of '" + str(filename) + "'") 182 183 if match_channels: 184 channels = int(match_channels.group(1)) 185 else: 186 raise AudioIO.AudioIOException("Unable to get audio channels of '" + str(filename) + "'") 187 188 return (channels,sample_rate) 189 190 # Attributes 191 mode: PipeMode 192 """ Pipemode of the current object (default PipeMode.UNK_MODE)""" 193 194 loglevel: int 195 """ loglevel of the underlying ffmpeg backend for this object (default 16)""" 196 197 debugModel: bool 198 """ debutMode flag for this object (print debut info, default False)""" 199 200 channels: int 201 """ Number of channels of images (default -1) """ 202 203 sample_rate: int 204 """ sample_rate of images (default -1) """ 205 206 plannar: bool 207 """ Read/write data as plannar, i.e. not interleaved (default True) """ 208 209 pipe: sp.Popen 210 """ pipe object to ffmpeg/ffprobe (default None)""" 211 212 frameSize: int 213 """ Weight in bytes of one image (default -1)""" 214 215 filename: str 216 """ Filename of the file (default None)""" 217 218 frame_counter: FrameCounter 219 """ `Framecounter` object to count ellapsed time (default None)""" 220 221 def __init__(self, *, logLevel = 16, debugMode = False): 222 """ 223 Create a VideoIO object giving ffmpeg/ffrobe loglevel and defining debug mode 224 225 Parameters 226 ---------- 227 log_level: int (default 16) 228 Log level to pass to the underlying ffmpeg/ffprobe command. 229 230 debugMode: bool (default (False) 231 Show debug info. while processing video 232 """ 233 234 self.mode = PipeMode.UNK_MODE 235 self.logLevel = logLevel 236 self.debugMode = debugMode 237 238 # Call init() method 239 self.init() 240 241 def init(self): 242 """ 243 Init or reinit a VideoIO object. 244 """ 245 self.channels = -1 246 self.sample_rate = -1 247 self.plannar = True 248 self.pipe = None 249 self.frameSize = -1 250 self.filename = None 251 self.frame_counter = None 252 253 _repr_exclude = {"pipe"} 254 """ List of excluded attribute for string conversion. """ 255 256 # converting the object to a string representation 257 def __repr__(self): 258 """ 259 Convert object (excluding attributes in _repr_exclude) to string representation. 260 """ 261 attrs = ", ".join( 262 f"{k}={v!r}" 263 for k, v in self.__dict__.items() 264 if k not in self._repr_exclude 265 ) 266 return f"{self.__class__.__name__}({attrs})" 267 268 __str__ = __repr__ 269 """ String representation """ 270 271 def get_elapsed_time_as_str(self) -> str: 272 """ 273 Method to get elapsed time (float value represented) as str. 274 275 Returns 276 ---------- 277 str or None 278 Elapsed time (float value) as str, "15.500" for instance for 15 secondes and 500 milliseconds 279 None if no frame counter are available. 280 """ 281 if self.frame_counter is None: 282 return None 283 return self.frame_counter.get_elapsed_time_as_str() 284 285 def get_formated_elapsed_time_as_str(self,show_ms=True) -> str: 286 """ 287 Method to get elapsed time (hour format) as str. 288 289 Returns 290 ---------- 291 str or None 292 Elapsed time (float value) as str, "00:00:15.500" for instance for 15 secondes and 500 milliseconds 293 None if no frame counter are available. 294 """ 295 if self.frame_counter is None: 296 return None 297 return self.frame_counter.get_formated_elapsed_time_as_str() 298 299 def get_elapsed_time(self) -> float: 300 """ 301 Method to get elapsed time as float value rounded to 3 decimals. 302 303 Returns 304 ---------- 305 float or None 306 Elapsed time (float value) as str, 15.500 for instance for 15 secondes and 500 milliseconds 307 None if no frame counter are available. 308 """ 309 if self.frame_counter is None: 310 return None 311 return self.frame_counter.get_elapsed_time() 312 313 def is_opened(self) -> bool: 314 """ 315 Method to get status of the underlying pipe to ffmpeg. 316 317 Returns 318 ---------- 319 bool 320 True if pipe is opened (reading or writing mode), False if not. 321 """ 322 # is the pip opened? 323 if self.pipe is not None and self.pipe.poll() is None: 324 return True 325 326 return False 327 328 def close(self): 329 """ 330 Method to close current pipe to ffmpeg (if any). Ffmpeg/ffprobe will be terminated. Object can be reused using open or create methods. 331 """ 332 if self.pipe is not None: 333 if self.mode == PipeMode.WRITE_MODE: 334 # killing will make ffmpeg not finish properly the job, close the pipe 335 # to let it know that no more data are comming 336 self.pipe.stdin.close() 337 else: # self.mode == PipeMode.READ_MODE 338 # in read mode, no need to be nice, send SIGTERM on Linux,/Kill it on windows 339 self.pipe.kill() 340 341 # wait for subprocess to end 342 self.pipe.wait() 343 344 # reinit object for later use 345 self.init() 346 347 def create( self, filename, sample_rate, channels, *, writeOverExistingFile = False, 348 outputEncoding = AudioFormat.PCM32LE, encodingParams = None, plannar = True ): 349 """ 350 Method to create a audio file using parametrized access through ffmpeg. Importante note: calling create 351 on a AudioIO will close any former open video. 352 353 Parameters 354 ---------- 355 filename: str or path 356 filename of path to the file (mp4, avi, ...) 357 358 sample_rate: int 359 If defined as a positive value, sample_rates of the output file will be set to this value. 360 361 channels: int 362 If defined as a positive value, number of channels of output file will be set to this value. 363 364 fps: 365 If defined as a positive value, fps of input video will be set to this value. 366 367 outputEncoding: AudioFormat optional (default AudioFormat.PCM32LE) 368 Define audio format for samples. Possible value is AudioFormat.PCM32LE. 369 370 encodingParams: str optional (default None) 371 Parameter to pass to ffmpeg to encode video like audio filters. 372 373 plannar : bool optionnal (default True) 374 Input data to write are grouped by channel if True, interleaved instead. 375 376 Returns 377 ---------- 378 bool 379 Was the creation successfull 380 """ 381 382 # Close if already opened 383 self.close() 384 385 # Set geometry/fps of the video stream from params 386 self.sample_rate = int(sample_rate) 387 self.channels = int(channels) 388 self.plannar = plannar 389 390 # Check params 391 if self.sample_rate <= 0 or self.channels <= 0: 392 raise self.AudioIOException("Bad parameters: sample_rate={}, channels={}".format(self.sample_rate,self.channels)) 393 394 # To write audio, we do not need to know in advance frame size, we will write x values of n bytes 395 self.frameSize = None 396 397 # Video params are set, open the video 398 cmd = [self.audioProgram] # ffmpeg 399 400 if writeOverExistingFile == True: 401 cmd.extend(['-y']) 402 403 cmd.extend(['-hide_banner', 404 '-nostats', 405 '-loglevel', str(self.logLevel), 406 '-f', 'f32le', '-acodec', outputEncoding.value, # input expected coding 407 '-ar', f"{self.sample_rate}", 408 '-ac', f"{self.channels}", 409 '-i', '-']) 410 411 if encodingParams is not None: 412 cmd.extend(encodingParams.split()) 413 414 # remove video 415 cmd.extend( ['-vn', filename ] ) 416 417 if self.debugMode == True: 418 print( ' '.join(cmd), file=sys.stderr ) 419 420 # store filename and set mode 421 self.filename = filename 422 self.mode = PipeMode.WRITE_MODE 423 424 # try call ffmpeg and write frames directly to pipe 425 try: 426 self.pipe = sp.Popen(cmd, stdin=sp.PIPE) 427 self.frame_counter = FrameCounter(self.sample_rate) 428 except Exception as e: 429 # if pipe failed, reinit object and raise exception 430 self.init() 431 raise 432 433 return True 434 435 def open( self, filename, *, sample_rate = -1, channels = -1, inputEncoding = AudioFormat.PCM32LE, 436 decodingParams = None, frame_size = 1.0, plannar = True, start_time = 0.0 ): 437 """ 438 Method to read (video file containing) audio using parametrized access through ffmpeg. Importante note: calling open 439 on a AudioIO will close any former open file. 440 441 Parameters 442 ---------- 443 filename: str or path 444 filename of path to the file (mp4, avi, ...) 445 446 sample_rate: int optional (default -1) 447 If defined as a positive value, sample rate of the input audio will be converted to this value. 448 449 channels: int optional (default -1) 450 If defined as a positive value, number of channels of the input audio will converted to this value. 451 452 inputEncoding: AudioFormat optional (default AudioFormat.PCM32LE) 453 Define audio format for samples. Possible value is AudioFormat.PCM32LE. 454 455 decodingParams: str optional (default None) 456 Parameter to pass to ffmpeg to decode video like audio filters. 457 458 plannar: bool optionnal (default True) 459 Group audio samples per channel if True. Else, samples are interleaved. 460 461 frame_size: int or float (default 1.0) 462 If frame_size is an int, it is the number of expected samples in each frame, for instance 8000 for 8000 samples. 463 if frame_size is a float, it is considered as a time in seconds for each audio frame, for instance 1.0 for 1 second, 0.010 for 10 ms. 464 Number of samples in this case is computed using frame_size and sample_rate as int(frame_size * sample_rate) 465 466 start_time: float optional (default 0.0) 467 Define the reading start time. If not set, reading at beginning of the file. 468 469 Returns 470 ---------- 471 bool 472 Was the opening successfull 473 """ 474 475 # Close if already opened 476 self.close() 477 478 # Force conversion of parameters 479 channels = int(channels) 480 sample_rate = float(sample_rate) 481 482 self.plannar = plannar 483 484 # get parameters from file if needed: 485 if sample_rate <= 0 or channels <= 0: 486 self.channels, self.sample_rate = self.getAudioParams(filename) 487 488 # check if parameters ask to overide video parameters 489 if channels > 0: 490 self.channels = channels 491 if sample_rate > 0: 492 self.sample_rate = sample_rate 493 494 # check parameters 495 496 if isinstance(frame_size,float): 497 # time in seconds 498 self.frame_size = int(frame_size*self.sample_rate) 499 elif isinstance(frame_size,int): 500 # number of samples 501 self.frame_size = frame_size 502 else: 503 # to do 504 pass 505 506 # Video params are set, open the video 507 cmd = [self.audioProgram, # ffmpeg 508 '-hide_banner', 509 '-nostats', 510 '-loglevel', str(self.logLevel)] 511 512 if decodingParams is not None: 513 cmd.extend([decodingParams.split()]) 514 515 if start_time < 0.0: 516 pass 517 elif start_time > 0.0: 518 cmd.extend(["-ss", f"{start_time}"]) 519 520 cmd.extend( ['-i', filename, 521 '-f', 'f32le', '-acodec', inputEncoding.value, # input expected coding 522 '-ar', f"{self.sample_rate}", 523 '-ac', f"{self.channels}", 524 '-' # output to stdout 525 ] 526 ) 527 528 if self.debugMode == True: 529 print( ' '.join(cmd) ) 530 531 # store filename and set mode to READ_MODE 532 self.filename = filename 533 self.mode = PipeMode.READ_MODE 534 535 # try to call ffmpeg to get frames directly from pipe 536 try: 537 self.pipe = sp.Popen(cmd, stdout=sp.PIPE) 538 self.frame_counter = FrameCounter(self.sample_rate) 539 if start_time > 0.0: 540 self.frame_counter += start_time # adding with float means adding time 541 except Exception as e: 542 # if pipe failed, reinit object and raise exception 543 self.init() 544 raise 545 546 return True 547 548 def read_frame(self, with_timestamps = False): 549 """ 550 Read next frame from the audio file 551 552 Parameters 553 ---------- 554 with_timestamps: bool optional (default False) 555 If set to True, the method returns a ``FrameContainer`` with the audio and an array containing the associated timestamp(s) 556 557 Returns 558 ---------- 559 nparray or FrameContainer 560 A frame of shape (self.channels,self.frameSize) as defined in the reader/open call if self.plannar is True. A frame 561 of shape (self.channels*self.frameSize) with interleaved data if self.plannar is False. 562 if with_timestamps is True, the return object is a FrameContainer with the audio data in ``FrameContainer.data`` and 563 the associated timestamp in ``FrameContainer.timestamps`` as an array (one element). 564 """ 565 566 if self.pipe is None: 567 raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading a frame.".format(self.audioProgram)) 568 # - pipe is in write mode 569 if self.mode != PipeMode.READ_MODE: 570 raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename)) 571 572 if with_timestamps: 573 # get elapsed time in video, it is time of next frame(s) 574 current_elapsed_time = self.get_elapsed_time() 575 576 # read rgb image from pipe 577 toread = self.frame_size*4 578 buffer = self.pipe.stdout.read(toread) 579 if len(buffer) != toread: 580 # not considered as an error, no more frame, no exception 581 return None 582 583 # get numpy UINT8 array from buffer 584 audio = np.frombuffer(buffer, dtype = np.float32).reshape(self.frame_size, self.channels) 585 586 # make it plannar (or not) 587 if self.plannar: 588 #transpose it 589 audio = audio.T 590 591 # increase frame_counter 592 self.frame_counter.frame_count += (self.frame_size * self.channels) 593 594 # say to gc that this buffer is no longer needed 595 del buffer 596 597 if with_timestamps: 598 return FrameContainer(1, audio, self.frame_size/self.sample_rate, current_elapsed_time) 599 600 return audio 601 602 def read_batch(self, numberOfFrames, with_timestamps = False): 603 """ 604 Read next batch of audio from the file 605 606 Parameters 607 ---------- 608 number_of_frames: int 609 Number of desired images within the batch. The last batch from the file may have less images. 610 611 with_timestamps: bool optional (default False) 612 If set to True, the method returns a FrameContainer with the batch and the an array containing the associated timestamps to frames 613 614 Returns 615 ---------- 616 nparray or FrameContainer 617 A batch of shape (n, self.channels,self.frameSize) as defined in the reader/open call if self.plannar is True. A batch 618 of shape (n, self.channels*self.frameSize) with interleaved data if self.plannar is False. 619 if with_timestamps is True, the return object is a FrameContainer with the audio batch in ``FrameContainer.data`` and 620 the associated timestamp in ``FrameContainer.timestamps`` as an array (one element for each audio frame). 621 """ 622 623 if self.pipe is None: 624 raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading frames.".format(self.audioProgram)) 625 # - pipe is in write mode 626 if self.mode != PipeMode.READ_MODE: 627 raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename)) 628 629 if with_timestamps: 630 # get elapsed time in video, it is time of next frame(s) 631 current_elapsed_time = self.get_elapsed_time() 632 633 # try to read complete batch 634 toread = self.frame_size*4*self.channels*numberOfFrames 635 buffer = self.pipe.stdout.read(toread) 636 637 # check if we have at least 1 Frame 638 if len(buffer) < toread: 639 # not considered as an error, no more frame, no exception 640 return None 641 642 # compute actual number of Frames 643 actualNbFrames = len(buffer)//(self.frame_size*4*self.channels) 644 645 # get and reshape batch from buffer 646 batch = np.frombuffer(buffer, dtype = np.float32).reshape((actualNbFrames, self.frame_size, self.channels,)) 647 648 if self.plannar: 649 batch = batch.transpose(0, 2, 1) 650 651 # increase frame_counter 652 self.frame_counter.frame_count += (actualNbFrames * self.frame_size * self.channels) 653 654 # say to gc that this buffer is no longer needed 655 del buffer 656 657 if with_timestamps: 658 return FrameContainer( actualNbFrames, batch, self.frame_size/self.sample_rate, current_elapsed_time) 659 660 return batch 661 662 def write_frame(self, audio) -> bool: 663 """ 664 Write an audio frame to the file 665 666 Parameters 667 ---------- 668 audio: nparray 669 The audio frame to write to the video file of shape (self.channels,self.frameSize) if plannar is True else (self.channels*self.frameSize). 670 671 Returns 672 ---------- 673 bool 674 Writing was successful or not. 675 """ 676 # Check params 677 # - pipe exists 678 if self.pipe is None: 679 raise self.AudioIOException("No pipe opened to {}. Call create(...) before writing frames.".format(self.audioProgram)) 680 # - pipe is in write mode 681 if self.mode != PipeMode.WRITE_MODE: 682 raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename)) 683 # - shape of image is fine, thus we have pixels for a full compatible frame 684 if audio.shape[0] != self.channels: 685 raise self.AudioIOException("Wong audio shape: {} expected ({},{}).".format(audio.shape,self.channels,self.frame_size)) 686 # - type of data is Float32 687 if audio.dtype != np.float32: 688 raise self.AudioIOException("Wong audio type: {} expected np.float32.".format(audio.dtype)) 689 690 # array must have a shape (channels, samples), reshape it it to (samples, channels) if plannar 691 if not self.plannar: 692 audio = audio.reshape(-1) 693 694 # garantee to have a C continuous array 695 if not audio.flags['C_CONTIGUOUS']: 696 a = np.ascontiguousarray(a) 697 698 # write frame 699 buffer = audio.tobytes() 700 if self.pipe.stdin.write( buffer ) < len(buffer): 701 print( f"Error writing frame to {self.filename}" ) 702 return False 703 704 # increase frame_counter 705 self.frame_counter.frame_count += (self.frame_size * self.channels) 706 707 # say to gc that this buffer is no longer needed 708 del buffer 709 710 return True 711 712 def write_batch(self, batch): 713 """ 714 Write a batch of audio frame to the file 715 716 Parameters 717 ---------- 718 batch: nparray 719 The batch of audio frames to write to the video file of shape (n,self.channels,self.frameSize) if plannar is True else (n,self.channels*self.frameSize) of interleaved audio data. 720 721 Returns 722 ---------- 723 bool 724 Writing was successful or not. 725 """ 726 # Check params 727 # - pipe exists 728 if self.pipe is None: 729 raise self.AudioIOException("No pipe opened to {}. Call create(...) before writing frames.".format(self.audioProgram)) 730 # - pipe is in write mode 731 if self.mode != PipeMode.WRITE_MODE: 732 raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename)) 733 # batch is 3D (n, channels, nb samples) 734 if batch.ndim !=3: 735 raise self.AudioIOException("Wrong batch shape: {} expected 3 dimensions (n, n_channels, n_samples_per_channel).".format(batch.shape)) 736 # - shape of images in batch is fine 737 if batch.shape[1] != self.channels: 738 raise self.AudioIOException("Wrong audio channels in batch: {} expected {}.".format(batch.shape[2], self.channels)) 739 740 # array must have a shape (n * n_channels * n_samples_per_channel) before writing them to pipe 741 # reshape it it to (n * n_channels * n_samples_per_channel) if plannar is False 742 if not self.plannar: 743 # goes from (n, n_channels, n_samples_per_channel) to (n * n_channels * n_samples_per_channel) 744 batch = batch.transpose(0, 2, 1) # first go to (n, n_samples_per_channel, n_channels) 745 batch = batch.reshape(-1) # then to 1D array (n * n_channels * n_samples_per_channel) 746 747 # garantee to have a C continuous array 748 if not batch.flags['C_CONTIGUOUS']: 749 batch = np.ascontiguousarray(batch) 750 751 # write frame 752 buffer = batch.tobytes() 753 if self.pipe.stdin.write( buffer ) < len(buffer): 754 # say to gc that this buffer is no longer needed 755 del buffer 756 raise self.AudioIOException("Error writing batch to '{}'.".format(self.filename)) 757 758 # increase frame_counter 759 self.frame_counter.frame_count += (batch.shape[0] * self.frame_size * self.channels) 760 761 # say to gc that this buffer is no longer needed 762 del buffer 763 764 return True 765 766 def iter_frames(self, with_timestamps = False): 767 """ 768 Method to iterate on audio frames using AudioIO obj. 769 for audio_frame in obj.iter_frames(): 770 .... 771 772 Parameters 773 ---------- 774 with_timestamps: bool optional (default False) 775 If set to True, the method returns a FrameContainer object with the batch and an array containing the associated timestamps to frames 776 777 Returns 778 ---------- 779 nparray or FrameContainer 780 A batch of images of shape () 781 """ 782 783 try: 784 if self.mode == PipeMode.READ_MODE: 785 while self.isOpened(): 786 frame = self.readFrame(with_timestamps) 787 if frame is not None: 788 yield frame 789 finally: 790 self.close() 791 792 def iter_batches(self, batch_size : int, with_timestamps = False ): 793 """ 794 Method to iterate on batch ofaudio frames using VideoIO obj. 795 for audio_batch in obj.iter_batches(): 796 .... 797 798 Parameters 799 ---------- 800 with_timestamps: bool optional (default False) 801 If set to True, the method returns a FrameContainer with the batch and the an array containing the associated timestamps to frames 802 """ 803 try: 804 if self.mode == PipeMode.READ_MODE: 805 while self.isOpened(): 806 batch = self.readBatch(batch_size, with_timestamps) 807 if batch is not None: 808 yield batch 809 finally: 810 self.close() 811 812 # function aliases to be compliant with original C++ version 813 getAudioTimeInSec = get_time_in_sec 814 getAudioParams = get_params 815 isOpened = is_opened 816 readFrame = read_frame 817 readBatch = read_batch 818 writeFrame = write_frame 819 writeBatch = write_batch 820 get_audio_time_in_sec = get_time_in_sec 821 get_audio_params = get_params
221 def __init__(self, *, logLevel = 16, debugMode = False): 222 """ 223 Create a VideoIO object giving ffmpeg/ffrobe loglevel and defining debug mode 224 225 Parameters 226 ---------- 227 log_level: int (default 16) 228 Log level to pass to the underlying ffmpeg/ffprobe command. 229 230 debugMode: bool (default (False) 231 Show debug info. while processing video 232 """ 233 234 self.mode = PipeMode.UNK_MODE 235 self.logLevel = logLevel 236 self.debugMode = debugMode 237 238 # Call init() method 239 self.init()
Create a VideoIO object giving ffmpeg/ffrobe loglevel and defining debug mode
Parameters
log_level: int (default 16) Log level to pass to the underlying ffmpeg/ffprobe command.
debugMode: bool (default (False) Show debug info. while processing video
49 @classmethod 50 def reader(cls, filename, **kwargs): 51 """ 52 Create and open an AudioIO object in reader mode 53 54 See ``AudioIO.open`` for the full list of accepted parameters. 55 """ 56 reader = cls() 57 reader.open(filename, **kwargs) 58 return reader
Create and open an AudioIO object in reader mode
See AudioIO.open for the full list of accepted parameters.
60 @classmethod 61 def writer(cls, filename, **kwargs): 62 """ 63 Create and open an AudioIO object in writer mode 64 65 See ``AudioIO.create`` for the full list of accepted parameters. 66 """ 67 writer = cls() 68 writer.create(filename, **kwargs) 69 return writer
Create and open an AudioIO object in writer mode
See AudioIO.create for the full list of accepted parameters.
87 @staticmethod 88 def get_time_in_sec(filename, *, debug=False, logLevel=16): 89 """ 90 Static method to get length of an audio file (or video file containing audio) in seconds including milliseconds as decimal part (3 decimals). 91 92 Parameters 93 ---------- 94 filename : str or path. 95 Raw audio waveform as a 1D array. 96 97 debug : bool (default False). 98 Show debug info. 99 100 log_level: int (default 16). 101 Log level to pass to the underlying ffmpeg/ffprobe command. 102 103 Returns 104 ---------- 105 float 106 Length in seconds of video file (including milliseconds as decimal part with 3 decimals) 107 """ 108 109 cmd = [AudioIO.paramProgram, # ffprobe 110 '-hide_banner', 111 '-loglevel', str(logLevel), 112 '-show_entries', 'format=duration', 113 '-of', 'default=noprint_wrappers=1:nokey=1', 114 filename 115 ] 116 117 if debug == True: 118 print(' '.join(cmd)) 119 120 # call ffprobe and get params in one single line 121 lpipe = sp.Popen(cmd, stdout=sp.PIPE) 122 output = lpipe.stdout.readlines() 123 lpipe.terminate() 124 # transform Bytes output to one single string 125 output = ''.join( [element.decode('utf-8') for element in output]) 126 127 try: 128 return float(output) 129 except (ValueError, TypeError): 130 return None
Static method to get length of an audio file (or video file containing audio) in seconds including milliseconds as decimal part (3 decimals).
Parameters
filename : str or path. Raw audio waveform as a 1D array.
debug : bool (default False). Show debug info.
log_level: int (default 16). Log level to pass to the underlying ffmpeg/ffprobe command.
Returns
float Length in seconds of video file (including milliseconds as decimal part with 3 decimals)
132 @staticmethod 133 def get_params(filename, *, debug=False, logLevel=16): 134 """ 135 Static method to get params (channels,sample_rate) of a (video containing) audio file in seconds. 136 137 Parameters 138 ---------- 139 filename : str or path. 140 Raw audio waveform as a 1D array. 141 142 debug : bool (default (False). 143 Show debug info. 144 145 log_level: int (default 16). 146 Log level to pass to the underlying ffmpeg/ffprobe command. 147 148 Returns 149 ---------- 150 tuple 151 Tuple containing (channels,sample_rate) of the file 152 """ 153 cmd = [AudioIO.paramProgram, # ffprobe 154 '-hide_banner', 155 '-loglevel', str(logLevel), 156 '-show_entries', 'stream=channels,sample_rate', 157 filename 158 ] 159 160 if debug == True: 161 print(' '.join(cmd)) 162 163 # call ffprobe and get params in one single line 164 lpipe = sp.Popen(cmd, stdout=sp.PIPE) 165 output = lpipe.stdout.readlines() 166 lpipe.terminate() 167 # transform Bytes output to one single string 168 output = ''.join( [element.decode('utf-8') for element in output]) 169 170 pattern_sample_rate = r'sample_rate=(\d+)' 171 pattern_channels = r'channels=(\d+)' 172 173 # Search for values in the ffprobe output 174 match_sample_rate = re.search(pattern_sample_rate, output, flags=re.MULTILINE) 175 match_channels = re.search(pattern_channels, output, flags=re.MULTILINE) 176 177 # Extraction des valeurs 178 if match_sample_rate: 179 sample_rate = int(match_sample_rate.group(1)) 180 else: 181 raise AudioIO.AudioIOException("Unable to get audio sample_rate of '" + str(filename) + "'") 182 183 if match_channels: 184 channels = int(match_channels.group(1)) 185 else: 186 raise AudioIO.AudioIOException("Unable to get audio channels of '" + str(filename) + "'") 187 188 return (channels,sample_rate) 189 190 # Attributes 191 mode: PipeMode 192 """ Pipemode of the current object (default PipeMode.UNK_MODE)""" 193 194 loglevel: int 195 """ loglevel of the underlying ffmpeg backend for this object (default 16)""" 196 197 debugModel: bool 198 """ debutMode flag for this object (print debut info, default False)""" 199 200 channels: int 201 """ Number of channels of images (default -1) """ 202 203 sample_rate: int 204 """ sample_rate of images (default -1) """ 205 206 plannar: bool 207 """ Read/write data as plannar, i.e. not interleaved (default True) """ 208 209 pipe: sp.Popen 210 """ pipe object to ffmpeg/ffprobe (default None)""" 211 212 frameSize: int 213 """ Weight in bytes of one image (default -1)""" 214 215 filename: str 216 """ Filename of the file (default None)""" 217 218 frame_counter: FrameCounter 219 """ `Framecounter` object to count ellapsed time (default None)"""
Static method to get params (channels,sample_rate) of a (video containing) audio file in seconds.
Parameters
filename : str or path. Raw audio waveform as a 1D array.
debug : bool (default (False). Show debug info.
log_level: int (default 16). Log level to pass to the underlying ffmpeg/ffprobe command.
Returns
tuple Tuple containing (channels,sample_rate) of the file
241 def init(self): 242 """ 243 Init or reinit a VideoIO object. 244 """ 245 self.channels = -1 246 self.sample_rate = -1 247 self.plannar = True 248 self.pipe = None 249 self.frameSize = -1 250 self.filename = None 251 self.frame_counter = None
Init or reinit a VideoIO object.
271 def get_elapsed_time_as_str(self) -> str: 272 """ 273 Method to get elapsed time (float value represented) as str. 274 275 Returns 276 ---------- 277 str or None 278 Elapsed time (float value) as str, "15.500" for instance for 15 secondes and 500 milliseconds 279 None if no frame counter are available. 280 """ 281 if self.frame_counter is None: 282 return None 283 return self.frame_counter.get_elapsed_time_as_str()
Method to get elapsed time (float value represented) as str.
Returns
str or None Elapsed time (float value) as str, "15.500" for instance for 15 secondes and 500 milliseconds None if no frame counter are available.
285 def get_formated_elapsed_time_as_str(self,show_ms=True) -> str: 286 """ 287 Method to get elapsed time (hour format) as str. 288 289 Returns 290 ---------- 291 str or None 292 Elapsed time (float value) as str, "00:00:15.500" for instance for 15 secondes and 500 milliseconds 293 None if no frame counter are available. 294 """ 295 if self.frame_counter is None: 296 return None 297 return self.frame_counter.get_formated_elapsed_time_as_str()
Method to get elapsed time (hour format) as str.
Returns
str or None Elapsed time (float value) as str, "00:00:15.500" for instance for 15 secondes and 500 milliseconds None if no frame counter are available.
299 def get_elapsed_time(self) -> float: 300 """ 301 Method to get elapsed time as float value rounded to 3 decimals. 302 303 Returns 304 ---------- 305 float or None 306 Elapsed time (float value) as str, 15.500 for instance for 15 secondes and 500 milliseconds 307 None if no frame counter are available. 308 """ 309 if self.frame_counter is None: 310 return None 311 return self.frame_counter.get_elapsed_time()
Method to get elapsed time as float value rounded to 3 decimals.
Returns
float or None Elapsed time (float value) as str, 15.500 for instance for 15 secondes and 500 milliseconds None if no frame counter are available.
313 def is_opened(self) -> bool: 314 """ 315 Method to get status of the underlying pipe to ffmpeg. 316 317 Returns 318 ---------- 319 bool 320 True if pipe is opened (reading or writing mode), False if not. 321 """ 322 # is the pip opened? 323 if self.pipe is not None and self.pipe.poll() is None: 324 return True 325 326 return False
Method to get status of the underlying pipe to ffmpeg.
Returns
bool True if pipe is opened (reading or writing mode), False if not.
328 def close(self): 329 """ 330 Method to close current pipe to ffmpeg (if any). Ffmpeg/ffprobe will be terminated. Object can be reused using open or create methods. 331 """ 332 if self.pipe is not None: 333 if self.mode == PipeMode.WRITE_MODE: 334 # killing will make ffmpeg not finish properly the job, close the pipe 335 # to let it know that no more data are comming 336 self.pipe.stdin.close() 337 else: # self.mode == PipeMode.READ_MODE 338 # in read mode, no need to be nice, send SIGTERM on Linux,/Kill it on windows 339 self.pipe.kill() 340 341 # wait for subprocess to end 342 self.pipe.wait() 343 344 # reinit object for later use 345 self.init()
Method to close current pipe to ffmpeg (if any). Ffmpeg/ffprobe will be terminated. Object can be reused using open or create methods.
347 def create( self, filename, sample_rate, channels, *, writeOverExistingFile = False, 348 outputEncoding = AudioFormat.PCM32LE, encodingParams = None, plannar = True ): 349 """ 350 Method to create a audio file using parametrized access through ffmpeg. Importante note: calling create 351 on a AudioIO will close any former open video. 352 353 Parameters 354 ---------- 355 filename: str or path 356 filename of path to the file (mp4, avi, ...) 357 358 sample_rate: int 359 If defined as a positive value, sample_rates of the output file will be set to this value. 360 361 channels: int 362 If defined as a positive value, number of channels of output file will be set to this value. 363 364 fps: 365 If defined as a positive value, fps of input video will be set to this value. 366 367 outputEncoding: AudioFormat optional (default AudioFormat.PCM32LE) 368 Define audio format for samples. Possible value is AudioFormat.PCM32LE. 369 370 encodingParams: str optional (default None) 371 Parameter to pass to ffmpeg to encode video like audio filters. 372 373 plannar : bool optionnal (default True) 374 Input data to write are grouped by channel if True, interleaved instead. 375 376 Returns 377 ---------- 378 bool 379 Was the creation successfull 380 """ 381 382 # Close if already opened 383 self.close() 384 385 # Set geometry/fps of the video stream from params 386 self.sample_rate = int(sample_rate) 387 self.channels = int(channels) 388 self.plannar = plannar 389 390 # Check params 391 if self.sample_rate <= 0 or self.channels <= 0: 392 raise self.AudioIOException("Bad parameters: sample_rate={}, channels={}".format(self.sample_rate,self.channels)) 393 394 # To write audio, we do not need to know in advance frame size, we will write x values of n bytes 395 self.frameSize = None 396 397 # Video params are set, open the video 398 cmd = [self.audioProgram] # ffmpeg 399 400 if writeOverExistingFile == True: 401 cmd.extend(['-y']) 402 403 cmd.extend(['-hide_banner', 404 '-nostats', 405 '-loglevel', str(self.logLevel), 406 '-f', 'f32le', '-acodec', outputEncoding.value, # input expected coding 407 '-ar', f"{self.sample_rate}", 408 '-ac', f"{self.channels}", 409 '-i', '-']) 410 411 if encodingParams is not None: 412 cmd.extend(encodingParams.split()) 413 414 # remove video 415 cmd.extend( ['-vn', filename ] ) 416 417 if self.debugMode == True: 418 print( ' '.join(cmd), file=sys.stderr ) 419 420 # store filename and set mode 421 self.filename = filename 422 self.mode = PipeMode.WRITE_MODE 423 424 # try call ffmpeg and write frames directly to pipe 425 try: 426 self.pipe = sp.Popen(cmd, stdin=sp.PIPE) 427 self.frame_counter = FrameCounter(self.sample_rate) 428 except Exception as e: 429 # if pipe failed, reinit object and raise exception 430 self.init() 431 raise 432 433 return True
Method to create a audio file using parametrized access through ffmpeg. Importante note: calling create on a AudioIO will close any former open video.
Parameters
filename: str or path filename of path to the file (mp4, avi, ...)
sample_rate: int If defined as a positive value, sample_rates of the output file will be set to this value.
channels: int If defined as a positive value, number of channels of output file will be set to this value.
fps: If defined as a positive value, fps of input video will be set to this value.
outputEncoding: AudioFormat optional (default AudioFormat.PCM32LE) Define audio format for samples. Possible value is AudioFormat.PCM32LE.
encodingParams: str optional (default None) Parameter to pass to ffmpeg to encode video like audio filters.
plannar : bool optionnal (default True) Input data to write are grouped by channel if True, interleaved instead.
Returns
bool Was the creation successfull
435 def open( self, filename, *, sample_rate = -1, channels = -1, inputEncoding = AudioFormat.PCM32LE, 436 decodingParams = None, frame_size = 1.0, plannar = True, start_time = 0.0 ): 437 """ 438 Method to read (video file containing) audio using parametrized access through ffmpeg. Importante note: calling open 439 on a AudioIO will close any former open file. 440 441 Parameters 442 ---------- 443 filename: str or path 444 filename of path to the file (mp4, avi, ...) 445 446 sample_rate: int optional (default -1) 447 If defined as a positive value, sample rate of the input audio will be converted to this value. 448 449 channels: int optional (default -1) 450 If defined as a positive value, number of channels of the input audio will converted to this value. 451 452 inputEncoding: AudioFormat optional (default AudioFormat.PCM32LE) 453 Define audio format for samples. Possible value is AudioFormat.PCM32LE. 454 455 decodingParams: str optional (default None) 456 Parameter to pass to ffmpeg to decode video like audio filters. 457 458 plannar: bool optionnal (default True) 459 Group audio samples per channel if True. Else, samples are interleaved. 460 461 frame_size: int or float (default 1.0) 462 If frame_size is an int, it is the number of expected samples in each frame, for instance 8000 for 8000 samples. 463 if frame_size is a float, it is considered as a time in seconds for each audio frame, for instance 1.0 for 1 second, 0.010 for 10 ms. 464 Number of samples in this case is computed using frame_size and sample_rate as int(frame_size * sample_rate) 465 466 start_time: float optional (default 0.0) 467 Define the reading start time. If not set, reading at beginning of the file. 468 469 Returns 470 ---------- 471 bool 472 Was the opening successfull 473 """ 474 475 # Close if already opened 476 self.close() 477 478 # Force conversion of parameters 479 channels = int(channels) 480 sample_rate = float(sample_rate) 481 482 self.plannar = plannar 483 484 # get parameters from file if needed: 485 if sample_rate <= 0 or channels <= 0: 486 self.channels, self.sample_rate = self.getAudioParams(filename) 487 488 # check if parameters ask to overide video parameters 489 if channels > 0: 490 self.channels = channels 491 if sample_rate > 0: 492 self.sample_rate = sample_rate 493 494 # check parameters 495 496 if isinstance(frame_size,float): 497 # time in seconds 498 self.frame_size = int(frame_size*self.sample_rate) 499 elif isinstance(frame_size,int): 500 # number of samples 501 self.frame_size = frame_size 502 else: 503 # to do 504 pass 505 506 # Video params are set, open the video 507 cmd = [self.audioProgram, # ffmpeg 508 '-hide_banner', 509 '-nostats', 510 '-loglevel', str(self.logLevel)] 511 512 if decodingParams is not None: 513 cmd.extend([decodingParams.split()]) 514 515 if start_time < 0.0: 516 pass 517 elif start_time > 0.0: 518 cmd.extend(["-ss", f"{start_time}"]) 519 520 cmd.extend( ['-i', filename, 521 '-f', 'f32le', '-acodec', inputEncoding.value, # input expected coding 522 '-ar', f"{self.sample_rate}", 523 '-ac', f"{self.channels}", 524 '-' # output to stdout 525 ] 526 ) 527 528 if self.debugMode == True: 529 print( ' '.join(cmd) ) 530 531 # store filename and set mode to READ_MODE 532 self.filename = filename 533 self.mode = PipeMode.READ_MODE 534 535 # try to call ffmpeg to get frames directly from pipe 536 try: 537 self.pipe = sp.Popen(cmd, stdout=sp.PIPE) 538 self.frame_counter = FrameCounter(self.sample_rate) 539 if start_time > 0.0: 540 self.frame_counter += start_time # adding with float means adding time 541 except Exception as e: 542 # if pipe failed, reinit object and raise exception 543 self.init() 544 raise 545 546 return True
Method to read (video file containing) audio using parametrized access through ffmpeg. Importante note: calling open on a AudioIO will close any former open file.
Parameters
filename: str or path filename of path to the file (mp4, avi, ...)
sample_rate: int optional (default -1) If defined as a positive value, sample rate of the input audio will be converted to this value.
channels: int optional (default -1) If defined as a positive value, number of channels of the input audio will converted to this value.
inputEncoding: AudioFormat optional (default AudioFormat.PCM32LE) Define audio format for samples. Possible value is AudioFormat.PCM32LE.
decodingParams: str optional (default None) Parameter to pass to ffmpeg to decode video like audio filters.
plannar: bool optionnal (default True) Group audio samples per channel if True. Else, samples are interleaved.
frame_size: int or float (default 1.0) If frame_size is an int, it is the number of expected samples in each frame, for instance 8000 for 8000 samples. if frame_size is a float, it is considered as a time in seconds for each audio frame, for instance 1.0 for 1 second, 0.010 for 10 ms. Number of samples in this case is computed using frame_size and sample_rate as int(frame_size * sample_rate)
start_time: float optional (default 0.0) Define the reading start time. If not set, reading at beginning of the file.
Returns
bool Was the opening successfull
548 def read_frame(self, with_timestamps = False): 549 """ 550 Read next frame from the audio file 551 552 Parameters 553 ---------- 554 with_timestamps: bool optional (default False) 555 If set to True, the method returns a ``FrameContainer`` with the audio and an array containing the associated timestamp(s) 556 557 Returns 558 ---------- 559 nparray or FrameContainer 560 A frame of shape (self.channels,self.frameSize) as defined in the reader/open call if self.plannar is True. A frame 561 of shape (self.channels*self.frameSize) with interleaved data if self.plannar is False. 562 if with_timestamps is True, the return object is a FrameContainer with the audio data in ``FrameContainer.data`` and 563 the associated timestamp in ``FrameContainer.timestamps`` as an array (one element). 564 """ 565 566 if self.pipe is None: 567 raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading a frame.".format(self.audioProgram)) 568 # - pipe is in write mode 569 if self.mode != PipeMode.READ_MODE: 570 raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename)) 571 572 if with_timestamps: 573 # get elapsed time in video, it is time of next frame(s) 574 current_elapsed_time = self.get_elapsed_time() 575 576 # read rgb image from pipe 577 toread = self.frame_size*4 578 buffer = self.pipe.stdout.read(toread) 579 if len(buffer) != toread: 580 # not considered as an error, no more frame, no exception 581 return None 582 583 # get numpy UINT8 array from buffer 584 audio = np.frombuffer(buffer, dtype = np.float32).reshape(self.frame_size, self.channels) 585 586 # make it plannar (or not) 587 if self.plannar: 588 #transpose it 589 audio = audio.T 590 591 # increase frame_counter 592 self.frame_counter.frame_count += (self.frame_size * self.channels) 593 594 # say to gc that this buffer is no longer needed 595 del buffer 596 597 if with_timestamps: 598 return FrameContainer(1, audio, self.frame_size/self.sample_rate, current_elapsed_time) 599 600 return audio
Read next frame from the audio file
Parameters
with_timestamps: bool optional (default False)
If set to True, the method returns a FrameContainer with the audio and an array containing the associated timestamp(s)
Returns
nparray or FrameContainer
A frame of shape (self.channels,self.frameSize) as defined in the reader/open call if self.plannar is True. A frame
of shape (self.channels*self.frameSize) with interleaved data if self.plannar is False.
if with_timestamps is True, the return object is a FrameContainer with the audio data in FrameContainer.data and
the associated timestamp in FrameContainer.timestamps as an array (one element).
602 def read_batch(self, numberOfFrames, with_timestamps = False): 603 """ 604 Read next batch of audio from the file 605 606 Parameters 607 ---------- 608 number_of_frames: int 609 Number of desired images within the batch. The last batch from the file may have less images. 610 611 with_timestamps: bool optional (default False) 612 If set to True, the method returns a FrameContainer with the batch and the an array containing the associated timestamps to frames 613 614 Returns 615 ---------- 616 nparray or FrameContainer 617 A batch of shape (n, self.channels,self.frameSize) as defined in the reader/open call if self.plannar is True. A batch 618 of shape (n, self.channels*self.frameSize) with interleaved data if self.plannar is False. 619 if with_timestamps is True, the return object is a FrameContainer with the audio batch in ``FrameContainer.data`` and 620 the associated timestamp in ``FrameContainer.timestamps`` as an array (one element for each audio frame). 621 """ 622 623 if self.pipe is None: 624 raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading frames.".format(self.audioProgram)) 625 # - pipe is in write mode 626 if self.mode != PipeMode.READ_MODE: 627 raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename)) 628 629 if with_timestamps: 630 # get elapsed time in video, it is time of next frame(s) 631 current_elapsed_time = self.get_elapsed_time() 632 633 # try to read complete batch 634 toread = self.frame_size*4*self.channels*numberOfFrames 635 buffer = self.pipe.stdout.read(toread) 636 637 # check if we have at least 1 Frame 638 if len(buffer) < toread: 639 # not considered as an error, no more frame, no exception 640 return None 641 642 # compute actual number of Frames 643 actualNbFrames = len(buffer)//(self.frame_size*4*self.channels) 644 645 # get and reshape batch from buffer 646 batch = np.frombuffer(buffer, dtype = np.float32).reshape((actualNbFrames, self.frame_size, self.channels,)) 647 648 if self.plannar: 649 batch = batch.transpose(0, 2, 1) 650 651 # increase frame_counter 652 self.frame_counter.frame_count += (actualNbFrames * self.frame_size * self.channels) 653 654 # say to gc that this buffer is no longer needed 655 del buffer 656 657 if with_timestamps: 658 return FrameContainer( actualNbFrames, batch, self.frame_size/self.sample_rate, current_elapsed_time) 659 660 return batch
Read next batch of audio from the file
Parameters
number_of_frames: int Number of desired images within the batch. The last batch from the file may have less images.
with_timestamps: bool optional (default False) If set to True, the method returns a FrameContainer with the batch and the an array containing the associated timestamps to frames
Returns
nparray or FrameContainer
A batch of shape (n, self.channels,self.frameSize) as defined in the reader/open call if self.plannar is True. A batch
of shape (n, self.channels*self.frameSize) with interleaved data if self.plannar is False.
if with_timestamps is True, the return object is a FrameContainer with the audio batch in FrameContainer.data and
the associated timestamp in FrameContainer.timestamps as an array (one element for each audio frame).
662 def write_frame(self, audio) -> bool: 663 """ 664 Write an audio frame to the file 665 666 Parameters 667 ---------- 668 audio: nparray 669 The audio frame to write to the video file of shape (self.channels,self.frameSize) if plannar is True else (self.channels*self.frameSize). 670 671 Returns 672 ---------- 673 bool 674 Writing was successful or not. 675 """ 676 # Check params 677 # - pipe exists 678 if self.pipe is None: 679 raise self.AudioIOException("No pipe opened to {}. Call create(...) before writing frames.".format(self.audioProgram)) 680 # - pipe is in write mode 681 if self.mode != PipeMode.WRITE_MODE: 682 raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename)) 683 # - shape of image is fine, thus we have pixels for a full compatible frame 684 if audio.shape[0] != self.channels: 685 raise self.AudioIOException("Wong audio shape: {} expected ({},{}).".format(audio.shape,self.channels,self.frame_size)) 686 # - type of data is Float32 687 if audio.dtype != np.float32: 688 raise self.AudioIOException("Wong audio type: {} expected np.float32.".format(audio.dtype)) 689 690 # array must have a shape (channels, samples), reshape it it to (samples, channels) if plannar 691 if not self.plannar: 692 audio = audio.reshape(-1) 693 694 # garantee to have a C continuous array 695 if not audio.flags['C_CONTIGUOUS']: 696 a = np.ascontiguousarray(a) 697 698 # write frame 699 buffer = audio.tobytes() 700 if self.pipe.stdin.write( buffer ) < len(buffer): 701 print( f"Error writing frame to {self.filename}" ) 702 return False 703 704 # increase frame_counter 705 self.frame_counter.frame_count += (self.frame_size * self.channels) 706 707 # say to gc that this buffer is no longer needed 708 del buffer 709 710 return True
Write an audio frame to the file
Parameters
audio: nparray The audio frame to write to the video file of shape (self.channels,self.frameSize) if plannar is True else (self.channels*self.frameSize).
Returns
bool Writing was successful or not.
712 def write_batch(self, batch): 713 """ 714 Write a batch of audio frame to the file 715 716 Parameters 717 ---------- 718 batch: nparray 719 The batch of audio frames to write to the video file of shape (n,self.channels,self.frameSize) if plannar is True else (n,self.channels*self.frameSize) of interleaved audio data. 720 721 Returns 722 ---------- 723 bool 724 Writing was successful or not. 725 """ 726 # Check params 727 # - pipe exists 728 if self.pipe is None: 729 raise self.AudioIOException("No pipe opened to {}. Call create(...) before writing frames.".format(self.audioProgram)) 730 # - pipe is in write mode 731 if self.mode != PipeMode.WRITE_MODE: 732 raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename)) 733 # batch is 3D (n, channels, nb samples) 734 if batch.ndim !=3: 735 raise self.AudioIOException("Wrong batch shape: {} expected 3 dimensions (n, n_channels, n_samples_per_channel).".format(batch.shape)) 736 # - shape of images in batch is fine 737 if batch.shape[1] != self.channels: 738 raise self.AudioIOException("Wrong audio channels in batch: {} expected {}.".format(batch.shape[2], self.channels)) 739 740 # array must have a shape (n * n_channels * n_samples_per_channel) before writing them to pipe 741 # reshape it it to (n * n_channels * n_samples_per_channel) if plannar is False 742 if not self.plannar: 743 # goes from (n, n_channels, n_samples_per_channel) to (n * n_channels * n_samples_per_channel) 744 batch = batch.transpose(0, 2, 1) # first go to (n, n_samples_per_channel, n_channels) 745 batch = batch.reshape(-1) # then to 1D array (n * n_channels * n_samples_per_channel) 746 747 # garantee to have a C continuous array 748 if not batch.flags['C_CONTIGUOUS']: 749 batch = np.ascontiguousarray(batch) 750 751 # write frame 752 buffer = batch.tobytes() 753 if self.pipe.stdin.write( buffer ) < len(buffer): 754 # say to gc that this buffer is no longer needed 755 del buffer 756 raise self.AudioIOException("Error writing batch to '{}'.".format(self.filename)) 757 758 # increase frame_counter 759 self.frame_counter.frame_count += (batch.shape[0] * self.frame_size * self.channels) 760 761 # say to gc that this buffer is no longer needed 762 del buffer 763 764 return True
Write a batch of audio frame to the file
Parameters
batch: nparray The batch of audio frames to write to the video file of shape (n,self.channels,self.frameSize) if plannar is True else (n,self.channels*self.frameSize) of interleaved audio data.
Returns
bool Writing was successful or not.
766 def iter_frames(self, with_timestamps = False): 767 """ 768 Method to iterate on audio frames using AudioIO obj. 769 for audio_frame in obj.iter_frames(): 770 .... 771 772 Parameters 773 ---------- 774 with_timestamps: bool optional (default False) 775 If set to True, the method returns a FrameContainer object with the batch and an array containing the associated timestamps to frames 776 777 Returns 778 ---------- 779 nparray or FrameContainer 780 A batch of images of shape () 781 """ 782 783 try: 784 if self.mode == PipeMode.READ_MODE: 785 while self.isOpened(): 786 frame = self.readFrame(with_timestamps) 787 if frame is not None: 788 yield frame 789 finally: 790 self.close()
Method to iterate on audio frames using AudioIO obj. for audio_frame in obj.iter_frames(): ....
Parameters
with_timestamps: bool optional (default False) If set to True, the method returns a FrameContainer object with the batch and an array containing the associated timestamps to frames
Returns
nparray or FrameContainer A batch of images of shape ()
792 def iter_batches(self, batch_size : int, with_timestamps = False ): 793 """ 794 Method to iterate on batch ofaudio frames using VideoIO obj. 795 for audio_batch in obj.iter_batches(): 796 .... 797 798 Parameters 799 ---------- 800 with_timestamps: bool optional (default False) 801 If set to True, the method returns a FrameContainer with the batch and the an array containing the associated timestamps to frames 802 """ 803 try: 804 if self.mode == PipeMode.READ_MODE: 805 while self.isOpened(): 806 batch = self.readBatch(batch_size, with_timestamps) 807 if batch is not None: 808 yield batch 809 finally: 810 self.close()
Method to iterate on batch ofaudio frames using VideoIO obj. for audio_batch in obj.iter_batches(): ....
Parameters
with_timestamps: bool optional (default False) If set to True, the method returns a FrameContainer with the batch and the an array containing the associated timestamps to frames
87 @staticmethod 88 def get_time_in_sec(filename, *, debug=False, logLevel=16): 89 """ 90 Static method to get length of an audio file (or video file containing audio) in seconds including milliseconds as decimal part (3 decimals). 91 92 Parameters 93 ---------- 94 filename : str or path. 95 Raw audio waveform as a 1D array. 96 97 debug : bool (default False). 98 Show debug info. 99 100 log_level: int (default 16). 101 Log level to pass to the underlying ffmpeg/ffprobe command. 102 103 Returns 104 ---------- 105 float 106 Length in seconds of video file (including milliseconds as decimal part with 3 decimals) 107 """ 108 109 cmd = [AudioIO.paramProgram, # ffprobe 110 '-hide_banner', 111 '-loglevel', str(logLevel), 112 '-show_entries', 'format=duration', 113 '-of', 'default=noprint_wrappers=1:nokey=1', 114 filename 115 ] 116 117 if debug == True: 118 print(' '.join(cmd)) 119 120 # call ffprobe and get params in one single line 121 lpipe = sp.Popen(cmd, stdout=sp.PIPE) 122 output = lpipe.stdout.readlines() 123 lpipe.terminate() 124 # transform Bytes output to one single string 125 output = ''.join( [element.decode('utf-8') for element in output]) 126 127 try: 128 return float(output) 129 except (ValueError, TypeError): 130 return None
Static method to get length of an audio file (or video file containing audio) in seconds including milliseconds as decimal part (3 decimals).
Parameters
filename : str or path. Raw audio waveform as a 1D array.
debug : bool (default False). Show debug info.
log_level: int (default 16). Log level to pass to the underlying ffmpeg/ffprobe command.
Returns
float Length in seconds of video file (including milliseconds as decimal part with 3 decimals)
132 @staticmethod 133 def get_params(filename, *, debug=False, logLevel=16): 134 """ 135 Static method to get params (channels,sample_rate) of a (video containing) audio file in seconds. 136 137 Parameters 138 ---------- 139 filename : str or path. 140 Raw audio waveform as a 1D array. 141 142 debug : bool (default (False). 143 Show debug info. 144 145 log_level: int (default 16). 146 Log level to pass to the underlying ffmpeg/ffprobe command. 147 148 Returns 149 ---------- 150 tuple 151 Tuple containing (channels,sample_rate) of the file 152 """ 153 cmd = [AudioIO.paramProgram, # ffprobe 154 '-hide_banner', 155 '-loglevel', str(logLevel), 156 '-show_entries', 'stream=channels,sample_rate', 157 filename 158 ] 159 160 if debug == True: 161 print(' '.join(cmd)) 162 163 # call ffprobe and get params in one single line 164 lpipe = sp.Popen(cmd, stdout=sp.PIPE) 165 output = lpipe.stdout.readlines() 166 lpipe.terminate() 167 # transform Bytes output to one single string 168 output = ''.join( [element.decode('utf-8') for element in output]) 169 170 pattern_sample_rate = r'sample_rate=(\d+)' 171 pattern_channels = r'channels=(\d+)' 172 173 # Search for values in the ffprobe output 174 match_sample_rate = re.search(pattern_sample_rate, output, flags=re.MULTILINE) 175 match_channels = re.search(pattern_channels, output, flags=re.MULTILINE) 176 177 # Extraction des valeurs 178 if match_sample_rate: 179 sample_rate = int(match_sample_rate.group(1)) 180 else: 181 raise AudioIO.AudioIOException("Unable to get audio sample_rate of '" + str(filename) + "'") 182 183 if match_channels: 184 channels = int(match_channels.group(1)) 185 else: 186 raise AudioIO.AudioIOException("Unable to get audio channels of '" + str(filename) + "'") 187 188 return (channels,sample_rate) 189 190 # Attributes 191 mode: PipeMode 192 """ Pipemode of the current object (default PipeMode.UNK_MODE)""" 193 194 loglevel: int 195 """ loglevel of the underlying ffmpeg backend for this object (default 16)""" 196 197 debugModel: bool 198 """ debutMode flag for this object (print debut info, default False)""" 199 200 channels: int 201 """ Number of channels of images (default -1) """ 202 203 sample_rate: int 204 """ sample_rate of images (default -1) """ 205 206 plannar: bool 207 """ Read/write data as plannar, i.e. not interleaved (default True) """ 208 209 pipe: sp.Popen 210 """ pipe object to ffmpeg/ffprobe (default None)""" 211 212 frameSize: int 213 """ Weight in bytes of one image (default -1)""" 214 215 filename: str 216 """ Filename of the file (default None)""" 217 218 frame_counter: FrameCounter 219 """ `Framecounter` object to count ellapsed time (default None)"""
Static method to get params (channels,sample_rate) of a (video containing) audio file in seconds.
Parameters
filename : str or path. Raw audio waveform as a 1D array.
debug : bool (default (False). Show debug info.
log_level: int (default 16). Log level to pass to the underlying ffmpeg/ffprobe command.
Returns
tuple Tuple containing (channels,sample_rate) of the file
313 def is_opened(self) -> bool: 314 """ 315 Method to get status of the underlying pipe to ffmpeg. 316 317 Returns 318 ---------- 319 bool 320 True if pipe is opened (reading or writing mode), False if not. 321 """ 322 # is the pip opened? 323 if self.pipe is not None and self.pipe.poll() is None: 324 return True 325 326 return False
Method to get status of the underlying pipe to ffmpeg.
Returns
bool True if pipe is opened (reading or writing mode), False if not.
548 def read_frame(self, with_timestamps = False): 549 """ 550 Read next frame from the audio file 551 552 Parameters 553 ---------- 554 with_timestamps: bool optional (default False) 555 If set to True, the method returns a ``FrameContainer`` with the audio and an array containing the associated timestamp(s) 556 557 Returns 558 ---------- 559 nparray or FrameContainer 560 A frame of shape (self.channels,self.frameSize) as defined in the reader/open call if self.plannar is True. A frame 561 of shape (self.channels*self.frameSize) with interleaved data if self.plannar is False. 562 if with_timestamps is True, the return object is a FrameContainer with the audio data in ``FrameContainer.data`` and 563 the associated timestamp in ``FrameContainer.timestamps`` as an array (one element). 564 """ 565 566 if self.pipe is None: 567 raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading a frame.".format(self.audioProgram)) 568 # - pipe is in write mode 569 if self.mode != PipeMode.READ_MODE: 570 raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename)) 571 572 if with_timestamps: 573 # get elapsed time in video, it is time of next frame(s) 574 current_elapsed_time = self.get_elapsed_time() 575 576 # read rgb image from pipe 577 toread = self.frame_size*4 578 buffer = self.pipe.stdout.read(toread) 579 if len(buffer) != toread: 580 # not considered as an error, no more frame, no exception 581 return None 582 583 # get numpy UINT8 array from buffer 584 audio = np.frombuffer(buffer, dtype = np.float32).reshape(self.frame_size, self.channels) 585 586 # make it plannar (or not) 587 if self.plannar: 588 #transpose it 589 audio = audio.T 590 591 # increase frame_counter 592 self.frame_counter.frame_count += (self.frame_size * self.channels) 593 594 # say to gc that this buffer is no longer needed 595 del buffer 596 597 if with_timestamps: 598 return FrameContainer(1, audio, self.frame_size/self.sample_rate, current_elapsed_time) 599 600 return audio
Read next frame from the audio file
Parameters
with_timestamps: bool optional (default False)
If set to True, the method returns a FrameContainer with the audio and an array containing the associated timestamp(s)
Returns
nparray or FrameContainer
A frame of shape (self.channels,self.frameSize) as defined in the reader/open call if self.plannar is True. A frame
of shape (self.channels*self.frameSize) with interleaved data if self.plannar is False.
if with_timestamps is True, the return object is a FrameContainer with the audio data in FrameContainer.data and
the associated timestamp in FrameContainer.timestamps as an array (one element).
602 def read_batch(self, numberOfFrames, with_timestamps = False): 603 """ 604 Read next batch of audio from the file 605 606 Parameters 607 ---------- 608 number_of_frames: int 609 Number of desired images within the batch. The last batch from the file may have less images. 610 611 with_timestamps: bool optional (default False) 612 If set to True, the method returns a FrameContainer with the batch and the an array containing the associated timestamps to frames 613 614 Returns 615 ---------- 616 nparray or FrameContainer 617 A batch of shape (n, self.channels,self.frameSize) as defined in the reader/open call if self.plannar is True. A batch 618 of shape (n, self.channels*self.frameSize) with interleaved data if self.plannar is False. 619 if with_timestamps is True, the return object is a FrameContainer with the audio batch in ``FrameContainer.data`` and 620 the associated timestamp in ``FrameContainer.timestamps`` as an array (one element for each audio frame). 621 """ 622 623 if self.pipe is None: 624 raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading frames.".format(self.audioProgram)) 625 # - pipe is in write mode 626 if self.mode != PipeMode.READ_MODE: 627 raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename)) 628 629 if with_timestamps: 630 # get elapsed time in video, it is time of next frame(s) 631 current_elapsed_time = self.get_elapsed_time() 632 633 # try to read complete batch 634 toread = self.frame_size*4*self.channels*numberOfFrames 635 buffer = self.pipe.stdout.read(toread) 636 637 # check if we have at least 1 Frame 638 if len(buffer) < toread: 639 # not considered as an error, no more frame, no exception 640 return None 641 642 # compute actual number of Frames 643 actualNbFrames = len(buffer)//(self.frame_size*4*self.channels) 644 645 # get and reshape batch from buffer 646 batch = np.frombuffer(buffer, dtype = np.float32).reshape((actualNbFrames, self.frame_size, self.channels,)) 647 648 if self.plannar: 649 batch = batch.transpose(0, 2, 1) 650 651 # increase frame_counter 652 self.frame_counter.frame_count += (actualNbFrames * self.frame_size * self.channels) 653 654 # say to gc that this buffer is no longer needed 655 del buffer 656 657 if with_timestamps: 658 return FrameContainer( actualNbFrames, batch, self.frame_size/self.sample_rate, current_elapsed_time) 659 660 return batch
Read next batch of audio from the file
Parameters
number_of_frames: int Number of desired images within the batch. The last batch from the file may have less images.
with_timestamps: bool optional (default False) If set to True, the method returns a FrameContainer with the batch and the an array containing the associated timestamps to frames
Returns
nparray or FrameContainer
A batch of shape (n, self.channels,self.frameSize) as defined in the reader/open call if self.plannar is True. A batch
of shape (n, self.channels*self.frameSize) with interleaved data if self.plannar is False.
if with_timestamps is True, the return object is a FrameContainer with the audio batch in FrameContainer.data and
the associated timestamp in FrameContainer.timestamps as an array (one element for each audio frame).
662 def write_frame(self, audio) -> bool: 663 """ 664 Write an audio frame to the file 665 666 Parameters 667 ---------- 668 audio: nparray 669 The audio frame to write to the video file of shape (self.channels,self.frameSize) if plannar is True else (self.channels*self.frameSize). 670 671 Returns 672 ---------- 673 bool 674 Writing was successful or not. 675 """ 676 # Check params 677 # - pipe exists 678 if self.pipe is None: 679 raise self.AudioIOException("No pipe opened to {}. Call create(...) before writing frames.".format(self.audioProgram)) 680 # - pipe is in write mode 681 if self.mode != PipeMode.WRITE_MODE: 682 raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename)) 683 # - shape of image is fine, thus we have pixels for a full compatible frame 684 if audio.shape[0] != self.channels: 685 raise self.AudioIOException("Wong audio shape: {} expected ({},{}).".format(audio.shape,self.channels,self.frame_size)) 686 # - type of data is Float32 687 if audio.dtype != np.float32: 688 raise self.AudioIOException("Wong audio type: {} expected np.float32.".format(audio.dtype)) 689 690 # array must have a shape (channels, samples), reshape it it to (samples, channels) if plannar 691 if not self.plannar: 692 audio = audio.reshape(-1) 693 694 # garantee to have a C continuous array 695 if not audio.flags['C_CONTIGUOUS']: 696 a = np.ascontiguousarray(a) 697 698 # write frame 699 buffer = audio.tobytes() 700 if self.pipe.stdin.write( buffer ) < len(buffer): 701 print( f"Error writing frame to {self.filename}" ) 702 return False 703 704 # increase frame_counter 705 self.frame_counter.frame_count += (self.frame_size * self.channels) 706 707 # say to gc that this buffer is no longer needed 708 del buffer 709 710 return True
Write an audio frame to the file
Parameters
audio: nparray The audio frame to write to the video file of shape (self.channels,self.frameSize) if plannar is True else (self.channels*self.frameSize).
Returns
bool Writing was successful or not.
712 def write_batch(self, batch): 713 """ 714 Write a batch of audio frame to the file 715 716 Parameters 717 ---------- 718 batch: nparray 719 The batch of audio frames to write to the video file of shape (n,self.channels,self.frameSize) if plannar is True else (n,self.channels*self.frameSize) of interleaved audio data. 720 721 Returns 722 ---------- 723 bool 724 Writing was successful or not. 725 """ 726 # Check params 727 # - pipe exists 728 if self.pipe is None: 729 raise self.AudioIOException("No pipe opened to {}. Call create(...) before writing frames.".format(self.audioProgram)) 730 # - pipe is in write mode 731 if self.mode != PipeMode.WRITE_MODE: 732 raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename)) 733 # batch is 3D (n, channels, nb samples) 734 if batch.ndim !=3: 735 raise self.AudioIOException("Wrong batch shape: {} expected 3 dimensions (n, n_channels, n_samples_per_channel).".format(batch.shape)) 736 # - shape of images in batch is fine 737 if batch.shape[1] != self.channels: 738 raise self.AudioIOException("Wrong audio channels in batch: {} expected {}.".format(batch.shape[2], self.channels)) 739 740 # array must have a shape (n * n_channels * n_samples_per_channel) before writing them to pipe 741 # reshape it it to (n * n_channels * n_samples_per_channel) if plannar is False 742 if not self.plannar: 743 # goes from (n, n_channels, n_samples_per_channel) to (n * n_channels * n_samples_per_channel) 744 batch = batch.transpose(0, 2, 1) # first go to (n, n_samples_per_channel, n_channels) 745 batch = batch.reshape(-1) # then to 1D array (n * n_channels * n_samples_per_channel) 746 747 # garantee to have a C continuous array 748 if not batch.flags['C_CONTIGUOUS']: 749 batch = np.ascontiguousarray(batch) 750 751 # write frame 752 buffer = batch.tobytes() 753 if self.pipe.stdin.write( buffer ) < len(buffer): 754 # say to gc that this buffer is no longer needed 755 del buffer 756 raise self.AudioIOException("Error writing batch to '{}'.".format(self.filename)) 757 758 # increase frame_counter 759 self.frame_counter.frame_count += (batch.shape[0] * self.frame_size * self.channels) 760 761 # say to gc that this buffer is no longer needed 762 del buffer 763 764 return True
Write a batch of audio frame to the file
Parameters
batch: nparray The batch of audio frames to write to the video file of shape (n,self.channels,self.frameSize) if plannar is True else (n,self.channels*self.frameSize) of interleaved audio data.
Returns
bool Writing was successful or not.
87 @staticmethod 88 def get_time_in_sec(filename, *, debug=False, logLevel=16): 89 """ 90 Static method to get length of an audio file (or video file containing audio) in seconds including milliseconds as decimal part (3 decimals). 91 92 Parameters 93 ---------- 94 filename : str or path. 95 Raw audio waveform as a 1D array. 96 97 debug : bool (default False). 98 Show debug info. 99 100 log_level: int (default 16). 101 Log level to pass to the underlying ffmpeg/ffprobe command. 102 103 Returns 104 ---------- 105 float 106 Length in seconds of video file (including milliseconds as decimal part with 3 decimals) 107 """ 108 109 cmd = [AudioIO.paramProgram, # ffprobe 110 '-hide_banner', 111 '-loglevel', str(logLevel), 112 '-show_entries', 'format=duration', 113 '-of', 'default=noprint_wrappers=1:nokey=1', 114 filename 115 ] 116 117 if debug == True: 118 print(' '.join(cmd)) 119 120 # call ffprobe and get params in one single line 121 lpipe = sp.Popen(cmd, stdout=sp.PIPE) 122 output = lpipe.stdout.readlines() 123 lpipe.terminate() 124 # transform Bytes output to one single string 125 output = ''.join( [element.decode('utf-8') for element in output]) 126 127 try: 128 return float(output) 129 except (ValueError, TypeError): 130 return None
Static method to get length of an audio file (or video file containing audio) in seconds including milliseconds as decimal part (3 decimals).
Parameters
filename : str or path. Raw audio waveform as a 1D array.
debug : bool (default False). Show debug info.
log_level: int (default 16). Log level to pass to the underlying ffmpeg/ffprobe command.
Returns
float Length in seconds of video file (including milliseconds as decimal part with 3 decimals)
132 @staticmethod 133 def get_params(filename, *, debug=False, logLevel=16): 134 """ 135 Static method to get params (channels,sample_rate) of a (video containing) audio file in seconds. 136 137 Parameters 138 ---------- 139 filename : str or path. 140 Raw audio waveform as a 1D array. 141 142 debug : bool (default (False). 143 Show debug info. 144 145 log_level: int (default 16). 146 Log level to pass to the underlying ffmpeg/ffprobe command. 147 148 Returns 149 ---------- 150 tuple 151 Tuple containing (channels,sample_rate) of the file 152 """ 153 cmd = [AudioIO.paramProgram, # ffprobe 154 '-hide_banner', 155 '-loglevel', str(logLevel), 156 '-show_entries', 'stream=channels,sample_rate', 157 filename 158 ] 159 160 if debug == True: 161 print(' '.join(cmd)) 162 163 # call ffprobe and get params in one single line 164 lpipe = sp.Popen(cmd, stdout=sp.PIPE) 165 output = lpipe.stdout.readlines() 166 lpipe.terminate() 167 # transform Bytes output to one single string 168 output = ''.join( [element.decode('utf-8') for element in output]) 169 170 pattern_sample_rate = r'sample_rate=(\d+)' 171 pattern_channels = r'channels=(\d+)' 172 173 # Search for values in the ffprobe output 174 match_sample_rate = re.search(pattern_sample_rate, output, flags=re.MULTILINE) 175 match_channels = re.search(pattern_channels, output, flags=re.MULTILINE) 176 177 # Extraction des valeurs 178 if match_sample_rate: 179 sample_rate = int(match_sample_rate.group(1)) 180 else: 181 raise AudioIO.AudioIOException("Unable to get audio sample_rate of '" + str(filename) + "'") 182 183 if match_channels: 184 channels = int(match_channels.group(1)) 185 else: 186 raise AudioIO.AudioIOException("Unable to get audio channels of '" + str(filename) + "'") 187 188 return (channels,sample_rate) 189 190 # Attributes 191 mode: PipeMode 192 """ Pipemode of the current object (default PipeMode.UNK_MODE)""" 193 194 loglevel: int 195 """ loglevel of the underlying ffmpeg backend for this object (default 16)""" 196 197 debugModel: bool 198 """ debutMode flag for this object (print debut info, default False)""" 199 200 channels: int 201 """ Number of channels of images (default -1) """ 202 203 sample_rate: int 204 """ sample_rate of images (default -1) """ 205 206 plannar: bool 207 """ Read/write data as plannar, i.e. not interleaved (default True) """ 208 209 pipe: sp.Popen 210 """ pipe object to ffmpeg/ffprobe (default None)""" 211 212 frameSize: int 213 """ Weight in bytes of one image (default -1)""" 214 215 filename: str 216 """ Filename of the file (default None)""" 217 218 frame_counter: FrameCounter 219 """ `Framecounter` object to count ellapsed time (default None)"""
Static method to get params (channels,sample_rate) of a (video containing) audio file in seconds.
Parameters
filename : str or path. Raw audio waveform as a 1D array.
debug : bool (default (False). Show debug info.
log_level: int (default 16). Log level to pass to the underlying ffmpeg/ffprobe command.
Returns
tuple Tuple containing (channels,sample_rate) of the file
35 class AudioIOException(Exception): 36 """ 37 Dedicated exception class for AudioIO class. 38 """ 39 def __init__(self, message="Error while reading/writing video occurs"): 40 self.message = message 41 super().__init__(self.message)
Dedicated exception class for AudioIO class.
43 class AudioFormat(Enum): 44 """ 45 Enum class for supported input video type: 32-bit float is the only supported type for the moment. 46 """ 47 PCM32LE = 'pcm_f32le' # default format (unique mode for the moment)
Enum class for supported input video type: 32-bit float is the only supported type for the moment.