Source code for pacbio_data_processing.blasr

#######################################################################
#
# Copyright (C) 2021, 2022 David Palao
#
# This file is part of PacBioDataProcessing.
#
#  PacBioDataProcessing is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  PacBioDataProcessing is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with PacBioDataProcessing. If not, see <http://www.gnu.org/licenses/>.
#
#######################################################################

import subprocess
import logging
from pathlib import Path
from typing import Optional

from .types import PathOrStr, ReturnCode
from .sentinel import Sentinel, SentinelFileFound, SentinelFileNotFound


class Blasr:
    """An object to interact with the ``blasr`` aligner.

    An instance stores the path to the ``blasr`` program. Calling the
    instance runs one alignment in a subprocess, guarded by a
    ``Sentinel`` built from the path of the output file.
    """

    def __init__(self, path: PathOrStr) -> None:
        """The path to the blasr program is the only invariant.

        :param path: the ``blasr`` program; it is used unchanged as the
            first element of the command line that is executed.
        """
        self.program = path

    def __call__(
            self,
            in_bamfile: PathOrStr,
            fasta: PathOrStr,
            out_bamfile: PathOrStr,
            nprocs: int = 1
    ) -> Optional[ReturnCode]:
        """It runs the ``blasr`` executable, with the given parameters.

        The executed command line is::

            PROGRAM IN_BAMFILE FASTA --nproc NPROCS --bam --out OUT_BAMFILE

        and both output streams of the process are captured.

        :param in_bamfile: input file passed to ``blasr``.
        :param fasta: second positional argument passed to ``blasr``.
        :param out_bamfile: value of the ``--out`` option; the sentinel
            guarding the run is also created from this path.
        :param nprocs: value of the ``--nproc`` option.
        :return: the return code of the associated process *if*
            ``blasr`` could run at all, else ``None``. One case where
            ``blasr`` cannot run is when the sentinel file is there
            *before* the ``blasr`` process is run.

        If the sentinel reports that its file disappeared, only
        warnings are logged: the outcome of the run itself is not
        logged, and the value collected so far is returned.

        Only the two sentinel exceptions are handled here; anything
        else (for instance an error raised by ``subprocess.run``)
        propagates to the caller.
        """
        # Bound up front so that every handled path has a value to return,
        # even one where the process never got to report a return code.
        result: Optional[ReturnCode] = None
        try:
            sentinel = Sentinel(Path(out_bamfile))
            with sentinel:
                blasr_proc = subprocess.run(
                    (self.program, in_bamfile, fasta, "--nproc", f"{nprocs}",
                     "--bam", "--out", out_bamfile),
                    capture_output=True
                )
                result = blasr_proc.returncode
        except SentinelFileFound:
            result = None
            logging.warning(
                f"Sentinel file '{sentinel.path}' exists! "
                "Skipping blasr computation."
            )
        except SentinelFileNotFound:
            logging.warning(
                f"Sentinel file '{sentinel.path}' disappeared before blasr "
                "finished its computation!"
            )
            logging.warning(
                " ...some other person/process is probably carrying out a "
                "similar computation in the same directory and messing up."
            )
            logging.warning(
                " The integrity of the results may be compromised!"
            )
        else:
            # Reached only when no sentinel exception was raised.
            if result == 0:
                logging.info(f"[blasr] Aligned file '{out_bamfile}' generated")
            else:
                logging.error(
                    f"'{self.program}' could not align the input file "
                    f"'{in_bamfile}'")
                # Reporting the failure must never fail itself: bytes that
                # are not valid UTF-8 are replaced instead of raising
                # UnicodeDecodeError.
                msg = blasr_proc.stderr.decode(errors="replace")
                logging.error(f" ...the error was: {msg}")
        return result