ontmont package#

Submodules#

ontmont.bundle module#

ontmont.bundle.make_brk_supports(bundle)[source]#

Count supports for unique breakpoint coordinates

Parameters:

bundle (list) – List of BreakpointChain

Returns:

pandas.Series – Count of unique breakpoints, taking chrom, pos, ori into account

ontmont.bundle.make_brk_table(bundle, brk_supports, unilateral_score_cutoff=5, bilateral_score_cutoff=8)[source]#

Create a dataframe of breakpoints

Parameters:
  • bundle (list) – List of BreakpointChain variables

  • brk_supports (dict | pandas.Series) – Number of supports for each breakpoint coordinate

  • unilateral_score_cutoff (int, optional) – Alignment score cutoff for unilateral IR. Defaults to 5.

  • bilateral_score_cutoff (int, optional) – Alignment score cutoff for bilateral IR. Defaults to 8.

Returns:

pandas.DataFrame – table of breakpoint coordinates with support and IR statistics

ontmont.bundle.make_brks_bundle(reads_df, genome, sw_palindrome, sw_holliday, margins=[15, 30, 60])[source]#

Make a list of BreakpointChain based on alignment table, genome, and alignment parameters

Parameters:
  • reads_df (pandas.DataFrame) – Table of read alignment statistics

  • genome (pyfaidx.Fasta) – Genome fasta

  • sw_palindrome (swalign.LocalAlignment) – Parameters for detecting IR

  • sw_holliday (swalign.LocalAlignment) – Parameters for detecting homology

  • margins (list, optional) – Bases to slice from breakpoints. Defaults to [15, 30, 60].

Returns:

list – List of BreakpointChain

ontmont.bundle.make_seg_table(bundle, seg_supports, segment_score_cutoff=5)[source]#

Create a dataframe based on a BreakpointChain bundle and supports dict

Parameters:
  • bundle (list) – List of BreakpointChain variables

  • seg_supports (dict | pandas.Series) – Number of supports for each breakpoint coordinate

  • segment_score_cutoff (int, optional) – Alignment score cutoff for the IR found in the segment. Defaults to 5.

Returns:

pandas.DataFrame – Table of segment coordinates with supports and IR statistics

ontmont.bundle.make_tra_table(bundle, tra_supports)[source]#

Make a table of SVs based on bundle and number of supports

Parameters:
  • bundle (list) – List of BreakpointChain

  • tra_supports (dict | pandas.Series) – Support count for SVs

Returns:

pandas.DataFrame – Table of SVs, with duplicates removed and SVs located on contig termini removed

ontmont.cli module#

ontmont.collect module#

ontmont.collect.extract_split_alignments(reads, max_reads=500)[source]#

Extract SplitAlignment objects from IteratorRow with a max_reads parameter

Parameters:
  • reads (pysam.IteratorRow) – Reads fetched from a pysam.AlignmentFile

  • max_reads (int, optional) – Number of reads to extract at maximum. Defaults to 500.

Returns:

list – list of SplitAlignment objects

ontmont.collect.find_presence_of_matching_sv(sv1, sv2, margin=50)[source]#

Check, for each SV in the sv1 table, whether an overlapping (matching) SV is present in sv2

Parameters:
  • sv1 (pandas.DataFrame) – SV table to label matching SVs

  • sv2 (pandas.DataFrame) – SV table reference to check presence of overlap

  • margin (int, optional) – Margin (bp) of breakpoint coordinate difference. Defaults to 50.

Returns:

pandas.Series – Boolean (True/False) flags of matches. Length equal to sv1 row size.

ontmont.collect.fix_lower_support_coordinates(complexes, coord_map)[source]#

Map breakpoint of lower support to close-by breakpoint with higher support

Parameters:
  • complexes (list) – List of BreakpointChain

  • coord_map (dict) – Map of str(Breakpoint) coordinates

Returns:

list – List of BreakpointChain, mapped to fixed coordinates

ontmont.collect.get_breakpoint_support_from_bundle(complexes)[source]#

Get breakpoint support count

Parameters:

complexes (list) – List of BreakpointChain

Returns:

collections.Counter – Support for str(Breakpoint) coordinates

ontmont.collect.get_normalized_sv(tra)[source]#

Sort (normalize) a BreakpointPair

Parameters:

tra (BreakpointPair) – Pair of breakpoints

Returns:

list – Sorted breakpoint coordinates, flattened

ontmont.collect.get_svtype(tra)[source]#

Get SV type string for a given BreakpointPair

Parameters:

tra (BreakpointPair) – Paired breakpoint object

Raises:

ValueError – If no SV type has been assigned

Returns:

str – SV type string

ontmont.collect.make_brks_bundle(reads_df)[source]#

Make a list of BreakpointChain based on alignment table

Parameters:

reads_df (pandas.DataFrame) – Table of read alignment statistics

Returns:

list – List of BreakpointChain

ontmont.collect.make_tumor_sv_table(complexes, sv=None, margin=10, get_support=True)[source]#

Make SV table from list of BreakpointChain

Parameters:
  • complexes (list) – List of BreakpointChain

  • sv (pandas.DataFrame, optional) – Table of source SVs as reference for in_source flag. Defaults to None

  • margin (int, optional) – Margin (bp) for merging clustered breakpoints. Defaults to 10.

  • get_support (bool, optional) – Merge breakpoints with same coordinates and add count as support. Defaults to True.

Returns:

pandas.DataFrame – SV table from bundle [, with in_source labels] [, collapsed by coordinate with support counts]

ontmont.collect.map_similar_coordinate_to_higher_rank(complexes, breakpoint_support, margin=10)[source]#

Make mapping of close-by coordinates, with breakpoints of higher support taking priority

Parameters:
  • complexes (list) – List of BreakpointChain

  • breakpoint_support (dict | collections.Counter) – Support for breakpoint coordinates

  • margin (int, optional) – Margin (bp) to merge close-by coordinates. Defaults to 10.

Returns:

tuple – tuple containing:

coord_map (dict): src -> dst coordinate

coord_map_log (tuple): (max_coord, src_count, max_count) [only for debugging]

ontmont.collect.normalize_sv_table(sv, chrom1_col='chromosome_1', chrom2_col='chromosome_2', pos1_col='position_1', pos2_col='position_2', ori1_col='strand_1', ori2_col='strand_2')[source]#

Sort breakpoint1 and breakpoint2 of a SV table

Parameters:
  • sv (pandas.DataFrame) – Table of SVs

  • chrom1_col (str, optional) – Defaults to ‘chromosome_1’.

  • chrom2_col (str, optional) – Defaults to ‘chromosome_2’.

  • pos1_col (str, optional) – Defaults to ‘position_1’.

  • pos2_col (str, optional) – Defaults to ‘position_2’.

  • ori1_col (str, optional) – Defaults to ‘strand_1’.

  • ori2_col (str, optional) – Defaults to ‘strand_2’.

Returns:

pandas.DataFrame – Sorted (normalized) SV table

ontmont.collect.pull_breakpoints_from_bam_files(bam_paths, sv, get_read_table=False)[source]#

Get BreakpointChain list from BAM file according to an input SV table

Parameters:
  • bam_paths (pysam.AlignmentFile) – BAM file

  • sv (pandas.DataFrame) – SV table

  • get_read_table (bool, optional) – Return read table as well. Defaults to False.

Returns:

list [, pandas.DataFrame] – List of BreakpointChain [, table of read alignment stats]

ontmont.collect.pull_breakpoints_from_reads_in_sv_regions(bam, tra, get_read_table=False, min_n_breakpoint=3, margin=10)[source]#

Extract and append BreakpointChain objects from a bam file and a table of SVs

Parameters:
  • bam (pysam.AlignmentFile) – BAM file

  • tra (pandas.DataFrame) – Table of SVs

  • get_read_table (bool, optional) – Return table of read alignment stats. Defaults to False.

  • min_n_breakpoint (int, optional) – Minimum number of breakpoints required to be saved. Useful in selecting complex rearrangements if the number is high. Defaults to 3.

  • margin (int, optional) – Margin (bp) from breakpoints to fetch reads. Defaults to 10.

Returns:

list – List of BreakpointChain

ontmont.collect.pull_sv_supporting_reads_from_bundle(sv, bundle)[source]#

Filter bundle to include BreakpointChain objects that have breakpoints matching those of the input SV table

Parameters:
  • sv (pandas.DataFrame) – SV table

  • bundle (list) – List of BreakpointChain

Returns:

list – Filtered list of BreakpointChain

ontmont.datatypes module#

class ontmont.datatypes.Breakpoint(chrom, pos, orientation)[source]#

Bases: object

chroms = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X', 'Y', 'M', 'chr1', 'chr2', 'chr3', 'chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chr12', 'chr13', 'chr14', 'chr15', 'chr16', 'chr17', 'chr18', 'chr19', 'chr20', 'chr21', 'chr22', 'chrX', 'chrY', 'chrM']#
get_breakpoint_seqs(margin, genome)[source]#
class ontmont.datatypes.BreakpointChain(brks_iterable)[source]#

Bases: list

get_segments()[source]#
get_transitions(sort_transition=False)[source]#
class ontmont.datatypes.BreakpointPair(brk1, brk2)[source]#

Bases: object

class ontmont.datatypes.Sample(long_sample)[source]#

Bases: object

long_samples = ['14472B_201', '14472B_202', '14472B_500', '14472B_501', '14472C_203', '14472C_204', '14472C_502', '14472D_101', '14472D_102', '14472D_103', '14472D_104', '14472D_205', '14472_100', '14472_300']#
sample2group = {'14472B_201': '2', '14472B_202': '2', '14472B_500': '5', '14472B_501': '5', '14472C_203': '2', '14472C_204': '2', '14472C_502': '5', '14472D_101': '1', '14472D_102': '1', '14472D_103': '1', '14472D_104': '1', '14472D_205': '2', '14472_100': '1', '14472_300': '3'}#
sample2short = {'14472B_201': '201', '14472B_202': '202', '14472B_500': '500', '14472B_501': '501', '14472C_203': '203', '14472C_204': '204', '14472C_502': '502', '14472D_101': '101', '14472D_102': '102', '14472D_103': '103', '14472D_104': '104', '14472D_205': '205', '14472_100': '100', '14472_300': '300'}#
class ontmont.datatypes.Segments(df)[source]#

Bases: object

get_list()[source]#
class ontmont.datatypes.SplitAlignment(cigarstring, read_name, refname, read_pos, strand)[source]#

Bases: object

extract_cigar_field()[source]#
static get_cigar_tuples(cigarstring)[source]#

Returns a cigar tuple from a CIGAR string

class ontmont.datatypes.Transitions(df)[source]#

Bases: object

get_list()[source]#
ontmont.datatypes.get_breakpoint_seqs(chrom, pos, margin, genome)[source]#

ontmont.irs module#

ontmont.irs.align_two_sequences(seq1, seq2, sw, rc=True)[source]#
ontmont.irs.calc_distance_score(dist1, dist2, dist_cutoff1, dist_cutoff2)[source]#
ontmont.irs.calc_pval_bilateral_ir(seq1, seq2, sw, src_score, dist_cutoff1, dist_cutoff2, n_iter=1000, random_seed=42)[source]#
ontmont.irs.calc_pval_holliday(seq1, seq2, negative1, negative2, src_score, sw, dist_cutoff1=1, dist_cutoff2=3, n_iter=1000, random_seed=42)[source]#
ontmont.irs.calc_pval_onesided_ir(seq, sw, direction, src_score, dist_cutoff, n_iter=1000, random_seed=42)[source]#
ontmont.irs.calc_pval_segmental_ir(seq1, seq2, ori1, ori2, sw, src_score, dist_cutoff1, dist_cutoff2, n_iter=1000, random_seed=42)[source]#
ontmont.irs.get_best_holliday_junctions(pair, sw, genome, score_cutoff=4, dist_cutoff1=2, dist_cutoff2=5, margins=[15, 30, 60])[source]#
ontmont.irs.get_best_ir_within_breakpoints(seq1, seq2, sw, dist_cutoff1=1, dist_cutoff2=3, margins=[15, 30, 60])[source]#
ontmont.irs.get_best_ir_within_segment(pair, sw, genome, dist_cutoff1=1, dist_cutoff2=3, margins=[15, 30, 60])[source]#
ontmont.irs.get_best_onesided_ir(seq, direction, sw, dist_cutoff=1, margins=[15, 30, 60])[source]#
ontmont.irs.get_breakpoint_pair_seq_data(pair)[source]#
ontmont.irs.get_onesided_ir(seq, sw)[source]#
ontmont.irs.is_holliday_junction(aln, negative1, negative2, dist_cutoff1=1, dist_cutoff2=3, dist_sum_cutoff=4, score_cutoff=4)[source]#
ontmont.irs.overlaps(seg1, seg2)[source]#

ontmont.plot module#

ontmont.plot.add_gene_annotations(ax, gtf, chromosome, start, end, genes=None, already_plotted={}, gene_font_size=8)[source]#
ontmont.plot.add_sv_legend_to_axes(sv_axes, svcolors, supports, alpha=0.5, loc1=(0.82, 0.5), loc2=(0.82, 0.0), size=8, alignment='left', show_support_legend=True)[source]#
ontmont.plot.convert_tupleproxy_to_pyranges(exon)[source]#
ontmont.plot.extract_vaf_from_annotated_sv_table(tra, sv, margin=10)[source]#
ontmont.plot.fix_coordinate_xticklabels(sv_axes)[source]#
ontmont.plot.fix_yticks_with_integers(cn_axes)[source]#
ontmont.plot.get_chrom_xlims(read_svs, read_chroms, sv_x_length_margin)[source]#
ontmont.plot.get_chromosomes_from_bundle(bundle)[source]#
ontmont.plot.get_cn_blocks_for_read_xlim(cn, read_chroms, chrom_xlims, bin_size=50000, clones=['Pseudobulk'])[source]#
ontmont.plot.get_edgecolor(svtype)[source]#
ontmont.plot.get_gene_repr_exons(gtf, gene, lenient=False)[source]#
ontmont.plot.get_repr_transcript_id(gtf, gene_name, lenient=False)[source]#
ontmont.plot.get_sv_table_chroms(sv)[source]#
ontmont.plot.get_transcript_exons(gtf, transcript_id)[source]#
ontmont.plot.get_unique_sv_with_support(bundle)[source]#
ontmont.plot.is_overlap(interval1, interval2)[source]#
ontmont.plot.make_axes_for_breakpoints(brk_chroms, figsize=(10, 4), hspace=1.5, rowsizes=(1, 4, 5, 1), modes=['vaf', 'cn', 'sv', 'gene'], width_ratios=None)[source]#
ontmont.plot.make_spline_coordinates(pos1, pos2, sv_y=1, margin_div=5, y_offset=0.05)[source]#
ontmont.plot.parse_gtf_region(gtf, region)[source]#
ontmont.plot.plot_cn_on_axes(cn, cn_axes, clone_colors, bin_size=50000, metric='copy', max_cn=6)[source]#
ontmont.plot.plot_gene_annotations(ax, gtf, chrom, input_svs, margin=50000, gene_font_size=8)[source]#
ontmont.plot.plot_jabba_cn(chrom_cn, ax, cn_colors, ylim=(0, 6))[source]#
ontmont.plot.plot_quasijabba(input_svs, cn, cn_clones=['Pseudobulk'], figsize=(12, 4), svlineh=0.8, width_ratios=None, alpha=0.3, loc1=(0.83, 0.75), default_rad=0.25, suptitle='', read_length=None, gtf=None, gene_margin=50000, bin_size=50000, linewidth=0, show_support_legend=True, gene_font_size=8, sv_x_length_margin=0.2, intra_y_offset=1, linetick_divfactor=3, ylim=(0, 10))[source]#
ontmont.plot.plot_segments_on_axes(plot_data, sv_axes)[source]#
ontmont.plot.plot_sv_lines_on_cn(plot_data, cn_axes)[source]#
ontmont.plot.plot_svs_to_cn_segments(input_svs, axes, chrom_cns, svlineh, alpha, _linewidth=0, intra_y_offset=1, linetick_divfactor=3, default_rad=0.25, sv_x_length_margin=0.2, sv_x_offset=100000)[source]#
ontmont.plot.plot_transition_on_axes(plot_data, sv_axes, alpha=0.5)[source]#
ontmont.plot.plot_vaf_cn_sv_for_reads(plot_data, sv, cn, clone_ids, fig_row_sizes=(15, 40, 50), cn_window=200000, cn_metric='state')[source]#
ontmont.plot.plot_vaf_on_axes(plot_data, vaf_axes, clone_colors, flt_ont_sv)[source]#

ontmont.process module#

ontmont.process.filter_sv_by_clone(sv, vaf_col_str='vaf_')[source]#
ontmont.process.merge_cn_segments(cn, merge_gap=100000)[source]#
ontmont.process.update_blocks_and_reset_prev(blocks, prev, row, features=['clone_id', 'chr', 'start', 'end', 'state'])[source]#

ontmont.utils module#

ontmont.utils.enumerate_breakpoints(df)[source]#
ontmont.utils.extract_split_alignments(bam, chroms_proc)[source]#
ontmont.utils.filter_breakpoints_at_contig_ends(brk_df)[source]#
ontmont.utils.filter_sv_with_breakpoint_at_contig_ends(df)[source]#
ontmont.utils.get_chromosomes_to_process(bam, drop_expression_vectors=False)[source]#
ontmont.utils.get_secondaries(read)[source]#
ontmont.utils.is_breakpoints_not_sorted(chrom1, pos1, chrom2, pos2, chrom_order)[source]#
ontmont.utils.make_split_read_table(alignments)[source]#
ontmont.utils.remove_duplicates_from_tra_table(tra_df)[source]#
ontmont.utils.reverse_complement(seq)[source]#
ontmont.utils.shuffle_seq(seq)[source]#

Module contents#