Source code for genetic_codes

import numpy
import copy

class GeneticCodeMutation:
    """A proposed reassignment of a single codon to a single amino acid.

    The mutation is built from a copy of the parent code's matrix (as
    returned by ``code.get_code_matrix()``): the row for ``codon`` is zeroed
    and the entry for ``aa`` is set to 1.  The parent code itself is not
    modified here.

    Attributes set by the constructor:

    * ``code_matrix`` -- the mutated code matrix.
    * ``effective_code_matrix`` -- ``code_matrix`` multiplied by the
      misreading matrix when ``code.misreading`` is truthy, otherwise the
      very same object as ``code_matrix``.
    * ``event`` -- the one-entry dict ``{codon: aa}``.
    * ``codon`` / ``amino_acid`` -- the indices passed in.
    """

    def __init__(self, code, codon, aa):
        self.codon = codon
        self.amino_acid = aa
        self.event = {codon: aa}

        # Work on the copy handed out by the parent code.
        mutated = code.get_code_matrix()
        mutated[codon, :] = 0
        mutated[codon, aa] = 1
        self.code_matrix = mutated

        if code.misreading:
            misreading_matrix = code.misreading.get_misreading_matrix()
            effective = numpy.dot(mutated, misreading_matrix)
        else:
            # No misreading model: the effective matrix is the code matrix.
            effective = mutated
        self.effective_code_matrix = effective

    def get_effective_code_matrix(self):
        """Return a copy of the effective (misreading-adjusted) code matrix."""
        return self.effective_code_matrix.copy()
class _GeneticCode:
    """Private base class holding the bookkeeping shared by genetic codes.

    A code maps codon indices ``0 .. num_codons - 1`` to amino-acid indices
    ``0 .. num_amino_acids - 1``.  A codon is *explicit* once it has been
    assigned an amino acid and *ambiguous* otherwise.

    This class does not create ``self.code_matrix``; the matrix methods
    read it, so it has to be assigned before they are used (the concrete
    code classes in this module do so in their constructors).

    Index 0 is a valid codon and a valid amino acid.  Every optional
    ``codon`` / ``amino_acid`` argument is therefore compared against
    ``None`` rather than tested for truthiness.

    The per-amino-acid codon counts live in ``_redundancy_counts``.  They
    were previously stored in an instance attribute named ``redundancy``,
    which hid the ``redundancy()`` method and made it uncallable.  The counts
    only reflect assignments made through ``update()``.
    """

    def __init__(self, codons, amino_acids, misreading):
        self.codons = codons
        self.num_codons = codons.num_codons
        self.amino_acids = amino_acids
        self.num_amino_acids = amino_acids.num_amino_acids
        self.misreading = misreading
        self.mutation_history = []
        self.num_mutations = 0
        self.num_mutations_by_codon = [0] * self.num_codons
        # Number of explicit codons currently assigned to each amino acid.
        self._redundancy_counts = [0] * self.num_amino_acids
        self.initial_code_as_dict = {}
        self.current_code_as_dict = {}
        self.codon_set = set(range(self.num_codons))
        self.amino_acid_set = set(range(self.num_amino_acids))
        self.explicit_codons = set()
        self.encoded_amino_acids = set()
        self.reassignments_before_explicit = 0
        self.reassignments_after_explicit = 0

    def get_code_matrix(self):
        """Return a copy of the code matrix."""
        return self.code_matrix.copy()

    def get_effective_code_matrix(self):
        """Return the code matrix with misreading applied.

        When a misreading model is present the result is
        ``numpy.dot(code_matrix, misreading_matrix)``; otherwise it is a
        copy of the code matrix.
        """
        if self.misreading:
            misreading_matrix = self.misreading.get_misreading_matrix()
            # NOTE(original author): this mirrors what brian did, but needs
            # to be checked.
            return numpy.dot(self.code_matrix, misreading_matrix)
        return self.code_matrix.copy()

    def mutate(self, codon, aa):
        """Return a ``GeneticCodeMutation`` assigning ``codon`` to ``aa``.

        The code itself is left untouched; pass the mutation to ``update()``
        to apply it.
        """
        return GeneticCodeMutation(self, codon, aa)

    def update(self, code_mutation):
        """Apply ``code_mutation`` to this code and update all counters.

        ``code_mutation`` must provide ``code_matrix``, ``event``, ``codon``
        and ``amino_acid`` attributes.
        """
        codon = code_mutation.codon
        amino_acid = code_mutation.amino_acid

        self.code_matrix = code_mutation.code_matrix
        self.mutation_history.append(code_mutation.event)
        self.num_mutations += 1
        self.num_mutations_by_codon[codon] += 1

        # Remember what the codon encoded before, if it encoded anything.
        previous_amino_acid = None
        if codon in self.explicit_codons:
            previous_amino_acid = self.current_code_as_dict[codon]
        self.current_code_as_dict.update(code_mutation.event)

        # Classify the change using the state *before* this codon is added
        # to the explicit set: a fully explicit code counts it as "after
        # explicit"; an already-explicit codon in a still partly ambiguous
        # code counts it as "before explicit".
        if self.is_explicit():
            self.reassignments_after_explicit += 1
        elif self.is_explicit(codon):
            self.reassignments_before_explicit += 1

        # ``is not None``: amino acid 0 must release its codon too.
        if previous_amino_acid is not None:
            self._redundancy_counts[previous_amino_acid] -= 1
            if self._redundancy_counts[previous_amino_acid] == 0:
                self.encoded_amino_acids.discard(previous_amino_acid)

        self._redundancy_counts[amino_acid] += 1
        self.explicit_codons.add(codon)
        self.encoded_amino_acids.add(amino_acid)

    def as_dict(self):
        """Return a copy of the current codon -> amino-acid mapping."""
        return self.current_code_as_dict.copy()

    def as_labelled_dict(self):
        """Return the current mapping with explicit codons mapped to labels.

        Labels come from ``self.amino_acids.label(index)``; entries for
        codons that are not explicit are returned unchanged.
        """
        labelled = self.current_code_as_dict.copy()
        for codon in self.explicit_codons:
            labelled[codon] = self.amino_acids.label(labelled[codon])
        return labelled

    def num_ambiguous_codons(self):
        """Return the number of codons that are not explicit."""
        return self.num_codons - len(self.explicit_codons)

    def num_explicit_codons(self):
        """Return the number of explicit codons."""
        return len(self.explicit_codons)

    def num_reassignments(self, codon=None):
        """Return the number of reassignments.

        With ``codon`` given: the number of mutations of that codon minus
        its first assignment, or 0 if the codon is not explicit.  Without
        it: the total of the before- and after-explicit counters.
        """
        if codon is None:
            return (self.reassignments_before_explicit
                    + self.reassignments_after_explicit)
        if self.is_explicit(codon):
            return self.num_mutations_by_codon[codon] - 1
        return 0

    def num_reassignments_before_explicit(self):
        """Return the count of reassignments made while the code was not
        yet fully explicit."""
        return self.reassignments_before_explicit

    def num_reassignments_after_explicit(self, codon=None):
        """Return the count of reassignments made once the code was fully
        explicit.  ``codon`` is accepted but not used."""
        return self.reassignments_after_explicit

    def num_encoded_amino_acids(self):
        """Return the number of amino acids with at least one codon."""
        return len(self.encoded_amino_acids)

    def num_unencoded_amino_acids(self):
        """Return the number of amino acids without any codon."""
        return self.num_amino_acids - len(self.encoded_amino_acids)

    def redundancy(self, amino_acid=None):
        """Return the redundancy of one amino acid or of the whole code.

        With ``amino_acid`` given: the number of codons assigned to it
        through ``update()``.  Without it:
        ``1 - (num_encoded_amino_acids - 1) / (num_codons - 1)`` computed
        with true division (raises ``ZeroDivisionError`` for a one-codon
        code).
        """
        if amino_acid is not None:
            return self._redundancy_counts[amino_acid]
        encoded = self.num_encoded_amino_acids()
        return 1 - float(encoded - 1) / (self.num_codons - 1)

    def normalized_encoded_range(self):
        """Return the encoded coordinate range as a fraction of the full
        range of the amino-acid space.

        Returns the string ``'NA'`` for a ``RingAminoAcidSpace`` and also
        when no amino acid is encoded yet (there is no range to measure).
        """
        if self.amino_acids.__class__.__name__ == 'RingAminoAcidSpace':
            return 'NA'
        if not self.encoded_amino_acids:
            return 'NA'
        space_min = self.amino_acids.min_coord
        space_max = self.amino_acids.max_coord
        coords = self.amino_acids.coords
        encoded_coords = [coords[aa] for aa in self.encoded_amino_acids]
        fraction = ((max(encoded_coords) - min(encoded_coords))
                    / (space_max - space_min))
        # ``numpy.asscalar`` is gone from current NumPy; ``.item()`` is the
        # equivalent conversion to a plain Python scalar.
        return numpy.asarray(fraction).item()

    def is_ambiguous(self, codon=None):
        """Return True if ``codon`` is not explicit; without an argument,
        return True if no codon at all is explicit."""
        if codon is not None:
            return codon not in self.explicit_codons
        return len(self.explicit_codons) == 0

    def is_explicit(self, codon=None):
        """Return True if ``codon`` is explicit; without an argument,
        return True if every codon is explicit."""
        if codon is not None:
            return codon in self.explicit_codons
        return self.codon_set == self.explicit_codons

    def ambiguous_codons(self):
        """Return the set of codons that are not explicit."""
        return self.codon_set - self.explicit_codons

    def unencoded_amino_acids(self):
        """Return the set of amino acids without any codon."""
        return self.amino_acid_set - self.encoded_amino_acids

    def is_encoded(self, amino_acid):
        """Return True if ``amino_acid`` has at least one codon."""
        return amino_acid in self.encoded_amino_acids

    def is_unencoded(self, amino_acid):
        """Return True if ``amino_acid`` has no codon."""
        return amino_acid not in self.encoded_amino_acids

    def encodes(self, codon, aa):
        """Return True if the code-matrix entry for (codon, aa) equals 1."""
        return self.code_matrix[codon][aa] == 1

    def __str__(self):
        # Rendering is delegated to the codon space, which receives the
        # labelled mapping.
        return self.codons.__str__(self.as_labelled_dict())
class InitiallyAmbiguousGeneticCode(_GeneticCode):
    """A genetic code in which every codon starts out ambiguous.

    Each row of the code matrix is uniform (``1 / number of amino acids``
    in every column) and every codon maps to ``'*'`` in both the initial
    and the current code dict.
    """

    def __init__(self, codons, amino_acids, misreading=None):
        _GeneticCode.__init__(self, codons, amino_acids, misreading)
        # Use the sizes the base class already recorded so that both
        # classes agree on where the dimensions come from.
        nc = self.num_codons
        na = self.num_amino_acids
        self.code_matrix = numpy.ones((nc, na)) / na
        # ``range`` (not the Python-2-only ``xrange``) keeps this portable.
        self.initial_code_as_dict = dict.fromkeys(range(nc), '*')
        self.current_code_as_dict = self.initial_code_as_dict.copy()
        # ``explicit_codons`` and ``encoded_amino_acids`` are already empty
        # sets after the base-class constructor has run.
class UserInitializedGeneticCode(_GeneticCode):
    """
    User-Initialized Genetic Codes are initialized with a numpy.ndarray
    code matrix or a dict of codons mapping to indices (not labels) of
    amino acids.

    In a code matrix, a codon whose row contains an entry equal to 1 is
    treated as explicit and encoding that column's amino acid; all other
    codons are recorded as ambiguous (``'*'``).  In a code dict, the value
    ``'*'`` marks an ambiguous codon, which receives a uniform row.

    If ``code_matrix`` is given it takes precedence over ``code_dict``.
    ``encoded_aas`` is kept as an alias of ``encoded_amino_acids``.

    Raises ``ValueError`` if ``code_matrix`` does not have the shape
    (number of codons, number of amino acids).

    The examples below use Python 2 syntax and output formatting.

    >>> codons = codon_spaces.WordCodonSpace(num_bases = 4,num_positions = 2, mu = 0.2,kappa = 2)
    >>> aas = amino_acid_spaces.RegionAminoAcidSpace(num_aas = 20, seed = 40)
    >>> cm = numpy.eye(16)
    >>> cm = numpy.hstack((cm,numpy.zeros((16,4))))
    >>> cm.shape
    (16, 20)
    >>> cm[0][1] = 1
    >>> cm /= cm.sum(axis = 1).reshape(16,1)
    >>> gc = UserInitializedGeneticCode(codons,aas,code_matrix = cm)
    >>> gc.num_codons
    16
    >>> gc.num_amino_acids
    20
    >>> gc.ambiguous_codons()
    set([0])
    >>> gc.encoded_aas
    set([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
    >>> print gc  # doctest: +NORMALIZE_WHITESPACE
    |* b c d|
    |e f g h|
    |i j k l|
    |m n o p|
    >>> gc.as_labelled_dict()
    {0: '*', 1: 'b', 2: 'c', 3: 'd', 4: 'e', 5: 'f', 6: 'g', 7: 'h', 8: 'i', 9: 'j', 10: 'k', 11: 'l', 12: 'm', 13: 'n', 14: 'o', 15: 'p'}
    >>> gc.as_dict()
    {0: '*', 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15}
    >>> gc2 = UserInitializedGeneticCode(codons,aas,code_dict = {0: '*', 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15})
    >>> print gc2  # doctest: +NORMALIZE_WHITESPACE
    |* b c d|
    |e f g h|
    |i j k l|
    |m n o p|
    >>> print gc2.code_matrix[0]  # doctest: +NORMALIZE_WHITESPACE
    [ 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05
      0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05]
    """

    def __init__(self, codons, amino_acids, code_matrix=None,
                 code_dict=None, misreading=None):
        _GeneticCode.__init__(self, codons, amino_acids, misreading)
        nc = self.num_codons
        na = self.num_amino_acids
        # TODO (from the original notes): make sure codon vectors add to one.

        # ``is not None``: comparing an ndarray with ``== None`` is
        # elementwise and cannot be used as a truth value.
        if code_matrix is not None:
            code_matrix = numpy.asarray(code_matrix)
            if code_matrix.shape != (nc, na):
                raise ValueError(
                    "code_matrix has shape %r, expected %r"
                    % (code_matrix.shape, (nc, na)))
            self.code_matrix = code_matrix
            # Entries equal to 1 mark explicit (codon, amino acid) pairs.
            explicit_codons, encoded_aas = numpy.nonzero(code_matrix == 1)
            # Plain Python ints keep the dict keys and set members uniform
            # with the code_dict branch.
            explicit_codons = explicit_codons.tolist()
            encoded_aas = encoded_aas.tolist()
            self.explicit_codons = set(explicit_codons)
            # The base-class queries read ``encoded_amino_acids``.
            self.encoded_amino_acids = set(encoded_aas)
            self.initial_code_as_dict = dict(zip(explicit_codons, encoded_aas))
            for codon in self.ambiguous_codons():
                self.initial_code_as_dict[codon] = '*'
        elif code_dict:
            self.code_matrix = numpy.zeros((nc, na))
            for codon, aa in code_dict.items():
                if aa == '*':
                    # Ambiguous codon: uniform over all amino acids.
                    self.code_matrix[codon] = 1.0 / na
                else:
                    self.code_matrix[codon][aa] = 1
                    self.explicit_codons.add(codon)
                    self.encoded_amino_acids.add(aa)
            self.initial_code_as_dict = code_dict.copy()

        # Backward-compatible alias; it is the same set object, so it
        # follows in-place changes to ``encoded_amino_acids``.
        self.encoded_aas = self.encoded_amino_acids
        self.current_code_as_dict = self.initial_code_as_dict.copy()
if __name__ == "__main__":
    # Run the module's doctests when executed as a script.  The imports
    # below are made at module scope inside this guard, so their names are
    # module globals when ``doctest.testmod()`` runs; the docstring
    # examples use ``amino_acid_spaces``, ``codon_spaces`` and ``numpy``.
    import doctest
    import amino_acid_spaces
    import codon_spaces
    import numpy

    doctest.testmod()