"""
This is automatically generated documentation and should
not be relied on for the API. Please
see the official documentation at http://pythonhosted.org/tfasta/.

The only things that should be used externally from this module are
I{TEMPLATES}, a C{dict} of C{FastaTemplate} instances, or
C{FastaTemplate} itself. A dictionary is
used so that templates can be selected dynamically at run-time.

Template types registered in I{TEMPLATES} are:

  - B{I{default}} - plain old fasta line
      - B{name} - everything after the ">"
  - B{I{swissprot}} - fasta files from swissprot
      - B{gi_num} - between first set of "|"s
      - B{accession} - between 3rd and 4th "|"
      - B{description} - after last "|"
  - B{I{pdb}} - the fasta file of the entire pdb
      - B{idCode} - first four characters after ">"
      - B{chainID} - any non-whitespace characters after first "_"
      - B{type} - non-whitespace immediately following first ":"
      - B{numRes} - non-whitespace immediately following "length:"
      - B{description} - stripped characters after I{numRes}
  - B{I{nr}} - the protein non-redundant database
      - B{gi} - between first set of "|"s
      - B{accession} - between 3rd and 4th "|"
      - B{description} - stripped characters before brackets
      - B{source} - stripped characters inside brackets
  - B{I{nrblast}} - fasta file produced from blast output of the nr
      - B{gi} - between first set of "|"s
      - B{accession} - between 3rd and 4th "|"

@var TEMPLATES: a C{dict} holding instances of C{FastaTemplate}
                used for parsing
"""

import re


class FastaTemplate(object):
    """
    Encapsulates the template information used to parse fasta files.

    Wraps a regular expression (I{regex}) that is matched against the
    first line of a fasta record, together with a C{tuple} of C{str}ings
    (I{fields}) naming the pieces of information found in that line.

    @type regex: _sre.SRE_Pattern
    @type fields: tuple
    """

    # NOTE(review): the method name C{get_field} and the exact signatures
    # were restored from the docstrings and the C{property} declarations
    # below -- confirm against the upstream tfasta sources.

    # C{basestring} only exists on Python 2; fall back to the Python 3
    # string types so that pattern strings are still compiled there.
    try:
        _string_types = (basestring,)  # noqa: F821
    except NameError:
        _string_types = (str, bytes)

    def __init__(self, regex, fields):
        """
        @param regex: the compiled C{_sre.SRE_Pattern} with which to
                      parse the file; a pattern string is also accepted
                      and is compiled with C{re.compile}
        @type regex: _sre.SRE_Pattern
        @param fields: a C{tuple} of C{str}ings containing names of the
                       fields found by parsing the first line of the
                       fasta record
        @type fields: tuple
        """
        if isinstance(regex, self._string_types):
            regex = re.compile(regex)
        # Both assignments go through the properties defined at the end
        # of the class, so I{fields} is always stored as a C{tuple}.
        self.regex = regex
        self.fields = fields

    def match(self, astring):
        """
        Returns the result of matching the I{regex} property against the
        beginning of I{astring}.

        @param astring: a string generally containing a line of the fasta
                        file being processed
        @type astring: str

        @return: C{_sre.SRE_Match} object describing the match, or
                 C{None} if I{astring} does not match
        @rtype: _sre.SRE_Match
        """
        return self.regex.match(astring)

    def set_regex(self, rgx):
        """
        Sets the I{regex} property to I{rgx}, a C{_sre.SRE_Pattern}.

        @param rgx: a compiled regular expression of the re module
        @type rgx: _sre.SRE_Pattern
        """
        self._regex = rgx

    def set_fields(self, ary):
        """
        Sets the I{fields} property to I{ary}, converted to a C{tuple}.

        @param ary: a C{tuple} of C{str}ings naming the fields of the
                    type of fasta records
        @type ary: tuple
        """
        self._fields = tuple(ary)

    def get_regex(self):
        """
        Returns the I{regex} property.

        @return: the I{regex} property
        @rtype: _sre.SRE_Pattern
        """
        return self._regex

    def get_fields(self):
        """
        Returns the I{fields} property.

        @return: the I{fields} property
        @rtype: tuple
        """
        return self._fields

    def get_field(self, n):
        """
        Given the C{int} index I{n}, return the field at that index.

        @param n: index into the I{fields} property
        @type n: int

        @return: the name of the field in the I{fields} property at
                 the index I{n}
        @rtype: str
        """
        return self.fields[n]

    regex = property(get_regex, set_regex)
    fields = property(get_fields, set_fields)


class _t(object):
    """
    This C{class} is essentially a namespace to hold some values that
    will be used to provide templates for the I{TEMPLATES} C{dict}.
    """

    # NOTE(review): the header of this method was missing; C{__init__}
    # taking only C{self} is assumed from the error message -- confirm
    # against the upstream tfasta sources.
    def __init__(self):
        """
        Will raise a C{RuntimeError} if called.

        @raise RuntimeError: raises a C{RuntimeError} under all
                             circumstances
        """
        # Call form of raise: valid on both Python 2 and Python 3.
        raise RuntimeError("This class can not be instantiated.")

    # default: one group holding everything after ">" and any spaces.
    _default_regex = re.compile(r'^>\ *(.*)$')
    _default_fields = ("name",)
    _default_template = FastaTemplate(_default_regex, _default_fields)

    # swissprot: ">gi|<gi_num>|sp|<accession>|<description>".
    _swissprot_regex = re.compile(r'^>gi\|([^|]*)\|sp\|([^|]*)\|(.*)$')
    _swissprot_fields = ("gi_num", "accession", "description")
    _swissprot_template = FastaTemplate(_swissprot_regex, _swissprot_fields)

    # pdb: four-character id, "_", chain, then the text after the first
    # ":", the text after "length:", and the remainder of the line.
    _pdb_regex = re.compile(r'^>(....)_(\S*)\s+[^:]*:(\S*)\s+length:(\S*)\s+(\S*.*)$')
    _pdb_fields = ("idCode", "chainID", "type", "numRes", "description")
    _pdb_template = FastaTemplate(_pdb_regex, _pdb_fields)

    # nr: ">gi|<gi>|<ignored>|<accession>|<description>[<source>]".
    _nr_regex = re.compile(r'^>gi\|([^|]*)\|[^|]*\|([^|]*)\|\s*([^\[]*)\s*\[([^\]]*)\]\s*$')
    _nr_fields = ("gi", "accession", "description", "source")
    _nr_template = FastaTemplate(_nr_regex, _nr_fields)

    # nrblast: same "gi|...|...|accession|" prefix; the rest is ignored.
    _nrblast_regex = re.compile(r'^>gi\|([^|]*)\|[^|]*\|([^|]*)\|.*$')
    _nrblast_fields = ("gi", "accession")
    _nrblast_template = FastaTemplate(_nrblast_regex, _nrblast_fields)


# Registry mapping each template name to its FastaTemplate instance, so
# that the template used for parsing can be chosen by name at run-time.
TEMPLATES = {
    "default": _t._default_template,
    "swissprot": _t._swissprot_template,
    "pdb": _t._pdb_template,
    "nr": _t._nr_template,
    "nrblast": _t._nrblast_template,
}