1 """
2 HHpred-related format parsers.
3
4 This module defines two HHpred format parsers: L{HHProfileParser} and
5 L{HHOutputParser}. The first one is used to read HMM (*.hhm) files:
6
7 >>> HHProfileParser("profile.hhm", "profile.pdb").parse()
8 <ProfileHMM> # ProfileHMM object
9
10 while the latter parses HHsearch results files (*.hhr):
11
12 >>> HHOutputParser().parse_file("hits.hhr"):
13 <HHpredHitList> # collection of HHpredHit-s
14
15 See L{ProfileHMM}, L{HHpredHitList} and L{HHpredHit} from L{csb.bio.hmm}
16 for details. For text serialization of HMM profiles, see L{HHMFileBuilder}
17 in this module.
18 """
19
20 import os
21 import re
22
23 import csb.io
24 import csb.bio.io
25 import csb.bio.structure as structure
26
27 from csb.core import Enum, CollectionIndexError, ItemNotFoundError
28 from csb.bio.sequence import Sequence, ProteinAlphabet, A3MAlignment
29
30 from csb.bio.hmm import State, Transition, ProfileHMM, HMMLayer, ProfileLength
31 from csb.bio.hmm import HHpredHitList, HHpredHit, ScoreUnits, States, EVDParameters
32 from csb.bio.hmm import StateNotFoundError, TransitionNotFoundError
37
40
43
46 """
47 A class that is HHpred HMM format aware.
48 Produces a L{ProfileHMM} object representation of a given HHpred profile
49 HMM.
50
51 @param hhm_file: *.hhm file name to parse
52 @type hhm_file: str
53 @param pdb_file: an optional hhm structure file, containing the structural
54 data, associated with the HMM.
55 @type pdb_file: str
56 @note: This is NOT a regular PDB file! It must be specifically formatted
57 for use with HHpred.
58
59 @raise IOError: if any of the files does not exist
60 """
61
62 - def __init__(self, hhm_file, pdb_file=None):
63
64 if not os.path.exists(hhm_file):
65 raise IOError("Could not read HHM file {0}".format(hhm_file))
66
67 if pdb_file:
68 if not os.path.exists(pdb_file):
69 raise IOError("Could not read structure file {0}".format(pdb_file))
70
71 self._file = hhm_file
72 self._pdb = pdb_file
73 self._properties = None
74 self._sequences = None
75 self._profile = None
76
77 self._chopped = False
78 self._chop()
79
124
126 """
127 Parse the HMM profile.
128
129 @param units: also convert the profile score units to the specified
130 L{csb.bio.hmm.ScoreUnits} kind
131 @type units: L{csb.core.EnumItem}
132
133 @return: a L{ProfileHMM} instance
134 @rtype: L{ProfileHMM}
135
136 @raise HHProfileFormatError: when the hhm file is invalid/corrupt
137 """
138 assert self._chopped
139
140 hmm = ProfileHMM(units=units, scale=-1000.0, logbase=2)
141 if self._profile:
142 self._parse_profile(hmm, units)
143 else:
144 raise HHProfileFormatError('Missing HMM profile table.')
145
146 if self._properties:
147 self._parse_properties(hmm)
148 else:
149 raise HHProfileFormatError('No profile properties found.')
150
151 if self._sequences:
152 self._parse_sequences(hmm)
153 else:
154 raise HHProfileFormatError('No profile MSA and secondary structure found.')
155
156 if hmm.dssp:
157 hmm._assign_secstructure()
158
159 if self._pdb:
160 self._parse_structure(hmm)
161
162 return hmm
163
165 """
166 Chop the HHM file into pieces - HMM properties, secondary structure +
167 MSA, HMM.
168 """
169
170 try:
171 with open(self._file, 'r') as f:
172 pr = csb.io.EntryReader(f, 'HHsearch', 'SEQ')
173 self._properties = pr.readall()[0].splitlines()
174
175 with open(self._file, 'r') as f:
176 sr = csb.io.EntryReader(f, '>', '#')
177 self._sequences = sr.readall()
178
179 with open(self._file, 'r') as f:
180 hr = csb.io.EntryReader(f, '#', '//')
181 self._profile = hr.readall()[0].splitlines()
182
183 self._chopped = True
184
185 except IndexError:
186 raise HHProfileFormatError('Corrupt HHM file.')
187
189 """
190 Parse the profile properties
191
192 @param hmm: the hmm object being constructed
193 @type hmm: L{ProfileHMM}
194 @return: the updated hmm
195 @rtype: hmm
196
197 @raise NotImplementedError: if an unexpected data field is encountered
198 """
199 assert self._chopped
200
201 for line in self._properties:
202
203 if line.startswith('NAME '):
204 hmm.name = line[6:].strip()
205
206 elif line.startswith('FAM '):
207 hmm.family = line[6:].strip()
208
209 elif line.startswith('LENG '):
210 m = re.search('(\d+)\D+(\d+)', line).groups()
211 m = tuple(map(int, m))
212 hmm.length = ProfileLength(m[0], m[1])
213
214 elif line.startswith('FILE '):
215 hmm.id = line[6:].strip()
216
217 elif line.startswith('HHsearch'):
218 hmm.version = float(line[9:])
219
220 elif line.startswith('NEFF '):
221 hmm.effective_matches = float(line[6:])
222
223 elif line.startswith('PCT '):
224 if line[6:].strip().lower() == 'true':
225 hmm.pseudocounts = True
226
227 elif line.startswith('EVD '):
228 lamda, mu = map(float, line[6:].split())
229 hmm.evd = EVDParameters(lamda, mu)
230
231 elif line[:5] in ('COM ', 'DATE ', 'FILT '):
232 pass
233 else:
234 raise NotImplementedError("Unexpected line: {0}.".format(line[0:5]))
235
236 if not hmm.id and hmm.name:
237 hmm.id = hmm.name.split()[0]
238 return hmm
239
241 """
242 Parse secondary structure and multiple alignment sections.
243
244 @param hmm: the hmm object being constructed
245 @type hmm: L{ProfileHMM}
246 @return: the updated hmm
247 @rtype: hmm
248 """
249 assert self._chopped
250
251 psipred = None
252 msa_entries = []
253
254 for entry in self._sequences:
255
256 header_token = entry[:8]
257 if header_token in ['>ss_dssp', '>sa_dssp', '>ss_pred', '>ss_conf', '>Consens']:
258
259 lines = entry.strip().splitlines()
260 seq = re.sub('\s+', '', ''.join(lines[1:]))
261
262 if header_token == '>ss_dssp':
263 hmm.dssp = structure.SecondaryStructure(seq)
264 elif header_token == '>sa_dssp':
265 hmm.dssp_solvent = seq
266 elif header_token == '>ss_pred':
267 psipred = seq
268 elif header_token == '>ss_conf':
269 conf = seq
270 hmm.psipred = structure.SecondaryStructure(psipred, conf)
271 elif header_token == '>Consens':
272 hmm.consensus = Sequence('Consensus', 'Consensus', seq)
273 else:
274 msa_entries.append(entry)
275
276 if msa_entries:
277 msa = '\n'.join(msa_entries)
278 hmm.alignment = A3MAlignment.parse(msa, strict=False)
279
280 return hmm
281
283 """
284 Parse the HMM profile.
285
286 @param hmm: the hmm object being constructed
287 @type hmm: L{ProfileHMM}
288 @return: the updated hmm
289 @rtype: L{ProfileHMM}
290
291 @raise NotImplementedError: when an unknown transition string is
292 encountered
293 """
294 assert self._chopped
295
296
297 hmm.start = State(States.Start)
298 hmm.end = State(States.End)
299
300 residues = None
301 background = {}
302 tran_types = None
303 tran_lines = []
304 start_probs = None
305
306 lines = iter(self._profile)
307 pattern = re.compile('^[A-Z\-]\s[0-9]+\s+')
308
309 if units == ScoreUnits.LogScales:
310
311 def parse_probability(v):
312 if v.strip() == '*':
313 return None
314 else:
315 return float(v)
316 else:
317
318 def parse_probability(v):
319 if v.strip() == '*':
320 return None
321 else:
322 return hmm._convert(units, float(v),
323 hmm.scale, hmm.logbase)
324
325
326
327 while True:
328 try:
329 line = next(lines)
330 except StopIteration:
331 break
332
333 if line.startswith('NULL'):
334 try:
335 backprobs = tuple(map(parse_probability, line.split()[1:]))
336
337 line = next(lines)
338 residues = line.split()[1:]
339 residues = [Enum.parse(ProteinAlphabet, aa) for aa in residues]
340
341 for pos, aa in enumerate(residues):
342 background[aa] = backprobs[pos]
343
344 line = next(lines)
345 tran_types = line.split()
346
347 line = next(lines)
348 start_probs = list(map(parse_probability, line.split()))
349 except StopIteration:
350 break
351
352 elif re.match(pattern, line):
353 emrow = line
354 try:
355 tran_lines.append(next(lines))
356
357 except StopIteration:
358 break
359
360 emprobs = emrow.split()
361 if len(emprobs) != 23:
362 raise HHProfileFormatError(
363 "Unexpected number of data fields: {0}".format(len(emprobs)))
364
365 rank = int(emprobs[1])
366 residue = structure.ProteinResidue(
367 rank=rank, type=emprobs[0], sequence_number=rank, insertion_code=None)
368 if residue.type == ProteinAlphabet.GAP:
369 raise HHProfileFormatError("Layer {0} can't be represented by a gap".format(rank))
370
371 new_layer = hmm.layers.append(HMMLayer(rank, residue))
372 if new_layer != rank:
373 raise HHProfileFormatError('Layer {0} defined as {1}'.format(new_layer, rank))
374
375 match = State(States.Match, emit=Enum.members(ProteinAlphabet))
376
377 match.rank = rank
378 match.background.set(background)
379
380 for col, aa in enumerate(residues):
381 prob = parse_probability(emprobs[col + 2])
382 match.emission.append(aa, prob)
383
384 hmm.layers[new_layer].append(match)
385 assert hmm.layers.last_index == match.rank
386
387
388
389
390 if len(hmm.layers) > 0:
391
392 first_match = hmm.layers[hmm.layers.start_index]
393
394 if start_probs[0] is None:
395 raise HHProfileFormatError("Transition Start > Match[1] is undefined")
396
397 start_tran = Transition(hmm.start, first_match[States.Match], start_probs[0])
398 hmm.start.transitions.append(start_tran)
399
400 if start_probs[1] is not None and start_probs[3] is not None:
401 start_ins = State(States.Insertion, emit=Enum.members(ProteinAlphabet))
402 start_ins.rank = 0
403 start_ins.background.set(background)
404 start_ins.emission = start_ins.background
405
406 hmm.start_insertion = start_ins
407
408 hmm.start.transitions.append(
409 Transition(hmm.start, hmm.start_insertion, start_probs[1]))
410
411 hmm.start_insertion.transitions.append(
412 Transition(hmm.start_insertion, first_match[States.Match], start_probs[3]))
413
414 if start_probs[4]:
415 hmm.start_insertion.transitions.append(
416 Transition(hmm.start_insertion, hmm.start_insertion, start_probs[4]))
417
418
419 if start_probs[2] is None and start_probs[6] is not None:
420
421 start_probs[2] = start_probs[6]
422
423 if start_probs[2] is not None:
424 start_del = State(States.Deletion)
425 start_del.rank = 1
426 hmm.layers[1].append(start_del)
427 start_tran = Transition(hmm.start, first_match[States.Deletion], start_probs[2])
428 hmm.start.transitions.append(start_tran)
429 else:
430 start_tran = Transition(hmm.start, hmm.end, start_probs[0])
431 hmm.start.transitions.append(start_tran)
432
433
434
435
436 for rank, fields in enumerate(tran_lines, start=hmm.layers.start_index):
437 assert hmm.layers[rank][States.Match].rank == rank
438
439 ofields = fields.split()
440 fields = tuple(map(parse_probability, ofields))
441
442
443 for col, neff in enumerate(tran_types[7:10], start=7):
444
445 if fields[col] is not None:
446 neff_value = float(ofields[col]) / abs(hmm.scale)
447
448 if neff == 'Neff':
449 hmm.layers[rank].effective_matches = neff_value
450
451 elif neff == 'Neff_I':
452 hmm.layers[rank].effective_insertions = neff_value
453
454 if States.Insertion not in hmm.layers[rank]:
455 insertion = State(States.Insertion, emit=Enum.members(ProteinAlphabet))
456 insertion.background.set(background)
457 insertion.emission.set(background)
458 insertion.rank = rank
459 hmm.layers[rank].append(insertion)
460
461 elif neff == 'Neff_D':
462 hmm.layers[rank].effective_deletions = neff_value
463
464 if States.Deletion not in hmm.layers[rank] and neff_value > 0:
465 deletion = State(States.Deletion)
466 deletion.rank = rank
467 hmm.layers[rank].append(deletion)
468
469
470 for col, tran in enumerate(tran_types):
471 probability = fields[col]
472
473 if probability is not None:
474 try:
475 self._add_transition(hmm, rank, tran, probability)
476 except (CollectionIndexError, ItemNotFoundError) as ex:
477 msg = "Can't add transition {0} at {1}: {2.__class__.__name__}, {2!s}"
478 raise HHProfileFormatError(msg.format(tran, rank, ex))
479
480 return hmm
481
483
484 match = hmm.layers[rank][States.Match]
485 nextmatch = None
486 if rank < hmm.layers.last_index:
487 nextmatch = hmm.layers[rank + 1][States.Match]
488 else:
489 nextmatch = hmm.end
490
491 if tran == 'M->M':
492 transition = Transition(match, nextmatch, probability)
493 match.transitions.append(transition)
494
495 elif tran == 'M->I':
496 insertion = hmm.layers[rank][States.Insertion]
497 transition = Transition(match, insertion, probability)
498 match.transitions.append(transition)
499
500 elif tran == 'M->D':
501 deletion = State(States.Deletion)
502 deletion.rank = rank + 1
503 hmm.layers[rank + 1].append(deletion)
504 transition = Transition(match, deletion, probability)
505 match.transitions.append(transition)
506
507 elif tran == 'I->M':
508 insertion = hmm.layers[rank][States.Insertion]
509 transition = Transition(insertion, nextmatch, probability)
510 insertion.transitions.append(transition)
511
512 elif tran == 'I->I':
513 insertion = hmm.layers[rank][States.Insertion]
514 selfloop = Transition(insertion, insertion, probability)
515 insertion.transitions.append(selfloop)
516
517 elif tran == 'D->M':
518 deletion = hmm.layers[rank][States.Deletion]
519 transition = Transition(deletion, nextmatch, probability)
520 deletion.transitions.append(transition)
521
522 elif tran == 'D->D':
523 deletion = hmm.layers[rank][States.Deletion]
524
525 if States.Deletion not in hmm.layers[rank + 1]:
526 nextdeletion = State(States.Deletion)
527 nextdeletion.rank = rank + 1
528 hmm.layers[rank + 1].append(nextdeletion)
529
530 else:
531 nextdeletion = hmm.layers[rank + 1][States.Deletion]
532 assert match.transitions[States.Deletion].successor == nextdeletion
533
534 transition = Transition(deletion, nextdeletion, probability)
535 deletion.transitions.append(transition)
536
537 else:
538 if not tran.startswith('Neff'):
539 raise NotImplementedError('Unknown transition: {0}'.format(tran))
540
580
581 HHpredProfileParser = HHProfileParser
585 """
586 Builder for HHpred's hhm files.
587
588 @param output: destination stream
589 @type output: file
590 """
591
593
594 if not hasattr(output, 'write'):
595 raise TypeError(output)
596
597 self._out = output
598
599 @property
601 """
602 Destination stream
603 @rtype: stream
604 """
605 return self._out
606
609
613
615 """
616 Append a new HMM to the destination stream.
617
618 @param hmm: profile HMM to serialize
619 @type hmm: L{ProfileHMM}
620 """
621
622 if hmm.score_units != ScoreUnits.LogScales:
623 raise ValueError('Scores must be converted to LogScales first.')
624
625 self.writeline('''HHsearch {0.version}
626 NAME {0.name}
627 FAM {0.family}
628 LENG {0.length.matches} match states, {0.length.layers} columns in multiple alignment
629 NEFF {0.effective_matches}
630 PCT {0.pseudocounts}'''.format(hmm))
631 if hmm.evd:
632 self.writeline('EVD {0.lamda} {0.mu}'.format(hmm.evd))
633
634 self.writeline('SEQ')
635 if hmm.dssp:
636 self.writeline('>ss_dssp')
637 self.writeline(hmm.dssp.to_string())
638 if hmm.dssp_solvent:
639 self.writeline('>sa_dssp')
640 self.writeline(hmm.dssp_solvent)
641 if hmm.psipred:
642 self.writeline('>ss_pred')
643 self.writeline(hmm.psipred.to_string())
644 self.writeline('>ss_conf')
645 confidence = [''.join(map(str, m.score)) for m in hmm.psipred]
646 self.writeline(''.join(confidence))
647
648 if hmm.alignment:
649 if hmm.consensus:
650 self.writeline(str(hmm.consensus))
651 self.writeline(hmm.alignment.format().rstrip('\r\n'))
652
653 self.writeline('#')
654
655 first_match = hmm.layers[1][States.Match]
656 null = [int(first_match.background[aa])
657 for aa in sorted(map(str, first_match.background))]
658 self.writeline('NULL {0}'.format('\t'.join(map(str, null))))
659 self.writeline('HMM {0}'.format(
660 '\t'.join(sorted(map(str, first_match.emission)))))
661
662 tran_types = 'M->M M->I M->D I->M I->I D->M D->D'.split()
663 self.writeline(' {0}'.format(
664 '\t'.join(tran_types + 'Neff Neff_I Neff_D'.split())))
665
666 self.write(" ")
667 for tran_type in tran_types:
668 source_statekind = Enum.parse(States, tran_type[0])
669 target_statekind = Enum.parse(States, tran_type[3])
670 if source_statekind == States.Match:
671 try:
672 self.write("{0:<7}\t".format(
673 int(hmm.start.transitions[target_statekind].probability)))
674 except TransitionNotFoundError:
675 self.write("*\t")
676 else:
677 self.write("*\t")
678 self.writeline('*\t' * 3)
679
680 for layer in hmm.layers:
681
682 self.write("{0} {1:<5}".format(layer.residue.type, layer.rank))
683 for aa in sorted(layer[States.Match].emission):
684 emission = layer[States.Match].emission[aa]
685 if emission is None:
686 emission = '*'
687 else:
688 emission = int(emission)
689 self.write("{0:<7}\t".format(emission))
690 self.writeline("{0}".format(layer.rank))
691
692 self.write(" ")
693 for tran_type in tran_types:
694 source_statekind = Enum.parse(States, tran_type[0])
695 target_statekind = Enum.parse(States, tran_type[3])
696
697 if target_statekind == States.Match and layer.rank == hmm.layers.last_index:
698 target_statekind = States.End
699
700 try:
701 state = layer[source_statekind]
702 self.write("{0:<7}\t".format(
703 int(state.transitions[target_statekind].probability)))
704 except StateNotFoundError:
705 self.write("*\t")
706
707 for data in (layer.effective_matches, layer.effective_insertions,
708 layer.effective_deletions):
709 if data is None:
710 data = '*'
711 else:
712 data = int(data * abs(hmm.scale))
713 self.write("{0:<7}\t".format(data))
714
715 self.writeline("\n")
716
717 self.writeline('//')
718
721 """
722 Parser for HHsearch result (*.hhr) files.
723
724 @param alignments: if set to False, the parser will skip the
725 alignments section of the file
726 @type alignments: bool
727 """
728
733
735 return "<HHsearch Result Parser>"
736
737 @property
739 """
740 True if hit alignments will be parsed
741 @rtype: bool
742 """
743 return self._alignments
744 @alignments.setter
746 self._alignments = bool(value)
747
748 - def parse_file(self, hhr_file, header_only=False):
749 """
750 Parse all hits from this HHpred result file.
751
752 @param hhr_file: input HHR file name
753 @type hhr_file: str
754
755 @return: parsed hits
756 @rtype: HHpredHitList
757
758 @raise HHOutputFormatError: if the hhr file is corrupt
759 """
760
761 with open(os.path.expanduser(hhr_file)) as stream:
762 return self._parse(stream, header_only)
763
765 """
766 Get all hits from an C{output} string.
767
768 @param output: HHpred standard output
769 @type output: str
770
771 @return: parsed hits
772 @rtype: HHpredHitList
773
774 @raise HHOutputFormatError: if the output is corrupt
775 """
776 stream = csb.io.MemoryStream()
777 stream.write(output)
778 stream.seek(0)
779
780 return self._parse(stream, header_only)
781
782 - def _parse(self, stream, header_only):
783
784 qlen = None
785 in_hits = False
786 in_alis = False
787 has_alis = False
788 c_rank = 0
789 header = {}
790 hits = {}
791 alis = {}
792
793 for line in stream:
794
795 if not in_hits and not in_alis:
796
797 if line.replace(' ', '').startswith('NoHitProbE-value'):
798 in_hits = True
799 continue
800 elif line.strip() == '':
801 continue
802 else:
803 columns = line.strip().split(None, 1)
804 if len(columns) == 2:
805
806 identifier, data = columns
807 if identifier in ('Query', 'Command'):
808 data = data.strip()
809 elif identifier == 'Neff':
810 data = float(data)
811 elif identifier in ('Searched_HMMs', 'Match_columns'):
812 data = int(data)
813
814 header[identifier] = data
815
816 if identifier == 'Match_columns':
817 qlen = data
818
819 if in_hits and not header_only:
820 if not line.strip():
821 in_hits = False
822 in_alis = True
823 if self.alignments:
824 continue
825 else:
826 break
827 elif line.strip() == 'Done':
828 in_hits = False
829 in_alis = False
830 break
831
832 description = line[:34].split()
833 rank = int(description[0])
834 id = description[1]
835
836 pos = line[85:94].strip()
837 start, end = map(int, pos.split('-'))
838
839 qpos = line[75:84].strip()
840 qstart, qend = map(int, qpos.split('-'))
841
842 probability = float(line[35:40]) / 100.0
843
844 hit = HHpredHit(rank, id, start, end, qstart, qend, probability, qlen)
845
846 hit.evalue = float(line[41:48])
847 hit.pvalue = float(line[49:56])
848 hit.score = float(line[57:63])
849 hit.ss_score = float(line[64:69])
850
851 hit.slength = int(line[94:].replace('(', '').replace(')', ''))
852
853 hits[hit.rank] = hit
854 alis[hit.rank] = {'q': [], 's': []}
855
856 elif in_alis and not header_only:
857 if line.startswith('Done'):
858 in_alis = False
859 break
860
861 elif line.startswith('No '):
862 c_rank = int(line[3:])
863 if c_rank not in hits:
864 raise HHOutputFormatError('Alignment {0}. refers to a non-existing hit'.format(c_rank))
865
866 elif line.startswith('>'):
867 hits[c_rank].name = line[1:].strip()
868
869 elif line.startswith('Probab='):
870 for pair in line.split():
871 key, value = pair.split('=')
872 if key == 'Identities':
873 hits[c_rank].identity = float(
874 value.replace('%', ''))
875 elif key == 'Similarity':
876 hits[c_rank].similarity = float(value)
877 elif key == 'Sum_probs':
878 hits[c_rank].prob_sum = float(value)
879
880 elif line.startswith('Q ') and not line[:11].rstrip() in ('Q Consensus', 'Q ss_pred','Q ss_conf', 'Q ss_dssp'):
881 for residue in line[22:]:
882 if residue.isspace() or residue.isdigit():
883 break
884 else:
885 alis[c_rank]['q'].append(residue)
886 has_alis = True
887
888 elif line.startswith('T ') and not line[:11].rstrip() in ('T Consensus', 'T ss_pred','T ss_conf', 'T ss_dssp'):
889 for residue in line[22:]:
890 if residue.isspace() or residue.isdigit():
891 break
892 else:
893 alis[c_rank]['s'].append(residue)
894
895 if self.alignments and has_alis:
896 for rank in alis:
897 try:
898 hits[rank].add_alignment(alis[rank]['q'], alis[rank]['s'])
899
900 except (KeyError, ValueError) as er:
901 raise HHOutputFormatError('Corrupt alignment at hit No {0}.\n {1}'.format(rank, er))
902
903 del alis
904
905 hits = HHpredHitList(hits.values())
906
907 hits.sort()
908
909
910 for identifier, data in header.items():
911 if identifier == 'Query':
912 hits.query_name = data
913 elif identifier == 'Match_columns':
914 hits.match_columns = data
915 elif identifier == 'No_of_seqs':
916 hits.no_of_seqs = data
917 elif identifier == 'Neff':
918 hits.neff = data
919 elif identifier == 'Searched_HMMs':
920 hits.searched_hmms = data
921 elif identifier == 'Date':
922 hits.date = data
923 elif identifier == 'Command':
924 hits.command = data
925
926 return hits
927
928 HHpredOutputParser = HHOutputParser
929