Previous CloneSet | Next CloneSet | Back to Main Report |
Clone Mass | Clones in CloneSet | Parameter Count | Clone Similarity | Syntax Category [Sequence Length] |
---|---|---|---|---|
180 | 2 | 2 | 0.999 | file_input_element_list[4] |
Clone Abstraction | Parameter Bindings |
Clone Instance (Click to see clone) | Line Count | Source Line | Source File |
---|---|---|---|
1 | 180 | 451 | Bio/MEME/Parser.py |
2 | 178 | 535 | Bio/Motif/Parsers/MEME.py |
| ||||
class MASTParser(AbstractParser): ''' Parser for MAST text output. HTML output cannot be parsed, yet. Returns a MASTRecord A MASTParser takes a file handle for a MAST text output file and returns a MASTRecord, containing the hits between motifs and sequences. The parser does some unusual line buffering to parse out match diagrams. Really complex diagrams often lead to an error message and p-values not being parsed for a given line. Methods: parse (handle): parses the data from the file handle passed to it. Example: f = open("mast_file.txt") parser = MASTParser() mast_record = parser.parse(f) for motif in mast_record.motifs: for instance in motif.instances: print instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue ''' def __init__ (self): self._consumer = _MASTConsumer( ) self._scanner = _MASTScanner( ) def parse (self,handle): self._scanner.feed(handle,self._consumer) return self._consumer.data class _MASTScanner: """ Scanner for MAST text output. """ def feed (self,handle,consumer): if isinstance(handle,File.UndoHandle): uhandle = handle else: uhandle = File.UndoHandle(handle) self._scan_header(uhandle,consumer) self._scan_matches(uhandle,consumer) self._scan_annotated_matches(uhandle,consumer) def _scan_header (self,uhandle,consumer): try : read_and_call_until(uhandle,consumer.noevent,contains = "MAST version") except ValueError: raise ValueError("Improper input file. Does not begin with a line with 'MAST version'") read_and_call(uhandle,consumer._version,contains = "MAST version") read_and_call_until(uhandle,consumer.noevent,start = "DATABASE AND MOTIFS") read_and_call(uhandle,consumer.noevent,start = "DATABASE") read_and_call(uhandle,consumer.noevent,start = "****") read_and_call(uhandle,consumer._database,contains = "DATABASE") read_and_call_until(uhandle,consumer.noevent,contains = "MOTIF WIDTH") read_and_call(uhandle,consumer.noevent,contains = "MOTIF") read_and_call(uhandle,consumer.noevent,contains = "----") read_and_call_until(uhandle,consumer._add_motif,blank = 1) read_and_call_until(uhandle,consumer.noevent,start = "SECTION II:") def _scan_matches (self,uhandle,consumer): read_and_call_until(uhandle,consumer.noevent,start = "SEQUENCE NAME") read_and_call(uhandle,consumer.noevent,start = "SEQUENCE NAME") read_and_call(uhandle,consumer.noevent,start = "---") # read_and_call_until(uhandle, consumer._add_sequence_match_with_diagram, blank = 1) read_and_call_until(uhandle,consumer.noevent,blank = 1) read_and_call(uhandle,consumer.noevent,blank = 1) def _scan_annotated_matches (self,uhandle,consumer): read_and_call_until(uhandle,consumer.noevent,start = "SECTION III:") read_and_call(uhandle,consumer.noevent,start = "SECTION III:") read_and_call_until(uhandle,consumer.noevent,start = "****") read_and_call(uhandle,consumer.noevent,start = "****") read_and_call_until(uhandle,consumer.noevent,start = "*****") read_and_call(uhandle,consumer.noevent) read_and_call_while(uhandle,consumer.noevent,blank = 1) readMatches = 1 while readMatches==1: if consumer._current_seq: if consumer._buffer_size!=0: consumer._parse_buffer(None) consumer._blank_buffer(None) read_and_call(uhandle,consumer._set_current_seq) read_and_call_until(uhandle,consumer.noevent,start = " DIAGRAM") read_and_call_until(uhandle,consumer._add_line_to_buffer,blank = 1) consumer._add_diagram_from_buffer(None) consumer._blank_buffer(None) read_and_call(uhandle,consumer.noevent,blank = 1) while 1: line = safe_peekline(uhandle) if line.startswith("****"): consumer._parse_buffer(None) readMatches = 0 break read_and_call_until(uhandle,consumer._add_line_to_buffer,blank = 1) read_and_call(uhandle,consumer.noevent,blank = 1) consumer._collapse_buffer(None) if attempt_read_and_call(uhandle,consumer.noevent,blank = 1): break elif attempt_read_and_call(uhandle,consumer.noevent,start = "*****"): consumer._parse_buffer(None) consumer._blank_buffer(None) readMatches = 0 break class MASTRecord: """The class for holding the results from a MAST run. A MASTRecord holds data about matches between motifs and sequences. The motifs held by the MASTRecord are objects of the class MEMEMotif. Methods: get_motif_matches_for_sequence(sequence_name): returns all of the motif matches within a given sequence. The matches are objects of the class MEME.Motif.Instance get_motif_matches (motif_name): returns all of the matches for a motif in the sequences searched. The matches returned are of class MEME.Motif.Instance get_motif_by_name (motif_name): returns a MEMEMotif with the given name. """ def __init__ (self): self.sequences = [ ] self.version = "" self.matches = [ ] self.database = "" self.diagrams = { } self.alphabet = None self.motifs = [ ] def _version (self,version): self.version = version def _alphabet (self,alphabet): if alphabet==IUPAC.protein or alphabet==IUPAC.ambiguous_dna or alphabet==IUPAC.unambiguous_dna: self.alphabet = alphabet else: return -1 def _database(self,database): self.database = database def get_motif_matches_for_sequence (self,seq): insts = [ ] for m in self.motifs: for i in m.instances: if i.sequence_name==seq: insts.append(i) insts.sort( lambda x,y:cmp(x.start,y.start)) return insts def get_motif_matches (self,motif): m = self.get_motif_by_name (motif.name) return m.instances def _add_diagram_for_sequence (self,diagram,seq): self.diagrams[seq] = diagram def _add_match (self,match): self.matches.append(match) def _add_sequence (self,sequence): self.sequences.append(sequence) def _add_motif (self,motif): self.motifs.append(motif) def get_motif_by_name (self,name): for m in self.motifs: if m.name==name: return m |
| ||||
class MASTParser(AbstractParser): ''' Parser for MAST text output. HTML output cannot be parsed, yet. Returns a MASTRecord A MASTParser takes a file handle for a MAST text output file and returns a MASTRecord, containing the hits between motifs and sequences. The parser does some unusual line buffering to parse out match diagrams. Really complex diagrams often lead to an error message and p-values not being parsed for a given line. Methods: parse (handle): parses the data from the file handle passed to it. Example: >>>f = open("mast_file.txt") >>>parser = MASTParser() >>>mast_record = parser.parse(f) >>>for motif in mast_record.motifs: >>> for instance in motif.instances: >>> print instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue ''' def __init__ (self): self._consumer = _MASTConsumer( ) self._scanner = _MASTScanner( ) def parse (self,handle): self._scanner.feed(handle,self._consumer) return self._consumer.data class _MASTScanner: """ Scanner for MAST text output. """ def feed (self,handle,consumer): if isinstance(handle,File.UndoHandle): uhandle = handle else: uhandle = File.UndoHandle(handle) self._scan_header(uhandle,consumer) self._scan_matches(uhandle,consumer) self._scan_annotated_matches(uhandle,consumer) def _scan_header (self,uhandle,consumer): try : read_and_call_until(uhandle,consumer.noevent,contains = "MAST version") except ValueError: raise ValueError("Improper input file. Does not begin with a line with 'MAST version'") read_and_call(uhandle,consumer._version,contains = "MAST version") read_and_call_until(uhandle,consumer.noevent,start = "DATABASE AND MOTIFS") read_and_call(uhandle,consumer.noevent,start = "DATABASE") read_and_call(uhandle,consumer.noevent,start = "****") read_and_call(uhandle,consumer._database,contains = "DATABASE") read_and_call_until(uhandle,consumer.noevent,contains = "MOTIF WIDTH") read_and_call(uhandle,consumer.noevent,contains = "MOTIF") read_and_call(uhandle,consumer.noevent,contains = "----") read_and_call_until(uhandle,consumer._add_motif,blank = 1) read_and_call_until(uhandle,consumer.noevent,start = "SECTION II:") def _scan_matches (self,uhandle,consumer): read_and_call_until(uhandle,consumer.noevent,start = "SEQUENCE NAME") read_and_call(uhandle,consumer.noevent,start = "SEQUENCE NAME") read_and_call(uhandle,consumer.noevent,start = "---") # read_and_call_until(uhandle, consumer._add_sequence_match_with_diagram, blank = 1) read_and_call_until(uhandle,consumer.noevent,blank = 1) read_and_call(uhandle,consumer.noevent,blank = 1) def _scan_annotated_matches (self,uhandle,consumer): read_and_call_until(uhandle,consumer.noevent,start = "SECTION III:") read_and_call(uhandle,consumer.noevent,start = "SECTION III:") read_and_call_until(uhandle,consumer.noevent,start = "****") read_and_call(uhandle,consumer.noevent,start = "****") read_and_call_until(uhandle,consumer.noevent,start = "*****") read_and_call(uhandle,consumer.noevent) read_and_call_while(uhandle,consumer.noevent,blank = 1) readMatches = 1 while readMatches==1: if consumer._current_seq: if consumer._buffer_size!=0: consumer._parse_buffer(None) consumer._blank_buffer(None) read_and_call(uhandle,consumer._set_current_seq) read_and_call_until(uhandle,consumer.noevent,start = " DIAGRAM") read_and_call_until(uhandle,consumer._add_line_to_buffer,blank = 1) consumer._add_diagram_from_buffer(None) consumer._blank_buffer(None) read_and_call(uhandle,consumer.noevent,blank = 1) while 1: line = safe_peekline(uhandle) if line.startswith("****"): consumer._parse_buffer(None) readMatches = 0 break read_and_call_until(uhandle,consumer._add_line_to_buffer,blank = 1) read_and_call(uhandle,consumer.noevent,blank = 1) consumer._collapse_buffer(None) if attempt_read_and_call(uhandle,consumer.noevent,blank = 1): break elif attempt_read_and_call(uhandle,consumer.noevent,start = "*****"): consumer._parse_buffer(None) consumer._blank_buffer(None) readMatches = 0 break class MASTRecord: """The class for holding the results from a MAST run. A MASTRecord holds data about matches between motifs and sequences. The motifs held by the MASTRecord are objects of the class MEMEMotif. Methods: get_motif_matches_for_sequence(sequence_name): returns all of the motif matches within a given sequence. The matches are objects of the class MEMEInstance get_motif_matches (motif_name): returns all of the matches for a motif in the sequences searched. The matches returned are of class MEMEInstance get_motif_by_name (motif_name): returns a MEMEMotif with the given name. """ def __init__ (self): self.sequences = [ ] self.version = "" self.matches = [ ] self.database = "" self.diagrams = { } self.alphabet = None self.motifs = [ ] def _version (self,version): self.version = version def _alphabet (self,alphabet): if alphabet==IUPAC.protein or alphabet==IUPAC.ambiguous_dna or alphabet==IUPAC.unambiguous_dna: self.alphabet = alphabet else: return -1 def _database(self,database): self.database = database def get_motif_matches_for_sequence (self,seq): insts = [ ] for m in self.motifs: for i in m.instances: if i.sequence_name==seq: insts.append(i) insts.sort( lambda x,y:cmp(x.start,y.start)) return insts def get_motif_matches (self,motif): m = self.get_motif_by_name (motif.name) return m.instances def _add_diagram_for_sequence (self,diagram,seq): self.diagrams[seq] = diagram def _add_match (self,match): self.matches.append(match) def _add_sequence (self,sequence): self.sequences.append(sequence) def _add_motif (self,motif): self.motifs.append(motif) def get_motif_by_name (self,name): for m in self.motifs: if m.name==name: return m |
| |||
class MASTParser(AbstractParser): [[#variable2d9a26c0]] def __init__(self): self._consumer = _MASTConsumer( ) self._scanner = _MASTScanner( ) def parse(self,handle): self._scanner.feed(handle,self._consumer) return self._consumer.data class _MASTScanner: """ Scanner for MAST text output. """ def feed(self,handle,consumer): if isinstance(handle,File.UndoHandle): uhandle = handle else: uhandle = File.UndoHandle(handle) self._scan_header(uhandle,consumer) self._scan_matches(uhandle,consumer) self._scan_annotated_matches(uhandle,consumer) def _scan_header(self,uhandle,consumer): try : read_and_call_until(uhandle,consumer.noevent,contains = "MAST version") except ValueError: raise ValueError("Improper input file. Does not begin with a line with 'MAST version'") read_and_call(uhandle,consumer._version,contains = "MAST version") read_and_call_until(uhandle,consumer.noevent,start = "DATABASE AND MOTIFS") read_and_call(uhandle,consumer.noevent,start = "DATABASE") read_and_call(uhandle,consumer.noevent,start = "****") read_and_call(uhandle,consumer._database,contains = "DATABASE") read_and_call_until(uhandle,consumer.noevent,contains = "MOTIF WIDTH") read_and_call(uhandle,consumer.noevent,contains = "MOTIF") read_and_call(uhandle,consumer.noevent,contains = "----") read_and_call_until(uhandle,consumer._add_motif,blank = 1) read_and_call_until(uhandle,consumer.noevent,start = "SECTION II:") def _scan_matches(self,uhandle,consumer): read_and_call_until(uhandle,consumer.noevent,start = "SEQUENCE NAME") read_and_call(uhandle,consumer.noevent,start = "SEQUENCE NAME") read_and_call(uhandle,consumer.noevent,start = "---") # read_and_call_until(uhandle, consumer._add_sequence_match_with_diagram, blank = 1) read_and_call_until(uhandle,consumer.noevent,blank = 1) read_and_call(uhandle,consumer.noevent,blank = 1) def _scan_annotated_matches(self,uhandle,consumer): read_and_call_until(uhandle,consumer.noevent,start = "SECTION III:") read_and_call(uhandle,consumer.noevent,start = "SECTION III:") read_and_call_until(uhandle,consumer.noevent,start = "****") read_and_call(uhandle,consumer.noevent,start = "****") read_and_call_until(uhandle,consumer.noevent,start = "*****") read_and_call(uhandle,consumer.noevent) read_and_call_while(uhandle,consumer.noevent,blank = 1) readMatches = 1 while readMatches==1: if consumer._current_seq: if consumer._buffer_size!=0: consumer._parse_buffer(None) consumer._blank_buffer(None) read_and_call(uhandle,consumer._set_current_seq) read_and_call_until(uhandle,consumer.noevent,start = " DIAGRAM") read_and_call_until(uhandle,consumer._add_line_to_buffer,blank = 1) consumer._add_diagram_from_buffer(None) consumer._blank_buffer(None) read_and_call(uhandle,consumer.noevent,blank = 1) while 1: line = safe_peekline(uhandle) if line.startswith("****"): consumer._parse_buffer(None) readMatches = 0 break read_and_call_until(uhandle,consumer._add_line_to_buffer,blank = 1) read_and_call(uhandle,consumer.noevent,blank = 1) consumer._collapse_buffer(None) if attempt_read_and_call(uhandle,consumer.noevent,blank = 1): break elif attempt_read_and_call(uhandle,consumer.noevent,start = "*****"): consumer._parse_buffer(None) consumer._blank_buffer(None) readMatches = 0 break class MASTRecord: [[#variable2d9a26a0]] def __init__(self): self.sequences = [ ] self.version = "" self.matches = [ ] self.database = "" self.diagrams = { } self.alphabet = None self.motifs = [ ] def _version(self,version): self.version = version def _alphabet(self,alphabet): if alphabet==IUPAC.protein or alphabet==IUPAC.ambiguous_dna or alphabet==IUPAC.unambiguous_dna: self.alphabet = alphabet else: return -1 def _database(self,database): self.database = database def get_motif_matches_for_sequence(self,seq): insts = [ ] for m in self.motifs: for i in m.instances: if i.sequence_name==seq: insts.append(i) insts.sort( lambda x,y:cmp(x.start,y.start)) return insts def get_motif_matches(self,motif): m = self.get_motif_by_name(motif.name) return m.instances def _add_diagram_for_sequence(self,diagram,seq): self.diagrams[seq] = diagram def _add_match(self,match): self.matches.append(match) def _add_sequence(self,sequence): self.sequences.append(sequence) def _add_motif(self,motif): self.motifs.append(motif) def get_motif_by_name(self,name): for m in self.motifs: if m.name==name: return m |
CloneAbstraction |
Parameter Index | Clone Instance | Parameter Name | Value |
---|---|---|---|
1 | 1 | [[#2d9a26c0]] | ''' Parser for MAST text output. HTML output cannot be parsed, yet. Returns a MASTRecord A MASTParser takes a file handle for a MAST text output file and returns a MASTRecord, containing the hits between motifs and sequences. The parser does some unusual line buffering to parse out match diagrams. Really complex diagrams often lead to an error message and p-values not being parsed for a given line. Methods: parse (handle): parses the data from the file handle passed to it. Example: f = open("mast_file.txt") parser = MASTParser() mast_record = parser.parse(f) for motif in mast_record.motifs: for instance in motif.instances: print instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue ''' |
1 | 2 | [[#2d9a26c0]] | ''' Parser for MAST text output. HTML output cannot be parsed, yet. Returns a MASTRecord A MASTParser takes a file handle for a MAST text output file and returns a MASTRecord, containing the hits between motifs and sequences. The parser does some unusual line buffering to parse out match diagrams. Really complex diagrams often lead to an error message and p-values not being parsed for a given line. Methods: parse (handle): parses the data from the file handle passed to it. Example: >>>f = open("mast_file.txt") >>>parser = MASTParser() >>>mast_record = parser.parse(f) >>>for motif in mast_record.motifs: >>> for instance in motif.instances: >>> print instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue ''' |
2 | 1 | [[#2d9a26a0]] | """The class for holding the results from a MAST run. A MASTRecord holds data about matches between motifs and sequences. The motifs held by the MASTRecord are objects of the class MEMEMotif. Methods: get_motif_matches_for_sequence(sequence_name): returns all of the motif matches within a given sequence. The matches are objects of the class MEME.Motif.Instance get_motif_matches (motif_name): returns all of the matches for a motif in the sequences searched. The matches returned are of class MEME.Motif.Instance get_motif_by_name (motif_name): returns a MEMEMotif with the given name. """ |
2 | 2 | [[#2d9a26a0]] | """The class for holding the results from a MAST run. A MASTRecord holds data about matches between motifs and sequences. The motifs held by the MASTRecord are objects of the class MEMEMotif. Methods: get_motif_matches_for_sequence(sequence_name): returns all of the motif matches within a given sequence. The matches are objects of the class MEMEInstance get_motif_matches (motif_name): returns all of the matches for a motif in the sequences searched. The matches returned are of class MEMEInstance get_motif_by_name (motif_name): returns a MEMEMotif with the given name. """ |