Previous CloneSet | Next CloneSet | Back to Main Report |
Clone Mass | Clones in CloneSet | Parameter Count | Clone Similarity | Syntax Category [Sequence Length] |
---|---|---|---|---|
230 | 2 | 6 | 0.985 | compound_stmt |
Clone Abstraction | Parameter Bindings |
Clone Instance (Click to see clone) | Line Count | Source Line | Source File |
---|---|---|---|
1 | 230 | 218 | Bio/MEME/Parser.py |
2 | 230 | 302 | Bio/Motif/Parsers/MEME.py |
| ||||
class _MASTConsumer: """ Consumer that can receive events from _MASTScanner. A _MASTConsumer parses lines from a mast text output file. The motif match diagrams are parsed using line buffering. Each of the buffering functions have a dummy variable that is required for testing using the Bio.ParserSupport.TaggingConsumer. If this variable isn't there, the TaggingConsumer barfs. In the _MASTScanner, None is passed in the place of this variable. """ def __init__ (self): self.data = MASTRecord( ) self._current_seq = "" self._line_buffer = [ ] self._buffer_size = 0 self._buffered_seq_start = 0 def _version (self,line): line = line.strip( ) ls = line.split( ) self.data._version(ls[2]) def _database (self,line): line = line.strip( ) ls = line.split( ) self.data._database(ls[1]) al = "" if ls[2]=="(nucleotide)": al = IUPAC.unambiguous_dna self.data._alphabet(al) else: al = IUPAC.protein self.data._alphabet(al) def _add_motif (self,line): line = line.strip( ) ls = line.split( ) m = Motif.MEMEMotif( ) m._alphabet(self.data.alphabet) m._length(ls[1]) name = ls[0] m._name(name) m._consensus(ls[2]) self.data._add_motif(m) def _add_match_diagram (self,line): line = line.strip( ) ls = line.split( ) self.data._add_diagram_for_sequence(ls[1],self._current_seq) ds = ls[1].split("_") i = 0 start = 0 for i in range(0,len(ds)): if ds[i].find("[")!= -1 or ds[i].find("<")!= -1: inst = Motif.Instance( ) inst._seqname (self._current_seq) inst._start (start) r = re.compile("\\d+") mn = r.findall(ds[i])[0] if ds[i].find("-")!= -1: inst.strand = "-" else: inst.strand = "+" motif = self.data.get_motif_by_name(mn) motif.add_instance(inst) start+=motif.length else: start+=int(ds[i]) def _add_sequence_match_with_diagram (self,line): line = line.strip( ) ls = line.split( ) self.data._add_sequence(ls[0]) self.data._add_diagram_for_sequence(ls[2],ls[0]) ds = ls[2].split("_") i = 0 start = 0 for i in range(0,len(ds)): if ds[i].find("+")!= -1 or ds[i].find("-")!= -1: inst = Motif.Instance( ) inst._seqname (ls[0]) inst._start (start) r = re.compile("\\d+") mn = r.findall(ds[i])[0] if ds[i].find("-")!= -1: inst.strand = "-" else: inst.strand = "+" motif = self.data.get_motif_by_name(mn) motif.add_instance(inst) start+=motif.length else: start+=int(ds[i]) def _add_diagram_from_buffer (self,dummy): line = "" for l in self._line_buffer: line+=l.strip( ) ls = line.split( ) self.data._add_diagram_for_sequence(ls[1],self._current_seq) ds = ls[1].split("_") i = 0 start = 0 for i in range(0,len(ds)): if ds[i].find("[")!= -1 or ds[i].find("<")!= -1: inst = Motif.Instance( ) inst._seqname (self._current_seq) inst._start (start) r = re.compile("\\d+") mn = r.findall(ds[i])[0] if ds[i].find("-")!= -1: inst.strand = "-" else: inst.strand = "+" motif = self.data.get_motif_by_name(mn) motif.add_instance(inst) start+=motif.length else: start+=int(ds[i]) def _set_current_seq (self,line): line = line.strip( ) self._current_seq = line if not self.data.sequences.count(line): self.data.sequences.append(line) def _add_line_to_buffer (self,line): line = line.strip( ) if not line.startswith("*****"): self._line_buffer.append(line) else: return -1 def _parse_buffer (self,dummy): """Parses the line buffer to get e-values for each instance of a motif. This buffer parser is the most likely point of failure for the MASTParser. """ insts = self.data.get_motif_matches_for_sequence(self._current_seq) if len(insts)>0: fullSeq = self._line_buffer[self._buffer_size-1] pvals = self._line_buffer[1].split( ) p = 0 lpval = len(pvals) while p<lpval: if pvals[p].count("e")>1: #Break blocks up by e and parse into valid floats. This only #works if there are no e-values greater than 1e-5. pvs = [ ] spe = pvals[p].split("e") spe.reverse( ) dotind = spe[1].find(".") if dotind== -1: thispval = spe[1][ -1]+"e"+spe[0] else: thispval = spe[1][dotind-1: ]+"e"+spe[0] pvs.append(thispval) for spi in range(2,len(spe)): dotind = spe[spi].find(".") prevdotind = spe[spi-1].find(".") if dotind!= -1: if prevdotind== -1: thispval = spe[spi][dotind-1: ]+"e"+spe[spi-1][ : -1] else: thispval = spe[spi][dotind-1: ]+"e"+spe[spi-1][0:prevdotind-1] else: if prevdotind== -1: thispval = spe[spi][ -1]+"e"+spe[spi-1][ : -1] else: thispval = spe[spi][ -1]+"e"+spe[spi-1][0:prevdotind-1] pvs.append(thispval) pvs.reverse( ) if p>0: pvals = pvals[0:p]+pvs+pvals[p+1: ] else: pvals = pvs+pvals[p+1: ] lpval = len(pvals) p+=1 i = 0 if len(pvals)!=len(insts): sys.stderr.write("Failure to parse p-values for "+self._current_seq+": "+self._line_buffer[1]+" to: "+str(pvals)+""" """ ) pvals = [ ] # else: # sys.stderr.write('These are just fine' + self._current_seq + ': ' + self._line_buffer[1] + " to: " + str(pvals) + "\n") for i in range(0,len(insts)): inst = insts[i] start = inst.start-self._buffered_seq_start+1 thisSeq = fullSeq[start:start+inst.length] thisSeq = Seq.Seq(thisSeq,self.data.alphabet) inst._sequence(thisSeq) if pvals: inst._pvalue(float(pvals[i])) def _blank_buffer (self,dummy): self._line_buffer = [ ] self._buffer_size = 0 def _collapse_buffer(self,dummy): if self._buffer_size==0: if len(self._line_buffer)>0: self._buffer_size = len(self._line_buffer) ll = self._line_buffer[self._buffer_size-1].split( ) self._line_buffer[self._buffer_size-1] = ll[1] self._buffered_seq_start = int(ll[0]) else: i = 0 for i in range(self._buffer_size,len(self._line_buffer)-1): self._line_buffer[i-self._buffer_size] = self._line_buffer[i-self._buffer_size]+self._line_buffer[i].strip( ) ll = self._line_buffer[len(self._line_buffer)-1].split( ) if int(ll[0])==self._buffered_seq_start+len(self._line_buffer[self._buffer_size-1]): self._line_buffer[self._buffer_size-1]+=ll[1] else: differ = int(ll[0])-(self._buffered_seq_start+len(self._line_buffer[self._buffer_size-1])) self._line_buffer[self._buffer_size-1]+="N"*differ self._line_buffer[self._buffer_size-1]+=ll[1] self._line_buffer = self._line_buffer[0:self._buffer_size] def _add_motif_match (self,line): line = line.strip( ) if line.find("[")!= -1 or line.find("<")!= -1: pass elif line.find("e")!= -1: pass elif line.find("+")!= -1: pass def noevent (self,line): pass |
| ||||
class _MASTConsumer: """ Consumer that can receive events from _MASTScanner. A _MASTConsumer parses lines from a mast text output file. The motif match diagrams are parsed using line buffering. Each of the buffering functions have a dummy variable that is required for testing using the Bio.ParserSupport.TaggingConsumer. If this variable isn't there, the TaggingConsumer barfs. In the _MASTScanner, None is passed in the place of this variable. """ def __init__ (self): self.data = MASTRecord( ) self._current_seq = "" self._line_buffer = [ ] self._buffer_size = 0 self._buffered_seq_start = 0 def _version (self,line): line = line.strip( ) ls = line.split( ) self.data._version(ls[2]) def _database (self,line): line = line.strip( ) ls = line.split( ) self.data._database(ls[1]) al = "" if ls[2]=="(nucleotide)": al = IUPAC.unambiguous_dna self.data._alphabet(al) else: al = IUPAC.protein self.data._alphabet(al) def _add_motif (self,line): line = line.strip( ) ls = line.split( ) m = MEMEMotif( ) m.alphabet = self.data.alphabet m.length = ls[1] name = ls[0] m.name = name m.add_instance(ls[2]) self.data._add_motif(m) def _add_match_diagram (self,line): line = line.strip( ) ls = line.split( ) self.data._add_diagram_for_sequence(ls[1],self._current_seq) ds = ls[1].split("_") i = 0 start = 0 for i in range(0,len(ds)): if ds[i].find("[")!= -1 or ds[i].find("<")!= -1: inst = MEMEInstance( ) inst._seqname (self._current_seq) inst._start (start) r = re.compile("\\d+") mn = r.findall(ds[i])[0] if ds[i].find("-")!= -1: inst.strand = "-" else: inst.strand = "+" motif = self.data.get_motif_by_name(mn) motif.add_instance(inst) start+=motif.length else: start+=int(ds[i]) def _add_sequence_match_with_diagram (self,line): line = line.strip( ) ls = line.split( ) self.data._add_sequence(ls[0]) self.data._add_diagram_for_sequence(ls[2],ls[0]) ds = ls[2].split("_") i = 0 start = 0 for i in range(0,len(ds)): if ds[i].find("+")!= -1 or ds[i].find("-")!= -1: inst = MEMEInstance( ) inst._seqname (ls[0]) inst._start (start) r = re.compile("\\d+") mn = r.findall(ds[i])[0] if ds[i].find("-")!= -1: inst.strand = "-" else: inst.strand = "+" motif = self.data.get_motif_by_name(mn) motif.add_instance(inst) start+=motif.length else: start+=int(ds[i]) def _add_diagram_from_buffer (self,dummy): line = "" for l in self._line_buffer: line+=l.strip( ) ls = line.split( ) self.data._add_diagram_for_sequence(ls[1],self._current_seq) ds = ls[1].split("_") i = 0 start = 0 for i in range(0,len(ds)): if ds[i].find("[")!= -1 or ds[i].find("<")!= -1: inst = MEMEInstance( ) inst._seqname (self._current_seq) inst._start (start) r = re.compile("\\d+") mn = r.findall(ds[i])[0] if ds[i].find("-")!= -1: inst.strand = "-" else: inst.strand = "+" motif = self.data.get_motif_by_name(mn) motif.add_instance(inst) start+=motif.length else: start+=int(ds[i]) def _set_current_seq (self,line): line = line.strip( ) self._current_seq = line if not self.data.sequences.count(line): self.data.sequences.append(line) def _add_line_to_buffer (self,line): line = line.strip( ) if not line.startswith("*****"): self._line_buffer.append(line) else: return -1 def _parse_buffer (self,dummy): """Parses the line buffer to get e-values for each instance of a motif. This buffer parser is the most likely point of failure for the MASTParser. """ insts = self.data.get_motif_matches_for_sequence(self._current_seq) if len(insts)>0: fullSeq = self._line_buffer[self._buffer_size-1] pvals = self._line_buffer[1].split( ) p = 0 lpval = len(pvals) while p<lpval: if pvals[p].count("e")>1: #Break blocks up by e and parse into valid floats. This only #works if there are no e-values greater than 1e-5. pvs = [ ] spe = pvals[p].split("e") spe.reverse( ) dotind = spe[1].find(".") if dotind== -1: thispval = spe[1][ -1]+"e"+spe[0] else: thispval = spe[1][dotind-1: ]+"e"+spe[0] pvs.append(thispval) for spi in range(2,len(spe)): dotind = spe[spi].find(".") prevdotind = spe[spi-1].find(".") if dotind!= -1: if prevdotind== -1: thispval = spe[spi][dotind-1: ]+"e"+spe[spi-1][ : -1] else: thispval = spe[spi][dotind-1: ]+"e"+spe[spi-1][0:prevdotind-1] else: if prevdotind== -1: thispval = spe[spi][ -1]+"e"+spe[spi-1][ : -1] else: thispval = spe[spi][ -1]+"e"+spe[spi-1][0:prevdotind-1] pvs.append(thispval) pvs.reverse( ) if p>0: pvals = pvals[0:p]+pvs+pvals[p+1: ] else: pvals = pvs+pvals[p+1: ] lpval = len(pvals) p+=1 i = 0 if len(pvals)!=len(insts): sys.stderr.write("Failure to parse p-values for "+self._current_seq+": "+self._line_buffer[1]+" to: "+str(pvals)+""" """ ) pvals = [ ] # else: # sys.stderr.write('These are just fine' + self._current_seq + ': ' + self._line_buffer[1] + " to: " + str(pvals) + "\n") for i in range(0,len(insts)): inst = insts[i] start = inst.start-self._buffered_seq_start+1 thisSeq = fullSeq[start:start+inst.length] thisSeq = Seq.Seq(thisSeq,self.data.alphabet) inst._sequence(thisSeq) if pvals: inst._pvalue(float(pvals[i])) def _blank_buffer (self,dummy): self._line_buffer = [ ] self._buffer_size = 0 def _collapse_buffer(self,dummy): if self._buffer_size==0: if len(self._line_buffer)>0: self._buffer_size = len(self._line_buffer) ll = self._line_buffer[self._buffer_size-1].split( ) self._line_buffer[self._buffer_size-1] = ll[1] self._buffered_seq_start = int(ll[0]) else: i = 0 for i in range(self._buffer_size,len(self._line_buffer)-1): self._line_buffer[i-self._buffer_size] = self._line_buffer[i-self._buffer_size]+self._line_buffer[i].strip( ) ll = self._line_buffer[len(self._line_buffer)-1].split( ) if int(ll[0])==self._buffered_seq_start+len(self._line_buffer[self._buffer_size-1]): self._line_buffer[self._buffer_size-1]+=ll[1] else: differ = int(ll[0])-(self._buffered_seq_start+len(self._line_buffer[self._buffer_size-1])) self._line_buffer[self._buffer_size-1]+="N"*differ self._line_buffer[self._buffer_size-1]+=ll[1] self._line_buffer = self._line_buffer[0:self._buffer_size] def _add_motif_match (self,line): line = line.strip( ) if line.find("[")!= -1 or line.find("<")!= -1: pass elif line.find("e")!= -1: pass elif line.find("+")!= -1: pass def noevent (self,line): pass |
| |||
class _MASTConsumer: """ Consumer that can receive events from _MASTScanner. A _MASTConsumer parses lines from a mast text output file. The motif match diagrams are parsed using line buffering. Each of the buffering functions have a dummy variable that is required for testing using the Bio.ParserSupport.TaggingConsumer. If this variable isn't there, the TaggingConsumer barfs. In the _MASTScanner, None is passed in the place of this variable. """ def __init__(self): self.data = MASTRecord( ) self._current_seq = "" self._line_buffer = [ ] self._buffer_size = 0 self._buffered_seq_start = 0 def _version(self,line): line = line.strip( ) ls = line.split( ) self.data._version(ls[2]) def _database(self,line): line = line.strip( ) ls = line.split( ) self.data._database(ls[1]) al = "" if ls[2]=="(nucleotide)": al = IUPAC.unambiguous_dna self.data._alphabet(al) else: al = IUPAC.protein self.data._alphabet(al) def _add_motif(self,line): line = line.strip( ) ls = line.split( ) m = [[#variable2ed1cb40]]( ) [[#variable2ed1ca80]] [[#variable2ed1c9c0]] name = ls[0] [[#variable2ed1c960]] m. [[#variable2ed1c900]](ls[2]) self.data._add_motif(m) def _add_match_diagram(self,line): line = line.strip( ) ls = line.split( ) self.data._add_diagram_for_sequence(ls[1],self._current_seq) ds = ls[1].split("_") i = 0 start = 0 for i in range(0,len(ds)): if ds[i].find("[")!= -1 or ds[i].find("<")!= -1: inst = [[#variable2ed1c8a0]]( ) inst._seqname(self._current_seq) inst._start(start) r = re.compile("\\d+") mn = r.findall(ds[i])[0] if ds[i].find("-")!= -1: inst.strand = "-" else: inst.strand = "+" motif = self.data.get_motif_by_name(mn) motif.add_instance(inst) start+=motif.length else: start+=int(ds[i]) def _add_sequence_match_with_diagram(self,line): line = line.strip( ) ls = line.split( ) self.data._add_sequence(ls[0]) self.data._add_diagram_for_sequence(ls[2],ls[0]) ds = ls[2].split("_") i = 0 start = 0 for i in range(0,len(ds)): if ds[i].find("+")!= -1 or ds[i].find("-")!= -1: inst = [[#variable2ed1c8a0]]( ) inst._seqname(ls[0]) inst._start(start) r = re.compile("\\d+") mn = r.findall(ds[i])[0] if ds[i].find("-")!= -1: inst.strand = "-" else: inst.strand = "+" motif = self.data.get_motif_by_name(mn) motif.add_instance(inst) start+=motif.length else: start+=int(ds[i]) def _add_diagram_from_buffer(self,dummy): line = "" for l in self._line_buffer: line+=l.strip( ) ls = line.split( ) self.data._add_diagram_for_sequence(ls[1],self._current_seq) ds = ls[1].split("_") i = 0 start = 0 for i in range(0,len(ds)): if ds[i].find("[")!= -1 or ds[i].find("<")!= -1: inst = [[#variable2ed1c8a0]]( ) inst._seqname(self._current_seq) inst._start(start) r = re.compile("\\d+") mn = r.findall(ds[i])[0] if ds[i].find("-")!= -1: inst.strand = "-" else: inst.strand = "+" motif = self.data.get_motif_by_name(mn) motif.add_instance(inst) start+=motif.length else: start+=int(ds[i]) def _set_current_seq(self,line): line = line.strip( ) self._current_seq = line if not self.data.sequences.count(line): self.data.sequences.append(line) def _add_line_to_buffer(self,line): line = line.strip( ) if not line.startswith("*****"): self._line_buffer.append(line) else: return -1 def _parse_buffer(self,dummy): """Parses the line buffer to get e-values for each instance of a motif. This buffer parser is the most likely point of failure for the MASTParser. """ insts = self.data.get_motif_matches_for_sequence(self._current_seq) if len(insts)>0: fullSeq = self._line_buffer[self._buffer_size-1] pvals = self._line_buffer[1].split( ) p = 0 lpval = len(pvals) while p<lpval: if pvals[p].count("e")>1: #Break blocks up by e and parse into valid floats. This only #works if there are no e-values greater than 1e-5. pvs = [ ] spe = pvals[p].split("e") spe.reverse( ) dotind = spe[1].find(".") if dotind== -1: thispval = spe[1][ -1]+"e"+spe[0] else: thispval = spe[1][dotind-1: ]+"e"+spe[0] pvs.append(thispval) for spi in range(2,len(spe)): dotind = spe[spi].find(".") prevdotind = spe[spi-1].find(".") if dotind!= -1: if prevdotind== -1: thispval = spe[spi][dotind-1: ]+"e"+spe[spi-1][ : -1] else: thispval = spe[spi][dotind-1: ]+"e"+spe[spi-1][0:prevdotind-1] else: if prevdotind== -1: thispval = spe[spi][ -1]+"e"+spe[spi-1][ : -1] else: thispval = spe[spi][ -1]+"e"+spe[spi-1][0:prevdotind-1] pvs.append(thispval) pvs.reverse( ) if p>0: pvals = pvals[0:p]+pvs+pvals[p+1: ] else: pvals = pvs+pvals[p+1: ] lpval = len(pvals) p+=1 i = 0 if len(pvals)!=len(insts): sys.stderr.write("Failure to parse p-values for "+self._current_seq+": "+self._line_buffer[1]+" to: "+str(pvals)+""" """ ) pvals = [ ] # else: # sys.stderr.write('These are just fine' + self._current_seq + ': ' + self._line_buffer[1] + " to: " + str(pvals) + "\n") for i in range(0,len(insts)): inst = insts[i] start = inst.start-self._buffered_seq_start+1 thisSeq = fullSeq[start:start+inst.length] thisSeq = Seq.Seq(thisSeq,self.data.alphabet) inst._sequence(thisSeq) if pvals: inst._pvalue(float(pvals[i])) def _blank_buffer(self,dummy): self._line_buffer = [ ] self._buffer_size = 0 def _collapse_buffer(self,dummy): if self._buffer_size==0: if len(self._line_buffer)>0: self._buffer_size = len(self._line_buffer) ll = self._line_buffer[self._buffer_size-1].split( ) self._line_buffer[self._buffer_size-1] = ll[1] self._buffered_seq_start = int(ll[0]) else: i = 0 for i in range(self._buffer_size,len(self._line_buffer)-1): self._line_buffer[i-self._buffer_size] = self._line_buffer[i-self._buffer_size]+self._line_buffer[i].strip( ) ll = self._line_buffer[len(self._line_buffer)-1].split( ) if int(ll[0])==self._buffered_seq_start+len(self._line_buffer[self._buffer_size-1]): self._line_buffer[self._buffer_size-1]+=ll[1] else: differ = int(ll[0])-(self._buffered_seq_start+len(self._line_buffer[self._buffer_size-1])) self._line_buffer[self._buffer_size-1]+="N"*differ self._line_buffer[self._buffer_size-1]+=ll[1] self._line_buffer = self._line_buffer[0:self._buffer_size] def _add_motif_match(self,line): line = line.strip( ) if line.find("[")!= -1 or line.find("<")!= -1: pass elif line.find("e")!= -1: pass elif line.find("+")!= -1: pass def noevent(self,line): pass |
CloneAbstraction |
Parameter Index | Clone Instance | Parameter Name | Value |
---|---|---|---|
1 | 1 | [[#2ed1cb40]] | Motif.MEMEMotif |
1 | 2 | [[#2ed1cb40]] | MEMEMotif |
2 | 1 | [[#2ed1ca80]] | m._alphabet(self.data.alphabet) |
2 | 2 | [[#2ed1ca80]] | m.alphabet = self.data.alphabet |
3 | 1 | [[#2ed1c9c0]] | m._length(ls[1]) |
3 | 2 | [[#2ed1c9c0]] | m.length = ls[1] |
4 | 1 | [[#2ed1c960]] | m._name(name) |
4 | 2 | [[#2ed1c960]] | m.name = name |
5 | 1 | [[#2ed1c900]] | _consensus |
5 | 2 | [[#2ed1c900]] | add_instance |
6 | 1 | [[#2ed1c8a0]] | Motif.Instance |
6 | 2 | [[#2ed1c8a0]] | MEMEInstance |