"""this module carries the Motifs class for handling motifs, reading them from files, etc.

Author: Kenneth Wayne Berendzen -> kwberendzen@alumni.utexas.net
License: GPLv2.0
Copyright: Kenneth Wayne Berendzen and University of Tuebingen 2008
"""

import types
import re
import copy

class Motifs:
    """Helpers for building, expanding and converting nucleotide motifs.

    A motif is a string of nucleotide letters, optionally containing IUPAC
    degenerate codes and gaps written as ``{n}`` (exactly n arbitrary
    positions) or ``{a,b}`` (between a and b arbitrary positions).

    Instance attributes:
        dictIUPAC -- maps each degenerate IUPAC code to the regular
                     expression fragment that motif_re() substitutes for it.
        revIUPAC  -- maps each letter to its complement; used by
                     motifRevComp().
    """

    # a '{' with one character on either side marks a composite (gapped) motif
    _COMPOSITE_RE = re.compile(r'.\{.')
    # fixed-size gap, e.g. {5}
    _FIXED_GAP_RE = re.compile(r'\{(\d+)\}')
    # variable-size gap, e.g. {2,5}
    _RANGE_GAP_RE = re.compile(r'\{(\d+),(\d+)\}')
    # either a complete {...} gap token or any single character
    _TOKEN_RE = re.compile(r'\{[^{}]*\}|.', re.DOTALL)

    def __init__(self):
        # NOTE: inside a regex character class '|' is a literal, so these
        # classes also match the '|' character. The values are left untouched
        # because callers receive (and may compare) the generated strings.
        self.dictIUPAC = {
            'N': '[A|T|G|C]', 'R': '[A|G]', 'Y': '[T|C]', 'W': '[A|T]',
            'S': '[G|C]', 'M': '[A|C]', 'K': '[T|G]', 'H': '[A|T|C]',
            'B': '[T|G|C]', 'V': '[A|G|C]', 'D': '[A|T|G]',
        }
        self.revIUPAC = {
            'T': 'A', 'A': 'T', 'C': 'G', 'G': 'C', 'R': 'Y', 'Y': 'R',
            'M': 'K', 'K': 'M', 'D': 'H', 'H': 'D', 'B': 'V', 'V': 'B',
            'N': 'N', 'S': 'S', 'W': 'W',
        }

    def __str__(self):
        """returns a short description of the instance.

        __str__ must return a string; returning None makes str(obj) raise
        TypeError.
        """
        return '%s(%d IUPAC degenerate codes)' % (
            self.__class__.__name__, len(self.dictIUPAC))

    def elems(self, _xrange, _yrange):
        """returns every concatenation x + y for x in _xrange and y in _yrange.

        The first sequence varies slowest. This is the building step used by
        allOligos() to extend oligonucleotides by one letter.
        """
        return [x + y for x in _xrange for y in _yrange]

    def allOligos(self, len, DNA = 1):
        """returns a list of all oligonucleotides of the specified size.

        len -- oligo length, an integer >= 1 (the parameter name shadows the
               builtin but is kept because callers may pass it by keyword).
        DNA -- 1 (default) uses the alphabet 'atgc', 0 uses RNA 'augc'.

        Raises ValueError if DNA is not 0 or 1 or if len < 1, and TypeError
        if len is not an integer.
        """
        if DNA == 1:
            alphabet = ['a', 't', 'g', 'c']
        elif DNA == 0:
            alphabet = ['a', 'u', 'g', 'c']
        else:
            raise ValueError("you can only enter 0 or 1")

        # bool is a subclass of int; the exact type check used before
        # rejected it, so keep rejecting it
        if isinstance(len, bool) or not isinstance(len, int):
            raise TypeError("you must enter an integer")

        if len < 1:
            raise ValueError("integer value must be bigger than 0")

        oligos = alphabet
        for _ in range(len - 1):
            oligos = self.elems(oligos, alphabet)
        return oligos

    def listfromfile(self, handle, mode = 0):
        """reads a line-delimited file object and returns its entries as a list.

        mode=0 -- a list of motifs; entries begin after a line holding only '..'
        mode=1 -- a list of sequence names; entries begin after a line holding
                  only '//'

        Empty lines are skipped and line terminators are stripped. If the
        marker line is never found an empty list is returned. The handle is
        closed when exiting, also when reading fails.

        Raises ValueError if mode is not 0 or 1 (the handle is left open then).
        """
        if mode != 0 and mode != 1:
            raise ValueError("mode can only be 0 or 1")

        marker = ".." if mode == 0 else "//"
        entries = []

        try:
            # one shared iterator, so the second loop continues after the
            # marker line instead of starting over
            lines = iter(handle)
            for line in lines:
                if line.rstrip('\r\n') == marker:
                    break
            for line in lines:
                entry = line.rstrip('\r\n')
                if entry:
                    entries.append(entry)
        finally:
            handle.close()
        return entries

    def appendstringtolist(self, _list, _string, pos = 0):
        """adds a fixed string to every entry of a list, in place.

        pos=0 appends _string after each entry, pos=1 puts it before.
        The same list object is modified and returned.

        Raises ValueError if pos is not 0 or 1 or if _list is not a list,
        and TypeError if _string is not a string.
        """
        if pos != 0 and pos != 1:
            raise ValueError("pos can only be 0 or 1")

        if not isinstance(_list, list):
            raise ValueError("variable for _list must be type list")

        if not isinstance(_string, str):
            raise TypeError("variable for _string must be type string")

        for idx, item in enumerate(_list):
            if pos == 1:
                _list[idx] = _string + item
            else:
                _list[idx] = item + _string
        return _list

    def motifRXdict(self, ls):
        """takes a list of motifs and returns a dictionary {MOTIF: regex}.

        Keys are the upper-cased motifs, values come from motif_re().
        Since dictionary keys are unique, repeated motifs are stored once.
        """
        mDict = {}
        for motif in ls:
            key = str(motif).upper()
            if key not in mDict:
                mDict[key] = self.motif_re(key)
        return mDict

    def smartMotifRXdict(self, ls):
        """takes a list of DNA motifs and returns {MOTIF: [regex, ...]}.

        Sense and antisense are treated together as one motif:
          * a plain motif maps to [sense] if it equals its reverse
            complement, otherwise to [sense, antisense];
          * a composite motif (one containing a gap such as 'AC{3}GG') maps
            to the regexes of every combination returned by
            composite_combos(), i.e. each member in both orientations while
            keeping the members' 5' to 3' order.
        The upper-cased input motif is the dictionary key.
        """
        mDict = {}
        for motif in ls:
            key = str(motif).upper()
            if key in mDict:
                continue
            if self._COMPOSITE_RE.search(key):
                mDict[key] = [self.motif_re(combo)
                              for combo in self.composite_combos(key)]
            else:
                antisense = self.motifRevComp(key)
                if key != antisense:
                    mDict[key] = [self.motif_re(key), self.motif_re(antisense)]
                else:
                    mDict[key] = [self.motif_re(key)]
        return mDict

    def composite_combos(self, composite):
        """returns all composite motifs for both orientations of each member.

        The composite is split at its '{...}' gaps into members. Every member
        that differs from its reverse complement contributes two variants
        (reverse complement first, then as entered); the gaps and the order
        of the members are kept. With k such members the result holds 2**k
        upper-cased motifs. If no member differs from its reverse complement
        the input is returned unchanged as a one-element list.

        Raises ValueError if a '{' has no closing '}'.
        """
        rest = composite.upper()   # remove case sensitivity

        members = []   # all of the elements, 5' to 3'
        gaps = []      # gaps[i] follows members[i]
        start = rest.find('{')
        while start != -1:
            # look for the closing brace after the opening one; an unclosed
            # gap would otherwise never shorten 'rest' and loop forever
            end = rest.find('}', start)
            if end == -1:
                raise ValueError(
                    "unbalanced '{' in composite motif: %r" % (composite,))
            members.append(rest[:start])
            gaps.append(rest[start:end + 1])
            rest = rest[end + 1:]
            start = rest.find('{')
        members.append(rest)

        # per member: [revcomp, sense] when they differ, otherwise [sense]
        variants = []
        for member in members:
            antisense = self.motifRevComp(member)
            if member != antisense:
                variants.append([antisense, member])
            else:
                variants.append([member])

        # if there is only one motif, we return the one entered
        if all(len(options) == 1 for options in variants):
            return [composite]

        # cartesian product with the first member varying slowest
        all_motifs = ['']
        for idx, options in enumerate(variants):
            gap = gaps[idx] if idx < len(gaps) else ''
            all_motifs = [prefix + option + gap
                          for prefix in all_motifs for option in options]
        return all_motifs

    def _range_gap_to_re(self, hit):
        """converts one '{a,b}' match into 'N{a,b}?' (lazy run of N).

        Invalid ranges (b <= 0 or a > b) are returned unchanged.
        """
        low, high = hit.group(1), hit.group(2)
        if int(high) <= 0 or int(low) > int(high):
            return hit.group(0)
        if int(low) == 0:
            # {0,5} becomes N{,5}? so the zero-length match is tried first
            return 'N{,' + high + '}?'
        return 'N{' + low + ',' + high + '}?'

    def motif_re(self, _m):
        """returns the motif's regular expression string (case INSENSITIVE).

        Steps:
          1. the motif is upper-cased;
          2. every fixed gap '{n}' is replaced by n 'N' characters;
          3. every valid range gap '{a,b}' becomes 'N{a,b}?';
          4. an 'N' typed directly in front of a range gap is merged with it
             ('NN{' -> 'N{') and a doubled lazy marker is collapsed
             ('}??' -> '}?');
          5. every IUPAC code found in self.dictIUPAC is replaced by its
             regex fragment; change self.dictIUPAC to alter these rules.
        """
        _m = _m.upper()

        # each gap is rewritten exactly once, in a single pass, so adjacent
        # or repeated gaps cannot be missed or rewritten twice
        _m = self._FIXED_GAP_RE.sub(
            lambda hit: 'N' * int(hit.group(1)), _m)
        _m = self._RANGE_GAP_RE.sub(self._range_gap_to_re, _m)

        _m = _m.replace('NN{', 'N{')
        _m = _m.replace('}??', '}?')

        for code in self.dictIUPAC:
            _m = _m.replace(code, self.dictIUPAC[code])
        return _m

    def motifRevComp(self, motif):
        """returns the reverse complement of a motif, IUPAC letters included.

        The motif is upper-cased. Letters are complemented via self.revIUPAC;
        characters without an entry are kept as they are. A complete gap
        token such as '{3}' or '{1,4}' is moved as one unit, so
        'A{3}CC' gives 'GG{3}T'.
        """
        pieces = []
        for token in reversed(self._TOKEN_RE.findall(motif.upper())):
            if len(token) > 1:
                # a whole {...} gap: keep its text, only its position changes
                pieces.append(token)
            else:
                pieces.append(self.revIUPAC.get(token, token))
        return ''.join(pieces)

    def listWrevcomps(self, ls):
        """returns a new list holding each motif followed by its reverse complement.

        The reverse complement is only added when it differs from the motif.
        Duplicates between different entries are not removed here.
        """
        newlist = []
        for motif in ls:
            newlist.append(motif)
            antisense = self.motifRevComp(motif)
            if motif != antisense:
                newlist.append(antisense)
        return newlist

    def Permute(self, _oliKombar, _oliMotifperm, OlliArr, L, m_len):
        """recursive worker for allcombinations().

        _oliKombar    -- sequence of the indices still available; either a
                         list/tuple of integers or a string of single digits
        _oliMotifperm -- scratch list of length m_len holding the indices
                         chosen so far (modified in place)
        OlliArr       -- the letters being permuted
        L             -- number of positions already filled
        m_len         -- length of the permutations to build

        Returns the list of all strings that complete the current prefix.
        """
        if L == m_len:
            return [''.join([OlliArr[_oliMotifperm[ii]] for ii in range(m_len)])]

        results = []
        for ii in range(len(_oliKombar)):
            _oliMotifperm[L] = int(_oliKombar[ii])
            # drop the chosen entry by position; works for strings and lists
            remaining = _oliKombar[:ii] + _oliKombar[ii + 1:]
            results.extend(
                self.Permute(remaining, _oliMotifperm, OlliArr, L + 1, m_len))
        return results

    def allcombinations(self, motif):
        """returns a list of all orderings (permutations) of the motif's letters.

        Repeated letters produce repeated entries; feed the result to
        motifRXdict() if unique motifs are needed. Motifs longer than 15
        letters are refused: "motif too long" is printed and an empty list
        is returned.
        """
        size = len(motif)
        if size > 15:
            print("motif too long")
            return []

        # indices are passed as integers; packing them into a digit string
        # would split every index >= 10 into separate digits
        return self.Permute(list(range(size)), [0] * size, list(motif), 0, size)





























