"""Use this module to map motifs to FASTA files. A queue of files can be
processed in one run, but the matches held in memory are replaced each time
the next file is processed. If you want to keep the matches from memory,
run MapFasta.mapFiles() for one file, save the results to a variable, and
then continue with the next file; otherwise they will be lost.

Author: Kenneth Wayne Berendzen -> kwberendzen@alumni.utexas.net
License: GPLv2.0
Copyright: Kenneth Wayne Berendzen and University of Tuebingen 2008   
"""

############modules#############
#local modules
import fasta
import motifs
import folders

#global modules
from Bio import Seq
from Bio import SeqRecord
from Bio import Alphabet
import Bio.Alphabet.IUPAC
import types
import re
import os
import copy
import time


###########Classes################
class MapFasta:
	"""Global Variables: 
	outputMode: 0 = memory (in self.hits_local and self.hits_global), no file handles opened,
	 only the last file is retained if you enter more than one
	1 = to disk directly (only for MTBS); 
	2 = process all in memory, when finished dump to disk;
	3 = process in memory (as outputMode 0), opens file handles but does not save to disk;
	 call writeOutputMTBS/writeOutputMMTB and then closeOutput yourself

	fileScope: 0 = return per sequence and file whole, 
	 1 = only per seq, 
	 2 = only file whole; should be called for each file 

	use M for motifs.Motifs()
	F_loader used for fasta.Loader(), instance made as needed
	Fiter used for fasta.Iterator(), instance made as needed

	Fdict should be used only if the user wants to map sequences from memory:
	 load sequences using addSEQtoFdict and call mapObjs(). You may save the results
	 to a file, or keep them in memory, in which case they are saved in self.localhits_dict.
	 If you want to use a list of FastaRecord, make the list first and use fasta.convert_fasta_to_dict

	self.hits_local contains the DICT of shortname{(seqlen,motifs{[(begin,end),(begin,end)]})}  (MTBS)
	self.hits_global contains the dictionary MOTIFS:->HITS (MMTB)
	self.fileAppend holds a string used to name the output files; if empty, the current date is used
	"""


	def __init__(self, _files = None, _outputMode = 0, _fileScope = 0, _smartmotif = 0):
		"""Create a mapper with an optional file queue and mode settings.

		_files: list of FASTA file paths to process (default: a new empty list)
		_outputMode: 0-3, see the class docstring
		_fileScope: 0-2, see the class docstring
		_smartmotif: 0 = every motif maps to one regex, otherwise each motif
		 maps to a list of regexes (see mapSEQ)
		"""
		# A list literal as default argument would be shared by every instance
		# created without _files, so a fresh list is made per instance instead.
		if _files is None:
			_files = []

		self.files = _files		#holds file paths
		self.smartmotif = _smartmotif   #smart motif toggle: 0 = one regex per motif, otherwise a list of regexes per motif
		self.motifsRX = {}		#holds motifs to be mapped
		self.outputMode = _outputMode	#holds output type
		self.fileScope = _fileScope	#holds the option for mapping, only per seq1 or all2, or both0
		self.hits_local	= {} 		#holds hits for each sequence :SEQ->(len,MOTIFs->HITS)
		self.hits_global = {}      	#holds hits for the whole file MOTIF:->HITS
		self.M = motifs.Motifs() 	#make a Motifs class instance
		self.F_loader = '' 		#placeholder, replaced by a fasta.Loader instance when needed
		self.Fiter = '' 		#placeholder, replaced by a fasta.Iterator instance when needed
		self.Fld = folders.Folders()	#make a Folders class instance
		self.Fdict = {}			#holds a dictionary name->sequence, use for mapObjs only!
		self.localhits_dict = {}	#holds match TUPLES as dictionary, use with mapObjs only!
		self.HandleMTBS	= None		#holds the open handle for the per-sequence output
		self.HandleMMTB = None		#holds the open handle for the whole-file output
		self.fileAppend = ''		#output file name stem used by setOutput(); empty means use the date
		self.pointMapData = {}		#holds whatever self.pointMaps() can make
		self.pointCurveData = {}	#holds whatever self.pointCurves() can make
		self.globalallfiles = 0         #0 means hits_global is global per file, otherwise, over all files and all sequences



	def clear(self):
		"""Reset the instance to an empty state.

		File queue, motifs, modes, results, helper instances and output
		handle references are all reset. self.smartmotif and
		self.globalallfiles are not touched, and any open output handles
		are dropped without being closed.
		"""
		# inputs and mode switches
		self.files, self.motifsRX = [], {}
		self.outputMode = self.fileScope = 0

		# results of earlier runs
		self.hits_local, self.hits_global = {}, {}
		self.Fdict, self.localhits_dict = {}, {}
		self.pointMapData, self.pointCurveData = {}, {}

		# helper objects; the FASTA readers are only created when needed
		self.M = motifs.Motifs()
		self.Fld = folders.Folders()
		self.F_loader = self.Fiter = ''

		# output state
		self.HandleMTBS = self.HandleMMTB = None
		self.fileAppend = ''

	def __str__(self):
		s = "Your root path is: " + str(self.Fld.settings['ROOTFOLDER']) + "\n" + "this is under self.Fld.settings['ROOTFOLDER']"
		s = s + "\nTo use: loadMotifs() and loadFiles(), then run mapFiles() or run pointMaps(len) or pointCurves(len)"
		s = s + "\nloadFiles(0,directory) or loadFiles(1) - you will enter them by hand from a function call"
		s = s + "\nloadMotifs(0,OpenMotifsHandle) or loadMotifs(1,list[])"
		s = s + "\nSequences in Fdict (mapObjs): " + str(len(self.Fdict))
		s = s + "\nfileAppend:" + self.fileAppend
		s = s + "\noutputMode: " + str(self.outputMode)
		s = s + "\nfileScope: "+ str(self.fileScope)
		s = s + "\nmotifs loaded: " + str(len(self.motifsRX))
		s = s + "\nfiles queued: " + str(len(self.files))
		s = s + "\nSequences Mapped (hits_local): " + str(len(self.hits_local))
		s = s + "\nAll motifs over all Seqs (hits_global): " + str(len(self.hits_global))
		s = s + "\npointMapData: " + str(len(self.pointMapData))
		s = s + "\npointCurveData: " + str(len(self.pointCurveData))
		s = s + "\nsmartmotif: " + str(self.smartmotif)
		s = s + "\nglobalallfiles: " + str(self.globalallfiles)
		return s
	

	def loadFiles(self, man = 0, dir = ''):
		"""this calls the file path reading fuction of the Folders class. 
		man = 0, all from a folder and requires a directory path, 
		man = 1 enter files by hand
		"""
		self.files = self.Fld.makeFilelist(man,dir)


	def checkMODES(self):
		"""this is used to check the user has not altered these variables into 
		something usless.
		"""
		if type(self.outputMode) != types.IntType:
			raise TypeError, "ouptutMode must be an integer"
			return 0

		if self.outputMode < 0 or self.outputMode > 3:
			raise ValueError, "outputMode must be a 0, 1, 2 or 3"
			return 0
		
		if type(self.fileScope) != types.IntType:
			raise TypeError, "ouptutMode must be an integer"
			return 0

		if self.fileScope < 0 or self.fileScope > 2:
			raise ValueError, "outputMode must be a 0, 1, or 2 "
			return 0


	def loadMotifs(self, man = 0, ls = [], motifsHandle = ''):
		"""motifs must have be a dictionary of the entered motif as 
		key and the regular expression as value use the 
		motifs.motifs_re to get the appropiate dictionary
		if manual = 1 then make sure you send in a list
		(the motifsHandle will be automatically closed when done here)
		"""

		if man != 0 and man != 1:
			raise ValueError, "only 1 or 0 for manual choices allowed"

		if not type(ls) == types.ListType:
			raise TypeError, "ls must be of ListType"

		if man == 0 and not type(motifsHandle) == types.FileType:
			raise TypeError, "handle must be an open handle"

		if man == 0:
			if self.smartmotif == 0:
				self.motifsRX = self.M.motifRXdict(self.M.listfromfile(motifsHandle, 0))
			else:
				self.motifsRX = self.M.smartMotifRXdict(self.M.listfromfile(motifsHandle, 0))
		else:#yes manual by passing a list
			if ls != []:
				if self.smartmotif == 0:
					self.motifsRX = self.M.motifRXdict(ls)
				else:
					self.motifsRX = self.M.smartMotifRXdict(ls)
			else:
				raise ValueError, "list was empty"
		#debugging
		#print self.motifsRX



	def mapSEQ(self, seq):
		"""this will map motifs in self.motifsRX for seq passed and
		save them in self.hits_local and/or self.hits_global
		when ignoring the global counts, 
		one can use the hit match objects for other analyses
		{motifKEY:(totalhits, [match start postion, end postion])}
		"""
		
		_d = {}

		for k in self.motifsRX.keys():
			##smart motif choice
			if self.smartmotif == 0:
				rgx = re.compile(self.motifsRX[k], re.IGNORECASE)
				_all = rgx.findall(seq) #findall doesnt tell me where they are
				_matches = rgx.finditer(seq) #this gives me only an iteration object

				if len(_all) > 0:
					if self.fileScope == 0 or self.fileScope == 1:
						#the iterator has to be processed into a tuple since
						#it is not allowed to pass it out of the function scope
						mlist = [] #keeps it more local
						g = _matches.next()					
						while g:
							#the begin and end use the python range function
							#such that begin is inclusive, and end exclusive
							mlist.append((g.start(),g.end()-1)) #we make a list of tuples, len = number of matches						
							try:
								g = _matches.next()
							except:
								g = None

						##option: copy.deepcopy(_all) will send the motifs from findall back
						_d[k] = copy.deepcopy(mlist)

					if self.fileScope != 1:							
						self.hits_global[k] += len(_all)    #global carries all keys
			else: #smart motif on
				mlist = []
				##_d[k] = copy.deepcopy(mlist)
				#print self.motifsRX[k]
				for elem in range(len(self.motifsRX[k])):
					print self.motifsRX[k][elem]
					rgx = re.compile(self.motifsRX[k][elem], re.IGNORECASE)
					_all = rgx.findall(seq) #findall doesnt tell me where they are
					_matches = rgx.finditer(seq) #this gives me only an iteration object

					if len(_all) > 0:
						if self.fileScope == 0 or self.fileScope == 1:
							#the iterator has to be processed into a tuple since
							#it is not allowed to pass it out of the function scope
							g = _matches.next()
							while g:
								#the begin and end use the python range function
								#such that begin is inclusive, and end exclusive
								mlist.append((g.start(),g.end()-1)) #we make a list of tuples, len = number of matches						
								try:
									g = _matches.next()
								except:
									g = None

							##option: copy.deepcopy(_all) will send the motifs from findall back
							#we have to add them all up, result is an unsorted list
							##_d[k].append(copy.deepcopy(mlist))
							
						if self.fileScope != 1:							
							self.hits_global[k] += len(_all)    #global carries all keys
				#since we added all to the list, this keeps the list one set of tuple pairs
				_d[k] = copy.deepcopy(mlist)
			#end smart motif if


		if self.fileScope == 0 or self.fileScope == 1:
			return _d
		else:
			return None








	def setOutput(self, _filename):
		"""Create the output folder for _filename and open the output handles.

		The folder is ROOTFOLDER/MapnCtr/<name>, where <name> is _filename cut
		at its first dot. The files are named after self.fileAppend, or after
		the current date (mm-dd-yy) when fileAppend is empty.
		 fileScope != 1: opens <name>.mmtb as self.HandleMMTB
		 fileScope != 2: opens <name>.mtbs as self.HandleMTBS and writes its header line
		"""
		# everything from the first dot onwards is dropped
		stem = _filename.split('.', 1)[0]
		out_dir = os.path.sep.join([self.Fld.settings['ROOTFOLDER'], "MapnCtr", stem])

		self.Fld.makeDirectory(out_dir)

		def _target(extension):
			# the date stands in when no explicit output name was set
			if self.fileAppend == '':
				label = time.strftime('%m-%d-%y')
			else:
				label = self.fileAppend
			return out_dir + os.sep + label + extension

		if self.fileScope != 1:
			self.HandleMMTB = open(_target(".mmtb"), 'w')

		if self.fileScope != 2:
			self.HandleMTBS = open(_target(".mtbs"), 'w')
			#header line
			self.HandleMTBS.write("seqeunce\tseqlen\tmotif\ttotalhits\tpostions(s-e;)\n")


	def closeOutput(self):
		"""closes the two file handles
		"""

		if self.fileScope != 1:
			self.HandleMMTB.close()

		if self.fileScope != 2:
			self.HandleMTBS.close()


	def writeOutputMMTB(self):
		"""call this only when you want to write out the hits in the 
		self.hits_global
		"""
		if self.hits_global != {}:   
		
			for k in self.hits_global: self.HandleMMTB.write(k + "\t" + str(self.hits_global[k]) + "\n")

		else:
			print "no data is present in hits_global!"
		#end if



	def writeOutputMTBS(self):
		"""this will save anydata that is in memory to the disk location. 
		this best used as a function. self.hits_local is:
		shortname{(seqlen,motifs{[(begin,end),(begin,end)]})} 
		"""
		if self.hits_local != {}:   			

			for sk in self.hits_local:				

				for mk in self.hits_local[sk][1]:
					self.HandleMTBS.write(sk + "\t" + str(self.hits_local[sk][0]) + "\t")
					
					self.HandleMTBS.write(mk + "\t" + str(len(self.hits_local[sk][1][mk])) + "\t")

					for i in range(0,len(self.hits_local[sk][1][mk])):

						if "{" in mk:
							self.HandleMTBS.write(str(self.hits_local[sk][1][mk][i][0]) + "-" + str(self.hits_local[sk][1][mk][i][1]) + ";")
						else:
							self.HandleMTBS.write(str(self.hits_local[sk][1][mk][i][0]) + ";")

					self.HandleMTBS.write("\n")


		else:
			print "no data is present in hits_local!"


	def updateHitsGlobal(self):
		"""copys the keys from motifsRX and set to 0
		"""

		self.hits_global = {}
		for k in self.motifsRX.keys():
			self.hits_global[k] = 0		


	def mapFiles(self):
		"""Map every loaded motif onto every sequence of every queued file.

		This is the principal routine of the class. Each path in self.files
		is loaded with fasta.Loader; if the resulting fdict is empty, the
		file is read record by record with fasta.Iterator instead. Every
		sequence is passed to mapSEQ(), which also updates self.hits_global.

		What happens to the results depends on the settings:
		 - self.hits_local is reset for every file, so with outputMode 0 only
		   the hits of the last file remain in memory.
		 - outputMode 1 writes the hits of each sequence immediately and the
		   global counts when the file is finished.
		 - outputMode 2 writes both when the file is finished.
		 - outputMode 1 and 2 close the output handles after each file.
		 - outputMode 3 opens the output handles but neither writes to them
		   nor closes them here.
		 - self.globalallfiles == 0 resets hits_global for every file; any
		   other value resets it only once, before the first file.

		Raises TypeError/ValueError from checkMODES() for invalid modes and
		ValueError when no files or no motifs are loaded. An IOError raised
		while a file is processed is printed and the loop continues with the
		next file.
		"""

		### ERROR input handling #####

		#print "checking Mode values"
		self.checkMODES()   #call the check, Error will be raised if there are wrong values
		
		#print "Checking for a loaded file list"
		if self.files == []:
			raise ValueError, "no files to analyze! use loadFiles()"
	
		#print "Checking for a loaded motif list"
		if self.motifsRX == {}:
			raise ValueError, "no motifs are loaded! use loadMotifs()"


		### PROCESSING ###
		print "mode values were cleared"	

		## to capture all hits in all files for all sequences:
		## hits_global is reset once here and then accumulates over all files
		if self.globalallfiles != 0 :
			if self.fileScope == 0 or self.fileScope == 2:
				self.updateHitsGlobal()
			else:
				# fileScope 1 keeps no global counts
				self.hits_global = {}
			#end if
		#end if
		## end of the all-files accumulation setup

		for i in range(0,len(self.files)):

			
			### variables ###
			#open file handles (outputMode 1, 2 and 3); the output folder is named after the input file
			# NOTE(review): this call is outside the try below, so an IOError
			# raised while opening the output files is not caught here
			if self.outputMode != 0: self.setOutput(os.path.basename(self.files[i]))

			# per-sequence hits never carry over from one file to the next
			self.hits_local = {}

			# per-file global counts: start again from zero for this file
			if self.globalallfiles == 0:
				if self.fileScope == 0 or self.fileScope == 2:
					self.updateHitsGlobal()
				else:
					self.hits_global = {}
				#end if
			#end if
			


			## run
			try:				
				##create the loader instance for this file path
				_counter = 0								
				self.F_loader = fasta.Loader(self.files[i])
				self.F_loader.makedict() #IUPAC DNA letters used as default
				
				print "mapping"
				if self.F_loader.fdict != {}:		##complete FASTA file loaded into memory					
					for sk in self.F_loader.fdict.keys():
						_counter += 1
						# progress message every 100 sequences
						if _counter % 100 == 0:
							print str(_counter) + " of " + str(len(self.F_loader.fdict)) + " in the pipeline"						
		
						#find hits for all motifs in seq
						hits = self.mapSEQ(self.F_loader.fdict[sk])	#hits_global is updated inside mapSEQ
																		
						if self.fileScope != 2:
							# mapSEQ returns {} when nothing was found (smartmotif off)
							if hits != {}:
								self.hits_local[sk] = (len(self.F_loader.fdict[sk]), copy.deepcopy(hits))															

								if self.outputMode == 1:		#save immediately: hits_local then only ever holds this one sequence
									self.writeOutputMTBS()		#write to file
									self.hits_local = {}		#reset variable



				else:					##fdict is empty: read the file record by record with the iterator
					
					self.Fiter = fasta.Iterator(self.files[i]) #send filepath this time
					
					fREC = self.Fiter.next()
					while fREC:

						#find hits
						hits = self.mapSEQ(fREC.sequence)	#hits_global is updated inside mapSEQ
						
						if self.fileScope!= 2:
							if hits != {}:	
								self.hits_local[fREC.shortname] = (fREC.len, copy.deepcopy(hits))
								
								if self.outputMode == 1:	#save immediately: hits_local then only ever holds this one record
									self.writeOutputMTBS()	#write to file
									self.hits_local = {}	#reset variable



						#get next sequence; any exception raised by next() ends the loop
						# NOTE(review): the bare except also hides real read errors - confirm
						# what fasta.Iterator raises at the end of the file
						try:
							fREC = self.Fiter.next()
						except:
							fREC = None


					

				
				###for saving hits_global and/or hits_local to text file###
				print "finished"
				#SAVE options!
				if self.outputMode == 2 or self.outputMode == 1:
					
					if self.fileScope != 1:
						self.writeOutputMMTB()

					# outputMode 1 has already written the per-sequence hits above
					if self.fileScope != 2 and self.outputMode == 2:
						self.writeOutputMTBS()

					### close ###
					self.closeOutput()





			except IOError, e:
				# NOTE(review): handles opened by setOutput() stay open on this path
				print e
				print str(self.files[i]) + " could not be opened"
				

			

	def addSEQtoFdict(self, name, sequence, _alphabet = Alphabet.IUPAC.ambiguous_dna):
		"""this will add an FastaRecord entry to Fdict as a dictionary, 
		to be called with mapObjs. To Map from files, use mapFiles.
		"""

		rstr = '[^' + _alphabet.letters + ']'         	#the letter string is the variable letters
		regStr = re.compile(rstr, re.IGNORECASE)      	#regex to remove non-alphabet members from the seq string		
		sequence = regStr.sub('',sequence)		##clean up the sequence


		if self.Fdict.has_key(name):		#if there is a redundant entry name, send a message and add an integet to it to prevent crashes
						
			print "name %s is already present!, appending numeral\n", name
			n = 1
			name = name + str(n)
			while self.Fdict.has_key(name):
				n += 1
				name = name + str(n)
			self.Fdict[name] = sequence  #if the key name is identical the entry is not added to the dictionary
		else:
			self.Fdict[name] = sequence


	def pointMapsFromMemory(self, _length):
		"""this should map PromoterPointMaps, that is we take the
		counts in their beginning 5' position and save them to a list
		by increasing the value for 1 for each position. The user
		defines a given length that all the sequences should have;
		those that do not have this length are not considered.
		To save the data to disk call savePointMaps()
		This processes any data in self.hits_local to make Maps
		"""

		### ERROR input handling #####		

		# test
		if type(_length) != types.IntType:
			print type(_length)
			raise TypeError, "common promoter length must be an integer"
			return 0		

		### PROCESSING ###
		print "doing it"		
		#self.outputMode = 0
		#old method called up the hits fist, you do that now manually
		#self.mapFiles() #there is no loop control, all results are fused if more than one file is entered

		#now process the hits, more or less like the outputs
		self.pointMapData = {}
		for k in self.motifsRX.keys():  # need one full array of zeros for each motif
			self.pointMapData[k] = [0 for i in range(_length)]

		for v in self.hits_local.values():
			for k in self.motifsRX.keys():		
				if v[0] == _length:
					if v[1].has_key(k):
						if self.pointMapData.has_key(k):
							#print v[1][k]
							for i in v[1][k]:
								#print i[0]
								self.pointMapData[k][i[0]] += 1

	def pointCurvesFromMemory(self, _length):
		"""this should map PromoterPointCurves, that is we take the
		counts in their beginning 5' position and save them to a list
		by increasing the value for 1 at each position for the length of the motif. The user
		defines a given length that all the sequences should have;
		those that do not have this length are not considered.
		To save the data to disk call savePointCurves()
		This processes any data in self.hits_local to make Curves
		"""

		### ERROR input handling #####
		
		# test
		if type(_length) != types.IntType:
			print type(_length)
			raise TypeError, "common promoter length must be an integer"
			return 0		
		#end if

		### PROCESSING ###
		print "doing it"		
		#self.outputMode = 0
		#old method called up the hits fist, you do that now manually
		#self.mapFiles() #there is no loop control, all results are fused if more than one file is entered

		#now process the hits, more or less like the outputs
		self.pointCurveData = {}
		for k in self.motifsRX.keys():  # need one full array of zeros for each motif
			self.pointCurveData[k] = [0 for i in range(_length)]

		for v in self.hits_local.values():
			for k in self.motifsRX.keys():		
				if v[0] == _length:
					if v[1].has_key(k):
						if self.pointCurveData.has_key(k):
							#print v[1][k]
							for i in v[1][k]:
									#print i[0]
									for i2 in range(i[0], (i[1]+1)):
										self.pointCurveData[k][i2] += 1





	def savePointMaps(self,_style=0,append=''):
		"""Save self.pointMapData below ROOTFOLDER/PointMappe.

		_style: 0 = one "<motif>.pmap" file per motif (motif name, a blank
		 line, then one count per line);
		 1 = a single "<append>_<n>motifsPMAP.txt" file with one line per
		 motif ("motif<TAB>count;count;...")
		append: name of the sub-folder (and file prefix for _style 1); the
		 current date (mm-dd-yy) names the folder when it is empty

		Raises ValueError if _style is neither 0 nor 1.
		"""
		if _style < 0 or _style > 1:
			raise ValueError("_style can be 0 (individual files) or 1 (one file) only\n")

		if append == '':
			_fullpath = self.Fld.settings['ROOTFOLDER'] + os.path.sep + "PointMappe" + os.sep + time.strftime('%m-%d-%y')
		else:
			_fullpath = self.Fld.settings['ROOTFOLDER'] + os.path.sep + "PointMappe" + os.sep + append

		self.Fld.makeDirectory(_fullpath)

		if _style == 0: #one file for every motif
			for k in self.pointMapData.keys():
				HandleMAP = open(_fullpath + os.sep + str(k) + ".pmap", 'w')
				# try/finally so the file is closed even if a write fails
				try:
					HandleMAP.write(str(k)+"\n\n")
					for count in self.pointMapData[k]:
						HandleMAP.write(str(count)+"\n")
				finally:
					HandleMAP.close()

		else: #one big file
			HandleMAP = open(_fullpath + os.sep + append + "_" + str(len(self.pointMapData)) + "motifsPMAP.txt", 'w')
			try:
				for k in self.pointMapData.keys():
					HandleMAP.write(str(k)+"\t")
					for count in self.pointMapData[k]:
						HandleMAP.write(str(count)+";")
					HandleMAP.write("\n")
			finally:
				HandleMAP.close()

		print("Data(s) saved to :" + _fullpath)


	def pointMaps(self, _length):
		"""Same as pointMapsFromMemory but reads the queued FASTA files.

		Every file in self.files is loaded with fasta.Loader and each sequence
		is mapped with mapSEQ(). For sequences whose length equals _length the
		begin position of every hit is counted in self.pointMapData[motif].

		Side effects: self.outputMode is set to 0, self.hits_global is reset
		and then updated by mapSEQ(), self.hits_local is left empty.
		With fileScope 2 mapSEQ() returns no positions, so the maps stay zero.

		Raises TypeError/ValueError from checkMODES(), ValueError when no
		motifs or no files are loaded, TypeError if _length is not an integer.
		"""

		### ERROR input handling #####
		self.checkMODES()   #call the check, Error will be raised if there are wrong values

		if self.motifsRX == {}:
			raise ValueError("no motifs are loaded! use loadMotifs()")

		if self.files == []:
			raise ValueError("no files to analyze! use loadFiles()")

		if not isinstance(_length, int) or isinstance(_length, bool):
			raise TypeError("common promoter length must be an integer, got " + type(_length).__name__)

		### PROCESSING ###
		print("here we go\n")
		self.outputMode = 0
		self.updateHitsGlobal()

		# one full array of zeros for each motif
		self.pointMapData = {}
		for motif in self.motifsRX.keys():
			self.pointMapData[motif] = [0] * _length

		#for really large sets, you need GBs of memory
		for path in self.files:
			self.F_loader = fasta.Loader(path)
			self.F_loader.makedict() #IUPAC DNA letters used as default

			print("mapping")
			records = self.F_loader.fdict
			if records != {}:		##complete FASTA file loaded into memory?
				done = 0
				for name in records.keys():
					done += 1
					if done % 100 == 0:
						print(str(done) + " of " + str(len(records)) + " in the pipeline")

					#find hits for all motifs in seq; every sequence is mapped,
					#because mapSEQ also keeps hits_global up to date
					hits = self.mapSEQ(records[name])

					# hits is None for fileScope 2 and {} when nothing matched
					if not hits or len(records[name]) != _length:
						continue

					for motif in self.motifsRX.keys():
						for span in hits.get(motif, ()):
							self.pointMapData[motif][span[0]] += 1

		self.hits_local = {}


	def statsOfPointCurves(self, _length, append=''):
		"""Run pointCurves(_length), save the curves with savePointCurves(1, append)
		and write variance, standard deviation, mean, maximum and minimum of
		every motif's curve to "<append>_<n>motifsPSUMVAR.txt" in
		ROOTFOLDER/PointMappe/<append or current date>.

		requires package numpy
		requires that there are files (best just one) and motifs loaded
		"""

		import numpy as np
		self.pointCurves(_length)
		self.savePointCurves(1,append)

		if append == '':
			_fullpath = self.Fld.settings['ROOTFOLDER'] + os.path.sep + "PointMappe" + os.sep + time.strftime('%m-%d-%y')
		else:
			_fullpath = self.Fld.settings['ROOTFOLDER'] + os.path.sep + "PointMappe" + os.sep + append

		self.Fld.makeDirectory(_fullpath)

		HandleMAP = open(_fullpath + os.path.sep + append + "_" + str(len(self.pointCurveData)) + "motifsPSUMVAR.txt", 'w')
		# try/finally so the file is closed even if numpy or a write raises
		try:
			HandleMAP.write('Motif\tVar\tStd\tMean\tMax\tMin\n')
			for k in self.pointCurveData.keys():
				counts = self.pointCurveData[k]
				stats = [np.var(counts), np.std(counts), np.mean(counts), np.max(counts), np.min(counts)]
				# every value is followed by a tab, so each row ends with a
				# trailing tab; kept so the file format does not change
				HandleMAP.write(str(k) + "\t" + "".join([str(value) + "\t" for value in stats]) + "\n")
		finally:
			HandleMAP.close()

	def statsOfPointMaps(self, _length, append=''):
		"""Run pointMaps(_length), save the maps with savePointMaps(1, append)
		and write variance, standard deviation, mean, maximum and minimum of
		every motif's map to "<append>_<n>motifsPMAPVAR.txt" in
		ROOTFOLDER/PointMappe/<append or current date>.

		requires package numpy
		requires that there are files (best just one) and motifs loaded
		"""

		import numpy as np
		self.pointMaps(_length)
		self.savePointMaps(1,append)

		if append == '':
			_fullpath = self.Fld.settings['ROOTFOLDER'] + os.path.sep + "PointMappe" + os.sep + time.strftime('%m-%d-%y')
		else:
			_fullpath = self.Fld.settings['ROOTFOLDER'] + os.path.sep + "PointMappe" + os.sep + append

		self.Fld.makeDirectory(_fullpath)

		HandleMAP = open(_fullpath + os.path.sep  + append + "_" + str(len(self.pointMapData)) + "motifsPMAPVAR.txt", 'w')
		# try/finally so the file is closed even if numpy or a write raises
		try:
			HandleMAP.write('Motif\tVar\tStd\tMean\tMax\tMin\n')
			for k in self.pointMapData.keys():
				counts = self.pointMapData[k]
				stats = [np.var(counts), np.std(counts), np.mean(counts), np.max(counts), np.min(counts)]
				# every value is followed by a tab, so each row ends with a
				# trailing tab; kept so the file format does not change
				HandleMAP.write(str(k) + "\t" + "".join([str(value) + "\t" for value in stats]) + "\n")
		finally:
			HandleMAP.close()


	def pointCurves(self, _length):
		"""Same as pointCurvesFromMemory but reads the queued FASTA files.

		Every file in self.files is loaded with fasta.Loader and each sequence
		is mapped with mapSEQ(). For sequences whose length equals _length
		every position covered by a hit (begin to end, both inclusive) is
		counted in self.pointCurveData[motif].

		Side effects: self.outputMode is set to 0, self.hits_global is reset
		and then updated by mapSEQ(), self.hits_local is left empty.
		With fileScope 2 mapSEQ() returns no positions, so the curves stay zero.

		Raises TypeError/ValueError from checkMODES(), ValueError when no
		motifs or no files are loaded, TypeError if _length is not an integer.
		"""

		### ERROR input handling #####
		self.checkMODES()   #call the check, Error will be raised if there are wrong values

		if self.motifsRX == {}:
			raise ValueError("no motifs are loaded! use loadMotifs()")

		if self.files == []:
			raise ValueError("no files to analyze! use loadFiles()")

		if not isinstance(_length, int) or isinstance(_length, bool):
			raise TypeError("common promoter length must be an integer, got " + type(_length).__name__)

		### PROCESSING ###
		print("here we go\n")
		self.outputMode = 0
		self.updateHitsGlobal()

		# one full array of zeros for each motif
		self.pointCurveData = {}
		for motif in self.motifsRX.keys():
			self.pointCurveData[motif] = [0] * _length

		#for really large sets, you need GBs of memory
		for path in self.files:
			self.F_loader = fasta.Loader(path)
			self.F_loader.makedict() #IUPAC DNA letters used as default

			print("mapping")
			records = self.F_loader.fdict
			if records != {}:		##complete FASTA file loaded into memory?
				done = 0
				for name in records.keys():
					done += 1
					if done % 100 == 0:
						print(str(done) + " of " + str(len(records)) + " in the pipeline")

					#find hits for all motifs in seq; every sequence is mapped,
					#because mapSEQ also keeps hits_global up to date
					hits = self.mapSEQ(records[name])

					# hits is None for fileScope 2 and {} when nothing matched
					if not hits or len(records[name]) != _length:
						continue

					for motif in self.motifsRX.keys():
						curve = self.pointCurveData[motif]
						for span in hits.get(motif, ()):
							# the stored end is inclusive, hence the + 1
							for pos in range(span[0], span[1] + 1):
								curve[pos] += 1

		self.hits_local = {}
												



	def savePointCurves(self,_style=0,append=''):
		"""Save self.pointCurveData below ROOTFOLDER/PointMappe.

		_style: 0 = one "<motif>.psum" file per motif (motif name, a blank
		 line, then one count per line);
		 1 = a single "<append>_<n>motifsPSUM.txt" file with one line per
		 motif ("motif<TAB>count;count;...")
		append: name of the sub-folder (and file prefix for _style 1); the
		 current date (mm-dd-yy) names the folder when it is empty

		Raises ValueError if _style is neither 0 nor 1.
		"""
		if _style < 0 or _style > 1:
			raise ValueError("_style can be 0 (individual files) or 1 (one file) only\n")

		if append == '':
			_fullpath = self.Fld.settings['ROOTFOLDER'] + os.path.sep + "PointMappe" + os.sep + time.strftime('%m-%d-%y')
		else:
			_fullpath = self.Fld.settings['ROOTFOLDER'] + os.path.sep + "PointMappe" + os.sep + append

		self.Fld.makeDirectory(_fullpath)

		if _style == 0: #one file for every motif
			for k in self.pointCurveData.keys():
				HandleMAP = open(_fullpath + os.sep + str(k) + ".psum", 'w')
				# try/finally so the file is closed even if a write fails
				try:
					HandleMAP.write(str(k)+"\n\n")
					for count in self.pointCurveData[k]:
						HandleMAP.write(str(count)+"\n")
				finally:
					HandleMAP.close()

		else: #one big file
			HandleMAP = open(_fullpath + os.sep + append + "_" + str(len(self.pointCurveData)) + "motifsPSUM.txt", 'w')
			try:
				for k in self.pointCurveData.keys():
					HandleMAP.write(str(k)+"\t")
					for count in self.pointCurveData[k]:
						HandleMAP.write(str(count)+";")
					HandleMAP.write("\n")
			finally:
				HandleMAP.close()

		print("Data(s) saved to :" + _fullpath)
						
					

	def mapObjs(self, outputname = '', savetofile = 0):
		"""use for mapping sequences in memory only present in Fdict 
		Fdict should be filled by calling addSEQtodict.		
		FileScope is used to deterime which output you need (MTBS or MMTB).
		OutputMode is not used. You can dump the results to a file by changing 
		savetofile to 1, otherwise outputname is not revelant. FileAppend is 
		still active and will append to the output name when saving to a file.
		"""

		### ERROR input handling #####
		
		#print "checking Mode values"
		self.checkMODES()   #call the check, Error will be raised if there are wrong values
	
		#print "Checking for a loaded motif list"
		if self.motifsRX == {}:
			raise ValueError, "no motifs are loaded! use loadMotifs()"

		#print "Checking for a loaded FASTA objects"
		if self.Fdict == {}:
			raise ValueError, "no FASTA objects are loaded! add"

		### PROCESSING ###
		print "mode values were cleared"	
		

		
		### variables ###		
		self.hitslocal_dict = {}
		self.updateHitsGlobal()		
		

		## run
		if savetofile == 1 :
			self.setOutput(outputname)

		try:				
		
			print "mapping"
			self.hits_local = {}

			if self.Fdict != {}:		##complete FASTA file loaded into memory					
				for sk in self.Fdict.keys():
					
					#find hits
					hits = self.mapSEQ(self.Fdict[sk])	#global set in function call	
																	
					if self.fileScope != 2:
						if hits != {}:
							self.hits_local[sk] = (len(self.Fdict[sk]), copy.deepcopy(hits))															
								
							if savetofile == 1 and self.fileScope != 2:		#option SAVE IMMEDIATELY or KEEP in MEMORY, fill the hits_global, then remove it
								self.writeOutputMTBS()		#write to file

							else:
								self.localhits_dict[sk] = copy.deepcopy(self.hits_local[sk])

							self.hits_local = {}		#reset variable

	
		
				
			#SAVE options!

			if savetofile == 1:
				if self.fileScope != 1: self.writeOutputMMTB()				
				self.closeOutput()
			print "finished"

		except IOError, e:
			print e

			
			










	
	
