"""
This module accepts a list of GenBank files and
provides several parameters for extracting sequences
from them. Primarily designed for promoter extraction.
This class needs the classes within the "fasta" module
from the Motif Mapper collection. The FASTA records can
be converted to Bio - SeqRecord if needed.
Author: Kenneth Wayne Berendzen -> kwberendzen@alumni.utexas.net
License: GPLv2.0
Copyright: Kenneth Wayne Berendzen and University of Tuebingen 2008

Classes:
	this module
	
	Members:	
	gbfiles	this will hold the list of GB files
	extracted	this will hold the list of fasta seqs as FastaRecord
	dict		shortname: FastaRecord 	

	start		start position (5')
	end		end position (3')

	overlap		if overlap should be allowed with 
			 upstream genes (1 = yes)

	mRNAannot	0 = uses the gene annotation 
			 (necessary for certain files) 
			1 = mRNA annotation

	root		0 = ATG only, 1 = TSS only, 
			2 = TSS if possible, otherwise take the ATG





"""
######IMPORTS#######
import types
import re
import string
import copy
import fasta
import copy

from Bio import SeqIO




######aGBSQL######


class gbSQL:


	def __init__(self, _files = [], _start = -1000, _end = 0, _root = 0, _mRNAannot = 0, _overlap = 1):
		self.gbfiles = _files
		self.extracted = []
		self.dict = {}
		self.start = _start
		self.end = _end
		self.root = _root
		self.mRNAannot = _mRNAannot
		self.overlap = _overlap


	def __str__(self):
		s = " \nset self.gbfiles as a list of paths; \n then run getpromoters_gene() or getpromoters_locustag()\nnames are also variables"
		s += "\n start: %i \n end: %i \n root: %i \n overlap: %i \n option mRNAannot: %i \n" % (self.start, self.end, self.root, self.overlap, self.mRNAannot)
		s += "\n number GBfiles files loaded (gbfiles) %i \n number fasta seqs loaded (extracted) %i \n number dictionary entries (dict) %i \n" % (len(self.gbfiles), len(self.extracted), len(self.dict))
		return s
		
		
	def checkvalues(self):
		"""this is called to check to see if the values are appropiate before running.
		if there is an error, an error message is sent and the script terminated.
		"""
		if self.end < self.start:
			return 0
	
		if self.root < 0 | self.root > 2:
			return 0
	
		if self.mRNAannot < 0 | self.mRNAannot > 1:
			return 0
	
		if self.overlap < 0 | self.overlap > 1 :
			return 0
	
		return 1
	

	def getsubset(self, _locustags = []):
		"""this will return a subset of genes
		"""
		_subset = []
		for _tag in _locustags:
			if _tag in self.dict:
				_subset.append(self.dict[_tag])
		#end for
		return _subset


	def savefastatofile(self, _filehandle):
		"""Write every record in self.extracted to _filehandle.

		Delegates to fasta.write_to_single_file(self.extracted, _filehandle);
		the output format is whatever that helper produces.
		"""
		fasta.write_to_single_file(self.extracted, _filehandle)


	def savedicttofile(self, _filehandle):
		"""this will save the dict entries to a text file.
		"""
		for _rec in self.dict.values():
			_filehandle.write('>%s %s\n%s\n' % (_rec.shortname, _rec.long_name, _rec.sequence))

		_filehandle.flush()


	def clear(self):
		"""clears data in self.extracted and self.dict
		"""
		self.extracted = []
		self.dict = {}


	def getpromoters_gene(self):
		"""Extract promoter windows, naming each record by its 'gene' qualifier.

		Each path in self.gbfiles is parsed with SeqIO as GenBank and the
		features of a record are walked in order.  Whenever the most recent
		'gene' feature and the most recent 'CDS' feature carry the same
		'gene' qualifier, and that name differs from the gene handled
		before, a window is sliced from the record sequence and stored as a
		fasta.FastaRecord in self.extracted (appended) and in self.dict
		(keyed by shortname).

		Anchor feature:
			root == 0              the CDS
			root != 0, mRNAannot 1 the mRNA if it has a 'gene' qualifier,
			                       otherwise the CDS
			root != 0, otherwise   the gene feature
		With root == 1 a record is only produced when the anchor extends
		beyond the CDS (see _good / p_good below).

		Window:
			strand != -1   seq[anchor.start + self.start : anchor.start + self.end]
			strand == -1   seq[anchor.end - self.end : anchor.end - self.start],
			               reverse complemented

		With overlap == 0 the window is clipped at the neighbouring gene,
		and a -1 strand gene is only written once the following gene is
		reached (or in the tail block after the feature loop).

		Returns nothing.  Prints progress messages and, per file, the
		running count of 'gene' features seen.
		"""

		########local variables######

		_fastaRecord = fasta.FastaRecord()  #instance
		_start = 0
		_end = 0
		_ok = 0
		_genecount = 0

		previousgene = ""
		previouscds = ""
		previousmRNA = ""
		currentgene = ""
		currentmRNA = ""
		# NOTE(review): spelled with three r's, so this name is never read;
		# currentcds itself is first bound after a file has been parsed.
		currrentcds = ""

		### start file iteration ###
		print "\n here we go - please wait"
		ctr = 0

		for _file in self.gbfiles :

			print "\nparsing file %s \n" % _file
			# NOTE(review): the loop body only prints.  Everything below runs
			# after this loop has finished, so it works on the LAST record
			# yielded for the file.  If a file yields no record, seq_record
			# keeps its value from the previous file (or is unbound for the
			# first file).  The handle from open(_file) is never closed
			# explicitly.
			for seq_record in SeqIO.parse(open(_file), "genbank") : 
				print "done"
				#iterating keeps the parsed record (and its whole
				#sequence) in seq_record so the promoters can be
				#processed immediately afterwards

			#there is no real "empty" feature to start from, so all six
			#trackers are initialised with the first feature of the record

			currentgene = copy.deepcopy(seq_record.features[0])
			currentmRNA = copy.deepcopy(seq_record.features[0])
			currentcds = copy.deepcopy(seq_record.features[0])
			previousgene = copy.deepcopy(seq_record.features[0])
			previouscds = copy.deepcopy(seq_record.features[0])
			previousmRNA = copy.deepcopy(seq_record.features[0])


			#print seq_record.features

			for seq_feature in seq_record.features :

				#print seq_feature
				if 'gene' in seq_feature.qualifiers:

					#progress output: every 100th feature that has a 'gene' qualifier
					ctr += 1
					if (ctr % 100) == 0 :
						print seq_feature.qualifiers['gene'][0]
						print " "

					if seq_feature.type == "gene" :
						currentgene = copy.deepcopy(seq_feature)
						_genecount += 1

					if seq_feature.type == "CDS" : currentcds = copy.deepcopy(seq_feature)
					if seq_feature.type == "mRNA" : currentmRNA = copy.deepcopy(seq_feature)


				#the order is 'gene', 'mRNA' (if present), 'cds'
				#so gene and CDS names only agree once the CDS of the current gene has been read
				if 'gene' in currentgene.qualifiers and 'gene' in currentcds.qualifiers :
					if currentgene.qualifiers['gene'][0] == currentcds.qualifiers['gene'][0]:

						#_ok = 1 only the first time a gene/CDS pair is seen
						if not ('gene' in previousgene.qualifiers) :
								_ok = 1
						elif ('gene' in previousgene.qualifiers) :
								if previousgene.qualifiers['gene'][0] != currentgene.qualifiers['gene'][0] :
										_ok = 1
								else:
										_ok = 0
										#print "previous was same gene"
						# NOTE(review): unreachable - the if/elif above already cover both cases
						else: #if not prev gene
								_ok = 0


						#process and extract the promoters
						if _ok == 1 :

							if self.root == 0:
									#root 0: the window is anchored on the CDS
									_fastaRecord = fasta.FastaRecord()  #instance must be fresh, otherwise every entry points to the same object
									_fastaRecord.shortname = currentcds.qualifiers['gene'][0]
									#db_xref is optional; fall back to a header without it
									try:
										_fastaRecord.long_name = _fastaRecord.shortname + " " + currentgene.qualifiers['db_xref'][0] + " on %i strand" % (currentcds.strand)
									except:
										_fastaRecord.long_name = _fastaRecord.shortname + " on %i strand" % (currentcds.strand)

									#print currentcds.location.start.position + 1 #+ (-1 * self.end)
									#print currentcds.location.end.position   #+ (-1 * self.start) 

									if currentcds.strand == -1 :
											if self.overlap == 1 :
													#-1 strand: window is measured from the CDS end and reverse complemented
													_start = currentcds.location.end.position + (-1 * self.end)
													_end = currentcds.location.end.position + (-1 * self.start) 
													if _end > len(seq_record.seq) : _end = len(seq_record.seq)

													_fastaRecord.sequence = seq_record.seq[_start:_end].reverse_complement().tostring()
													self.dict[_fastaRecord.shortname] = _fastaRecord
													self.extracted.append(_fastaRecord)

											else: #no overlap allowed, current gene on -1 strand

												#the current gene is not written here; only the PREVIOUS
												#-1 strand gene is, clipped at the start of the current gene
												if previousgene.strand == -1 :
													if 'gene' in previousgene.qualifiers and 'gene' in previouscds.qualifiers  :
															if previousgene.qualifiers['gene'][0] == previouscds.qualifiers['gene'][0] :
																	_fastaRecord = fasta.FastaRecord() 
																	_fastaRecord.shortname = previouscds.qualifiers['gene'][0]
																	try:
																		_fastaRecord.long_name = _fastaRecord.shortname + " " + previousgene.qualifiers['db_xref'][0] + " on %i strand" % (previouscds.strand)
																	except:
																		_fastaRecord.long_name = _fastaRecord.shortname + " on %i strand" % (previouscds.strand)
																	_start = previouscds.location.end.position + (-1 * self.end)
																	_end = previouscds.location.end.position + (-1 * self.start) 
																	if _end > currentgene.location.start.position : _end = currentgene.location.start.position

																	_fastaRecord.sequence = seq_record.seq[_start:_end].reverse_complement().tostring()
																	self.dict[_fastaRecord.shortname] = _fastaRecord

																	self.extracted.append(_fastaRecord)
																	#print 'u' + previousgene.qualifiers['gene'][0]	        

									else: #in sense orientation

											if self.overlap == 1 :	 
													_start = currentcds.location.start.position + self.start
													if _start < 0 : _start = 0
													_end = currentcds.location.start.position + self.end    

													_fastaRecord.sequence = seq_record.seq[_start:_end].tostring()
													self.dict[_fastaRecord.shortname] = _fastaRecord

													self.extracted.append(_fastaRecord)

											else : #no overlap allowed, current gene in sense

													if 'gene' in previousgene.qualifiers :

															#current gene is sense: clip the window at the end of the previous gene
															_start = currentcds.location.start.position + self.start
															if _start < previousgene.location.end.position : _start = previousgene.location.end.position
															_end = currentcds.location.start.position + self.end 

															_fastaRecord.sequence = _fastaRecord.sequence = seq_record.seq[_start:_end].tostring()	
															self.dict[_fastaRecord.shortname] = _fastaRecord

															self.extracted.append(_fastaRecord)
															#print 'c' + currentgene.qualifiers['gene'][0]

															#check for the previous gene, if antisense, process
															if previousgene.strand == -1 :
																	if 'gene' in previousgene.qualifiers and 'gene' in previouscds.qualifiers  :
																			if previousgene.qualifiers['gene'][0] == previouscds.qualifiers['gene'][0] :
																							_fastaRecord = fasta.FastaRecord() 
																							_fastaRecord.shortname = previousgene.qualifiers['gene'][0]
																							# NOTE(review): unlike the sibling branches this header
																							# omits db_xref, and the except body is a bare expression
																							# that assigns nothing - confirm intent
																							try:
																								_fastaRecord.long_name = _fastaRecord.shortname + " on %i strand" % (previouscds.strand)
																							except:
																								_fastaRecord.long_name 
																							_start = previouscds.location.end.position + (-1 * self.end)
																							_end = previouscds.location.end.position + (-1 * self.start) 
																							if _end > currentgene.location.start.position : _end = currentgene.location.start.position

																							_fastaRecord.sequence = seq_record.seq[_start:_end].reverse_complement().tostring()
																							self.dict[_fastaRecord.shortname] = _fastaRecord

																							self.extracted.append(_fastaRecord)
																							#print 'd' + previousgene.qualifiers['gene'][0]

													else : #no previous gene yet: nothing to clip against except the sequence start
															_start = currentcds.location.start.position + self.start
															if _start < 0 : _start = 0
															_end = currentcds.location.start.position + self.end 

															_fastaRecord.sequence = _fastaRecord.sequence = seq_record.seq[_start:_end].tostring()	
															self.dict[_fastaRecord.shortname] = _fastaRecord

															self.extracted.append(_fastaRecord)
															#print 'L' + currentcds.qualifiers['gene'][0]



							else: #self.root != 0 (1 or 2)
								SeqRecordObj = ''
								prevSeqRecordObj = ''
								_good = 0
								p_good = 0	

								#choose the anchor feature for the current and the previous gene
								if self.mRNAannot == 1 :

									if 'gene' in currentmRNA.qualifiers :
										SeqRecordObj = copy.deepcopy(currentmRNA)
										prevSeqRecordObj = copy.deepcopy(previousmRNA)
									else:
										SeqRecordObj = copy.deepcopy(currentcds)
										prevSeqRecordObj = copy.deepcopy(previouscds)

								else: #self.mRNAannot != 1
									SeqRecordObj = copy.deepcopy(currentgene)
									prevSeqRecordObj = copy.deepcopy(previousgene)


								#root 1: _good only when the anchor reaches beyond the CDS
								#(ends after it on the -1 strand, or starts before it);
								#any other root value accepts the anchor unconditionally
								if self.root == 1 :
									if currentgene.strand == -1 and (SeqRecordObj.location.end.position > currentcds.location.end.position) :
										_good = 1
									elif (SeqRecordObj.location.start.position < currentcds.location.start.position) :
										_good = 1
									else:
										_good = 0
								else:
									_good = 1 


								#same test for the previous gene
								if self.root == 1 :
									if previousgene.strand == -1 and (prevSeqRecordObj.location.end.position > previouscds.location.end.position) :
										p_good = 1
									elif (prevSeqRecordObj.location.start.position < previouscds.location.start.position) :
										p_good = 1
									else:
										p_good = 0
								else:
									p_good = 1 


								_fastaRecord = fasta.FastaRecord()  #instance must be fresh, otherwise every entry points to the same object
								_fastaRecord.shortname = SeqRecordObj.qualifiers['gene'][0]
								try:
									_fastaRecord.long_name = _fastaRecord.shortname + " " + SeqRecordObj.qualifiers['db_xref'][0] + " on %i strand" % (SeqRecordObj.strand)
								except:
									_fastaRecord.long_name = _fastaRecord.shortname + " on %i strand" % (SeqRecordObj.strand)
								if currentcds.strand == -1 :

									if self.overlap == 1 and _good == 1 :
										_start = SeqRecordObj.location.end.position + (-1 * self.end)
										_end = SeqRecordObj.location.end.position + (-1 * self.start) 
										if _end > len(seq_record.seq) : _end = len(seq_record.seq)
										_fastaRecord.sequence = seq_record.seq[_start:_end].reverse_complement().tostring()
										self.dict[_fastaRecord.shortname] = _fastaRecord
										self.extracted.append(_fastaRecord)

									elif self.overlap == 0 and p_good == 1 :

										#only the previous -1 strand gene is written, clipped at the current gene start
										if previousgene.strand == -1 :
											if previousgene.qualifiers['gene'][0] != currentgene.qualifiers['gene'][0] :
												_fastaRecord = fasta.FastaRecord() 
												_fastaRecord.shortname = previouscds.qualifiers['gene'][0]
												try:
													_fastaRecord.long_name = _fastaRecord.shortname + " " + previousgene.qualifiers['db_xref'][0] + " on %i strand" % (previouscds.strand)
												except:
													_fastaRecord.long_name = _fastaRecord.shortname + " on %i strand" % (previouscds.strand)
												_start = prevSeqRecordObj.location.end.position + (-1 * self.end)
												_end = prevSeqRecordObj.location.end.position + (-1 * self.start) 
												if _end > currentgene.location.start.position : _end = currentgene.location.start.position
												_fastaRecord.sequence = seq_record.seq[_start:_end].reverse_complement().tostring()
												self.dict[_fastaRecord.shortname] = _fastaRecord

												self.extracted.append(_fastaRecord)	        

								else: #in sense orientation

									if self.overlap == 1 and _good == 1 :	 
										_start = SeqRecordObj.location.start.position + self.start
										if _start < 0 : _start = 0
										_end = SeqRecordObj.location.start.position  + self.end    

										_fastaRecord.sequence = seq_record.seq[_start:_end].tostring()	        						
										self.dict[_fastaRecord.shortname] = _fastaRecord

										self.extracted.append(_fastaRecord)

									elif self.overlap == 0 :
											if 'gene' in previousgene.qualifiers :  #false only before the very first gene has been handled
													#current gene is sense, known since this block was called
													if _good == 1 :
														_start = SeqRecordObj.location.start.position + self.start
														if _start < previousgene.location.end.position : _start = previousgene.location.end.position
														_end = SeqRecordObj.location.start.position + self.end 
														_fastaRecord.sequence = _fastaRecord.sequence = seq_record.seq[_start:_end].tostring()	
														self.dict[_fastaRecord.shortname] = _fastaRecord

														self.extracted.append(_fastaRecord)

													#check for the previous gene, if antisense, process
													if previousgene.strand == -1 and p_good == 1 :
														if previousgene.qualifiers['gene'][0] != currentgene.qualifiers['gene'][0] :
															_fastaRecord = fasta.FastaRecord() 
															_fastaRecord.shortname = previousgene.qualifiers['gene'][0]
															try:
																_fastaRecord.long_name = _fastaRecord.shortname + " " + previousgene.qualifiers['db_xref'][0] + " on %i strand" % (previouscds.strand)
															except:
																_fastaRecord.long_name = _fastaRecord.shortname + " on %i strand" % (previouscds.strand)
															_start = prevSeqRecordObj.location.end.position + (-1 * self.end)
															_end = prevSeqRecordObj.location.end.position + (-1 * self.start) 
															if _end > currentgene.location.start.position : _end = currentgene.location.start.position

															_fastaRecord.sequence = seq_record.seq[_start:_end].reverse_complement().tostring()
															self.dict[_fastaRecord.shortname] = _fastaRecord
															self.extracted.append(_fastaRecord)

											# NOTE(review): this branch does not test _good, unlike the one above
											else : #gene not present
												_start = SeqRecordObj.location.start.position + self.start
												if _start < 0 : _start = 0
												_end = SeqRecordObj.location.start.position + self.end 

												_fastaRecord.sequence = _fastaRecord.sequence = seq_record.seq[_start:_end].tostring()	
												self.dict[_fastaRecord.shortname] = _fastaRecord	
												self.extracted.append(_fastaRecord)







						#same indent as the "_ok == 1" test: runs whenever gene and CDS names agree
						previousgene = copy.deepcopy(currentgene)
						previouscds = copy.deepcopy(currentcds) 
						previousmRNA = copy.deepcopy(currentmRNA)

			#to catch the last entry if it is antisense
			# NOTE(review): _fastaRecord is whichever record was created last in
			# the loop above.  When a "previous gene" record was built for the
			# final gene pair, that record's sequence is overwritten here and it
			# is appended a second time.  SeqRecordObj is only bound if the
			# root != 0 branch ran at least once.  Confirm intended behaviour.
			if self.overlap == 0 and currentgene.strand == -1 :  

				if self.root == 0 :                               
					_start = currentcds.location.end.position + (-1 * self.end)
					_end = currentcds.location.end.position + (-1 * self.start) 

				else:
					_start = SeqRecordObj.location.end.position + (-1 * self.end)
					_end = SeqRecordObj.location.end.position + (-1 * self.start) 

				if _end > len(seq_record.seq) : _end = len(seq_record.seq)                                                                                                             
				_fastaRecord.sequence = seq_record.seq[_start:_end].reverse_complement().tostring()	        						
				self.dict[_fastaRecord.shortname] = _fastaRecord
				self.extracted.append(_fastaRecord)

			#running total of 'gene' features seen so far (not reset per file)
			print _genecount



	def getpromoters_locustag(self):
		"""Extract promoter windows, naming each record by its 'locus_tag' qualifier.

		Each path in self.gbfiles is parsed with SeqIO as GenBank and the
		features of a record are walked in order.  Whenever the most recent
		'gene' feature and the most recent 'CDS' feature carry the same
		'locus_tag', and that tag differs from the gene handled before, a
		window is sliced from the record sequence and stored as a
		fasta.FastaRecord in self.extracted (appended) and in self.dict
		(keyed by shortname).

		Anchor feature:
			root == 0              the CDS
			root != 0, mRNAannot 1 the mRNA if it has a 'locus_tag'
			                       qualifier, otherwise the CDS
			root != 0, otherwise   the gene feature
		With root == 1 a record is only produced when the anchor extends
		beyond the CDS (see _good / p_good below).

		Window:
			strand != -1   seq[anchor.start + self.start : anchor.start + self.end]
			strand == -1   seq[anchor.end - self.end : anchor.end - self.start],
			               reverse complemented

		With overlap == 0 the window is clipped at the neighbouring gene,
		and a -1 strand gene is only written once the following gene is
		reached (or in the tail block after the feature loop).

		Returns nothing.  Prints progress messages and, per file, the
		running count of 'gene' features seen.
		"""

		########local variables######

		_fastaRecord = fasta.FastaRecord()  #instance
		_start = 0
		_end = 0
		_ok = 0
		_genecount = 0

		previousgene = ""
		previouscds = ""
		previousmRNA = ""
		currentgene = ""
		currentmRNA = ""
		# NOTE(review): spelled with three r's, so this name is never read;
		# currentcds itself is first bound after a file has been parsed.
		currrentcds = ""


		### start file iteration ###
		print "\n working - please wait"
		ctr = 0

		for _file in self.gbfiles:
			print "\nparsing file %s \n" % _file
			# NOTE(review): the loop body only prints.  Everything below runs
			# after this loop has finished, so it works on the LAST record
			# yielded for the file.  If a file yields no record, seq_record
			# keeps its value from the previous file (or is unbound for the
			# first file).  The handle from open(_file) is never closed
			# explicitly.
			for seq_record in SeqIO.parse(open(_file), "genbank") :
				#iterating keeps the parsed record in seq_record
				print 'done\n' #nothing			
				#so the whole sequence is in memory and the
				#promoters can be processed immediately afterwards

			#there is no real "empty" feature to start from, so all six
			#trackers are initialised with the first feature of the record
			currentgene = copy.deepcopy(seq_record.features[0])
			currentmRNA = copy.deepcopy(seq_record.features[0])
			currentcds = copy.deepcopy(seq_record.features[0])
			previousgene = copy.deepcopy(seq_record.features[0])
			previouscds = copy.deepcopy(seq_record.features[0])
			previousmRNA = copy.deepcopy(seq_record.features[0])


			#print seq_record.features

			for seq_feature in seq_record.features :

				#print seq_feature.type				        				
				if 'locus_tag' in seq_feature.qualifiers :

					#progress output: every 100th feature that has a 'locus_tag' qualifier
					ctr += 1
					if ctr % 100 == 0: 
						print seq_feature.qualifiers['locus_tag'][0]
						print " "

					if seq_feature.type == "gene" :
						currentgene = copy.deepcopy(seq_feature)
						_genecount += 1

					if seq_feature.type == "CDS" : currentcds = copy.deepcopy(seq_feature)

					if seq_feature.type == "mRNA" : currentmRNA = copy.deepcopy(seq_feature)


				#the order is 'gene', 'mRNA' (if present), 'cds'
				#so gene and CDS tags only agree once the CDS of the current gene has been read


				if 'locus_tag' in currentgene.qualifiers and 'locus_tag' in currentcds.qualifiers :
					if currentgene.qualifiers['locus_tag'][0] == currentcds.qualifiers['locus_tag'][0]:					

						#_ok = 1 only the first time a gene/CDS pair is seen
						if not ('locus_tag' in previousgene.qualifiers) :
							_ok = 1
						elif ('locus_tag' in previousgene.qualifiers) :
							if previousgene.qualifiers['locus_tag'][0] != currentgene.qualifiers['locus_tag'][0] : 	
								_ok = 1
							else: #same locus_tag as the gene handled before
								_ok = 0

						#process and extract the promoters
						if _ok == 1 :	
							if self.root == 0:

								#root 0: the window is anchored on the CDS
								_fastaRecord = fasta.FastaRecord()  #instance must be fresh, otherwise every entry points to the same object
								_fastaRecord.shortname = currentcds.qualifiers['locus_tag'][0]
								#db_xref is optional; fall back to a header without it
								try:
									_fastaRecord.long_name = _fastaRecord.shortname + " " + currentgene.qualifiers['db_xref'][0] + " on %i strand" % (currentcds.strand)
								except:
									_fastaRecord.long_name = _fastaRecord.shortname + " on %i strand" % (currentcds.strand)
								if currentcds.strand == -1 :
									if self.overlap == 1 :
										#-1 strand: window is measured from the CDS end and reverse complemented
										_start = currentcds.location.end.position + (-1 * self.end)
										_end = currentcds.location.end.position + (-1 * self.start) 
										if _end > len(seq_record.seq) : _end = len(seq_record.seq)
										_fastaRecord.sequence = seq_record.seq[_start:_end].reverse_complement().tostring()	        						
										self.dict[_fastaRecord.shortname] = _fastaRecord
										self.extracted.append(_fastaRecord)

									else: #no overlap allowed, current gene on -1 strand

										#the current gene is not written here; only the PREVIOUS
										#-1 strand gene is, clipped at the start of the current gene
										if previousgene.strand == -1 :
											if 'locus_tag' in previousgene.qualifiers and 'locus_tag' in previouscds.qualifiers  :
												if previousgene.qualifiers['locus_tag'][0] == previouscds.qualifiers['locus_tag'][0] :

													_fastaRecord = fasta.FastaRecord() 
													_fastaRecord.shortname = previouscds.qualifiers['locus_tag'][0]
													try:
														_fastaRecord.long_name = _fastaRecord.shortname + " " + previousgene.qualifiers['db_xref'][0] + " on %i strand" % (previouscds.strand)
													except:
														_fastaRecord.long_name = _fastaRecord.shortname + " on %i strand" % (previouscds.strand)
													_start = previouscds.location.end.position + (-1 * self.end)
													_end = previouscds.location.end.position + (-1 * self.start)
													if _end > currentgene.location.start.position : _end = currentgene.location.start.position

													_fastaRecord.sequence = seq_record.seq[_start:_end].reverse_complement().tostring()
													self.dict[_fastaRecord.shortname] = _fastaRecord

													self.extracted.append(_fastaRecord)
													#print 'u' + previousgene.qualifiers['locus_tag'][0]	        

								else: #in sense orientation

									if self.overlap == 1 :	 

										_start = currentcds.location.start.position + self.start
										if _start < 0 : _start = 0
										_end = currentcds.location.start.position  + self.end    
										_fastaRecord.sequence = seq_record.seq[_start:_end].tostring()
										self.dict[_fastaRecord.shortname] = _fastaRecord
										self.extracted.append(_fastaRecord)

									else : #no overlap allowed, current gene in sense

										if 'locus_tag' in previousgene.qualifiers :

											#current gene is sense: clip the window at the end of the previous gene
											_start = currentcds.location.start.position + self.start
											if _start < previousgene.location.end.position : _start = previousgene.location.end.position
											_end = currentcds.location.start.position + self.end 
											_fastaRecord.sequence = _fastaRecord.sequence = seq_record.seq[_start:_end].tostring()	
											self.dict[_fastaRecord.shortname] = _fastaRecord
											self.extracted.append(_fastaRecord)
											#print 'c' + currentgene.qualifiers['locus_tag'][0]

											#check for the previous gene, if antisense, process
											if previousgene.strand == -1 :
												if 'locus_tag' in previousgene.qualifiers and 'locus_tag' in previouscds.qualifiers  :
													if previousgene.qualifiers['locus_tag'][0] == previouscds.qualifiers['locus_tag'][0] :
															_fastaRecord = fasta.FastaRecord() 
															_fastaRecord.shortname = previousgene.qualifiers['locus_tag'][0]
															try:
																_fastaRecord.long_name = _fastaRecord.shortname + " " + previousgene.qualifiers['db_xref'][0] + " on %i strand" % (previouscds.strand)
															except:
																_fastaRecord.long_name = _fastaRecord.shortname + " on %i strand" % (previouscds.strand)
															_start = previouscds.location.end.position + (-1 * self.end)
															_end = previouscds.location.end.position + (-1 * self.start) 
															if _end > currentgene.location.start.position : _end = currentgene.location.start.position

															_fastaRecord.sequence = seq_record.seq[_start:_end].reverse_complement().tostring()	        						
															self.dict[_fastaRecord.shortname] = _fastaRecord

															self.extracted.append(_fastaRecord)
															#print 'd' + previousgene.qualifiers['locus_tag'][0]

										else : #no previous locus_tag yet: nothing to clip against except the sequence start
											_start = currentcds.location.start.position + self.start
											if _start < 0 : _start = 0
											_end = currentcds.location.start.position + self.end 

											_fastaRecord.sequence = _fastaRecord.sequence = seq_record.seq[_start:_end].tostring()	
											self.dict[_fastaRecord.shortname] = _fastaRecord

											self.extracted.append(_fastaRecord)
											#print 'L' + currentcds.qualifiers['locus_tag'][0]





							else: #self.root != 0 (1 or 2)
								SeqRecordObj = ''
								prevSeqRecordObj = ''
								_good = 0
								p_good = 0	

								#choose the anchor feature for the current and the previous gene
								if self.mRNAannot == 1 :

									if 'locus_tag' in currentmRNA.qualifiers :
										SeqRecordObj = copy.deepcopy(currentmRNA)
										prevSeqRecordObj = copy.deepcopy(previousmRNA)
									else:
										SeqRecordObj = copy.deepcopy(currentcds)
										prevSeqRecordObj = copy.deepcopy(previouscds)

								else: #self.mRNAannot != 1
									SeqRecordObj = copy.deepcopy(currentgene)
									prevSeqRecordObj = copy.deepcopy(previousgene)


								#root 1: _good only when the anchor reaches beyond the CDS
								#(ends after it on the -1 strand, or starts before it);
								#any other root value accepts the anchor unconditionally
								if self.root == 1 :
									if currentgene.strand == -1 and (SeqRecordObj.location.end.position > currentcds.location.end.position) :
										_good = 1
									elif (SeqRecordObj.location.start.position < currentcds.location.start.position) :
										_good = 1
									else:
										_good = 0
								else:
									_good = 1 


								#same test for the previous gene
								if self.root == 1 :
									if previousgene.strand == -1 and (prevSeqRecordObj.location.end.position > previouscds.location.end.position) :
										p_good = 1
									elif (prevSeqRecordObj.location.start.position < previouscds.location.start.position) :
										p_good = 1
									else:
										p_good = 0
								else:
									p_good = 1 


								_fastaRecord = fasta.FastaRecord()  #instance must be fresh, otherwise every entry points to the same object
								_fastaRecord.shortname = SeqRecordObj.qualifiers['locus_tag'][0]
								try:
									_fastaRecord.long_name = _fastaRecord.shortname + " " + SeqRecordObj.qualifiers['db_xref'][0] + " on %i strand" % (SeqRecordObj.strand)
								except:
									_fastaRecord.long_name = _fastaRecord.shortname + " on %i strand" % (SeqRecordObj.strand)
								if currentcds.strand == -1 :

									if self.overlap == 1 and _good == 1 :

											_start = SeqRecordObj.location.end.position + (-1 * self.end)
											_end = SeqRecordObj.location.end.position + (-1 * self.start) 
											if _end > len(seq_record.seq) : _end = len(seq_record.seq)
											_fastaRecord.sequence = seq_record.seq[_start:_end].reverse_complement().tostring()
											self.dict[_fastaRecord.shortname] = _fastaRecord
											self.extracted.append(_fastaRecord)

									elif self.overlap == 0 and p_good == 1 :
										#only the previous -1 strand gene is written, clipped at the current gene start
										if previousgene.strand == -1 :
											if previousgene.qualifiers['locus_tag'][0] != currentgene.qualifiers['locus_tag'][0] :
												_fastaRecord = fasta.FastaRecord() 
												_fastaRecord.shortname = previouscds.qualifiers['locus_tag'][0]
												try:
													_fastaRecord.long_name = _fastaRecord.shortname + " " + previousgene.qualifiers['db_xref'][0] + " on %i strand" % (previouscds.strand)
												except:
													_fastaRecord.long_name = _fastaRecord.shortname + " on %i strand" % (previouscds.strand)
												_start = prevSeqRecordObj.location.end.position + (-1 * self.end)
												_end = prevSeqRecordObj.location.end.position + (-1 * self.start) 
												if _end > currentgene.location.start.position : _end = currentgene.location.start.position

												_fastaRecord.sequence = seq_record.seq[_start:_end].reverse_complement().tostring()	        						
												self.dict[_fastaRecord.shortname] = _fastaRecord

												self.extracted.append(_fastaRecord)	        

								else: #in sense orientation

									if self.overlap == 1 and _good == 1 :	 

										_start = SeqRecordObj.location.start.position + self.start
										if _start < 0 : _start = 0
										_end = SeqRecordObj.location.start.position  + self.end    

										_fastaRecord.sequence = seq_record.seq[_start:_end].tostring()	        						
										self.dict[_fastaRecord.shortname] = _fastaRecord

										self.extracted.append(_fastaRecord)

									elif self.overlap == 0 :

										if 'locus_tag' in previousgene.qualifiers :  #false only before the very first gene has been handled

											#current gene is sense, known since this block was called
											if _good == 1 :
												_start = SeqRecordObj.location.start.position + self.start
												if _start < previousgene.location.end.position : _start = previousgene.location.end.position
												_end = SeqRecordObj.location.start.position + self.end 

												_fastaRecord.sequence = _fastaRecord.sequence = seq_record.seq[_start:_end].tostring()	
												self.dict[_fastaRecord.shortname] = _fastaRecord

												self.extracted.append(_fastaRecord)

											#check for the previous gene, if antisense, process, is the same indent block as _good
											if previousgene.strand == -1 and p_good == 1 :
												if previousgene.qualifiers['locus_tag'][0] != currentgene.qualifiers['locus_tag'][0] :
														_fastaRecord = fasta.FastaRecord() 
														_fastaRecord.shortname = previousgene.qualifiers['locus_tag'][0]
														try:
															_fastaRecord.long_name = _fastaRecord.shortname + " " + previousgene.qualifiers['db_xref'][0] + " on %i strand" % (previouscds.strand)
														except:
															_fastaRecord.long_name = _fastaRecord.shortname + " on %i strand" % (previouscds.strand)
														_start = prevSeqRecordObj.location.end.position + (-1 * self.end)
														_end = prevSeqRecordObj.location.end.position + (-1 * self.start) 
														if _end > currentgene.location.start.position : _end = currentgene.location.start.position

														_fastaRecord.sequence = seq_record.seq[_start:_end].reverse_complement().tostring()	        						
														self.dict[_fastaRecord.shortname] = _fastaRecord		
														self.extracted.append(_fastaRecord)

										# NOTE(review): this branch does not test _good, unlike the one above
										else : #no locus_tag on the previous gene feature
											_start = SeqRecordObj.location.start.position + self.start
											if _start < 0 : _start = 0
											_end = SeqRecordObj.location.start.position + self.end 
											_fastaRecord.sequence = _fastaRecord.sequence = seq_record.seq[_start:_end].tostring()	
											self.dict[_fastaRecord.shortname] = _fastaRecord
											self.extracted.append(_fastaRecord)






						#same indent as the "_ok == 1" test: runs whenever gene and CDS tags agree
						previousgene = copy.deepcopy(currentgene)
						previouscds = copy.deepcopy(currentcds) 
						previousmRNA = copy.deepcopy(currentmRNA)

			#to catch the last entry if it is antisense
			# NOTE(review): _fastaRecord is whichever record was created last in
			# the loop above.  When a "previous gene" record was built for the
			# final gene pair, that record's sequence is overwritten here and it
			# is appended a second time.  SeqRecordObj is only bound if the
			# root != 0 branch ran at least once.  Confirm intended behaviour.
			if self.overlap == 0 and currentgene.strand == -1 :  

				if self.root == 0 :                               
					_start = currentcds.location.end.position + (-1 * self.end)
					_end = currentcds.location.end.position + (-1 * self.start) 

				else:
					_start = SeqRecordObj.location.end.position + (-1 * self.end)
					_end = SeqRecordObj.location.end.position + (-1 * self.start) 

				if _end > len(seq_record.seq) : _end = len(seq_record.seq)                                                                                                             
				_fastaRecord.sequence = seq_record.seq[_start:_end].reverse_complement().tostring()	        						
				self.dict[_fastaRecord.shortname] = _fastaRecord
				self.extracted.append(_fastaRecord)

			#running total of 'gene' features seen so far (not reset per file)
			print _genecount



	
	
	
