Attribute VB_Name = "list_distance_matrix"
'List Distance gives the absolute difference divided by the total sum of
'members between two lists. Output is a Phylographer Binary score pair, or
'a top-right interaction triangle (for drawing evolutionary trees).
'    Script:
'    List Distance Matrix, Copyright (C) 2007 Berendzen, Uni-Tbingen
'    see: http://www.zmbp.uni-tuebingen.de/PlantPhysiology/ResearchGroups/harter/berendzen/index.html	
'    List Distance Matrix comes with ABSOLUTELY NO WARRANTY;
'    This is free software, and you are welcome to redistribute it
'    under certain conditions.
'
'    This program is free software; you can redistribute it and/or modify
'    it under the terms of the GNU General Public License as published by
'    the Free Software Foundation; either version 2 of the License, or
'    (at your option) any later version.
'
'    This program is distributed in the hope that it will be useful,
'    but WITHOUT ANY WARRANTY; without even the implied warranty of
'    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
'    GNU General Public License for more details.
'
'    You should have received a copy of the GNU General Public License
'    along with this program; if not, write to the Free Software
'    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.


Option Explicit
Dim MM As New MotifMapperBasicClass
Dim ExperimentalFileAGI_listPaths() As String
Dim ExperimentalFileAGI_delimit_lists() As String
Dim outFileMainTextFile
Dim outFilePhyloparserIdentityList
Dim ListDistance As Double
Dim RootFolderName As String
Dim RootFolderForExpFiles As String
Dim FolderRootStem As String
Dim PhylographerOUTPUT As Integer
Dim fsO                     '-for File System Object - initilized in main


Function FolderPathStem(ByVal FullPath As String) As String
 '* this returns the path part before the last folder
 '- this REQUIRES that at least one folder is given...no drive possible

If InStr(Len(FullPath) - 1, FullPath, "\") Then FullPath = Mid(FullPath, 1, Len(FullPath) - 1)
FolderPathStem = Mid(FullPath, 1, InStrRev(FullPath, "\", , vbBinaryCompare) - 1)
FolderPathStem = FolderPathStem + "\"

End Function


Sub Main()
   '*-LOCAL VARIABLES
   Dim i As Integer, ii As Integer
   Dim datei
   Dim DummyVariable
    ReDim ExperimentalFileAGI_listPaths(0)
    ReDim ExperimentalFileAGI_delimit_lists(0)

   '*-GLOBAL VARIABLES
   Set fsO = CreateObject("Scripting.FileSystemObject")
   
   RootFolderName = MM.setROOT("\ListDistance\")
   'prepare teh folder
   If Not (fsO.folderExists(RootFolderName)) Then
    'create a new folder for convience
    Set datei = fsO.Createfolder(RootFolderName)
   End If
    
   '* input the RootFolder with experimental promoter sets
   RootFolderForExpFiles = InputBox("Enter the Root Folder Path with all " & vbCr & "Gene Lists.", "Root Folder for Experimental Data", "")
   If RootFolderForExpFiles <> "" Then DummyVariable = RetrieveAllTextFilePaths(RootFolderForExpFiles, RootFolderName, 0, ExperimentalFileAGI_listPaths)
    '*
    ReDim ExperimentalFileAGI_delimit_lists(UBound(ExperimentalFileAGI_listPaths))
    FolderRootStem = FolderPathStem(RootFolderForExpFiles)

    Set datei = fsO.GetFolder(RootFolderForExpFiles)
    RootFolderName = RootFolderName + "\" + datei.Name
    If Not (fsO.folderExists(RootFolderName)) Then
     Set datei = fsO.Createfolder(RootFolderName)
    End If
    'datei.Close
    
  '*
  PhylographerOUTPUT = InputBox("[0] = Matrix Output" & vbCr & "[1] = Phylographer Output", "T or P", 0)
  
    
  '*
  If PhylographerOUTPUT = 1 Then
   Set outFileMainTextFile = fsO.CreateTextFile(RootFolderName + "\" + "Phylo" + DateTime.Date$ + ".txt", True)
   Set outFilePhyloparserIdentityList = fsO.CreateTextFile(RootFolderName + "\" + "MyList" + DateTime.Date$ + ".txt", True)
  Else
   Set outFileMainTextFile = fsO.CreateTextFile(RootFolderName + "\" + "LTM" + DateTime.Date$ + ".txt", True)
  End If
  

  '* get lists
  If RootFolderForExpFiles <> "" Then   'Something to Compare?
   For i = 1 To UBound(ExperimentalFileAGI_listPaths) '1 index
          
    ExperimentalFileAGI_delimit_lists(i) = getAGIlist(ExperimentalFileAGI_listPaths(i), 1, "Common")

   Next i
  End If
  '* files are there!!!!!!!


  '* process
  If PhylographerOUTPUT = 1 Then
   outputPHLYOgrapher
  ElseIf PhylographerOUTPUT = 0 Then
   outputRightTopTriangle
  End If


outFileMainTextFile.Close
End Sub

Sub outputPHLYOgrapher()
Dim outerLoopName As String
Dim innerLoopName As String
Dim i, ii


  For i = 1 To UBound(ExperimentalFileAGI_listPaths)
  '* PRINT OUT THE TOP NAMES
  outerLoopName = ""
  outerLoopName = Replace(ExperimentalFileAGI_listPaths(i), FolderRootStem, "")
  outerLoopName = Replace(outerLoopName, "\", "-", , , vbBinaryCompare)
   outFilePhyloparserIdentityList.write outerLoopName & vbNewLine
  Next i
  outFilePhyloparserIdentityList.Close
  

  For i = 1 To (UBound(ExperimentalFileAGI_delimit_lists) - 1)
   '* PRINT OUT THE CURRENT LIST NAME
   outerLoopName = ""
   outerLoopName = Replace(ExperimentalFileAGI_listPaths(i), FolderRootStem, "")
   outerLoopName = Replace(outerLoopName, "\", "-", , , vbBinaryCompare)
   
   
    For ii = i + 1 To UBound(ExperimentalFileAGI_delimit_lists)
    ListDistance = 0

   
   '* GET THE LIST DISTANCE
    ListDistance = getLISTdistance(ExperimentalFileAGI_delimit_lists(i), ExperimentalFileAGI_delimit_lists(ii))
    
    '* PRINT OUT THE CURRENT LIST NAME
    innerLoopName = ""
    innerLoopName = Replace(ExperimentalFileAGI_listPaths(ii), FolderRootStem, "")
    innerLoopName = Replace(innerLoopName, "\", "-", , , vbBinaryCompare)
     outFileMainTextFile.write outerLoopName & vbTab & innerLoopName & vbTab

    '* PRINT OUT THE LIST DIFFERENCE
    outFileMainTextFile.write ListDistance '& vbTab
    outFileMainTextFile.write vbNewLine
   Next ii
   
   
  Next i
  


End Sub

Sub outputRightTopTriangle()
Dim i, ii
Dim tempSTRING As String

   outFileMainTextFile.write vbTab
  For i = 1 To UBound(ExperimentalFileAGI_listPaths)
  '* PRINT OUT THE TOP NAMES
  tempSTRING = ""
  tempSTRING = Replace(ExperimentalFileAGI_listPaths(i), FolderRootStem, "")
  tempSTRING = Replace(tempSTRING, "\", "-", , , vbBinaryCompare)
   outFileMainTextFile.write tempSTRING & vbTab
  Next i
  outFileMainTextFile.write vbNewLine
  

  For i = 1 To UBound(ExperimentalFileAGI_delimit_lists)
  '* PRINT OUT THE CURRENT LIST NAME
  tempSTRING = ""
  tempSTRING = Replace(ExperimentalFileAGI_listPaths(i), FolderRootStem, "")
  tempSTRING = Replace(tempSTRING, "\", "-", , , vbBinaryCompare)
   outFileMainTextFile.write tempSTRING & vbTab
   
    For ii = 1 To UBound(ExperimentalFileAGI_delimit_lists)
    ListDistance = 0


   '* GET THE LIST DISTANCE
    ListDistance = getLISTdistance(ExperimentalFileAGI_delimit_lists(i), ExperimentalFileAGI_delimit_lists(ii))
    

    '* PRINT OUT THE LIST DIFFERENCE
    outFileMainTextFile.write ListDistance & vbTab
   Next ii
   
   outFileMainTextFile.write vbNewLine
  Next i
  
End Sub

Function getLISTdistance(ByVal FIRST_list As String, ByVal SECOND_list As String) As Double
Dim tempFIRSTarray
Dim FirstListLen As Integer
Dim SecondListLen As Integer
Dim SharedBetween As Integer
    SharedBetween = 0
Dim reGGExp
    Set reGGExp = New RegExp
Dim r1Matches   ', r1Match
Dim i As Integer
     
 reGGExp.Pattern = "%"
 reGGExp.Global = True
 reGGExp.IgnoreCase = True
 Set r1Matches = reGGExp.Execute(FIRST_list)
 FirstListLen = r1Matches.Count - 1
 
 reGGExp.Pattern = "%"
 reGGExp.Global = True
 reGGExp.IgnoreCase = True
 Set r1Matches = reGGExp.Execute(SECOND_list)
 SecondListLen = r1Matches.Count - 1
 
FIRST_list = UCase(FIRST_list)
SECOND_list = UCase(SECOND_list)

tempFIRSTarray = Split(FIRST_list, "%", , vbBinaryCompare)

'MsgBox tempFIRSTarray(0) & " " & tempFIRSTarray(UBound(tempFIRSTarray))

For i = 1 To (UBound(tempFIRSTarray) - 1)  '*first and last are empty

 If InStr(1, SECOND_list, "%" + tempFIRSTarray(i) + "%") Then SharedBetween = SharedBetween + (1 * 2)

Next i

If PhylographerOUTPUT = 1 Then
 getLISTdistance = SharedBetween / (FirstListLen + SecondListLen)
Else
 getLISTdistance = 1 - (SharedBetween / (FirstListLen + SecondListLen))
End If

'MsgBox "halt"
End Function

Function RetrieveAllTextFilePaths(LocalRootFolder, ByVal NewPath As String, FoundFiles As Integer, PathsArray)
 '* will return all text files found under a given folder
 '* including subfolders, all folders with -.txt files will be
 '* created under the root folder
Dim QueriedObj, oTextFiles, obj
Dim TempArr() As String
Dim datei
Dim dummy

If FoundFiles = 0 Then
    ReDim PathsArray(0)
End If

 Set QueriedObj = fsO.GetFolder(LocalRootFolder)  '* the current folder/subfolder
 
 'Set oTextFiles = QueriedObj.Files   '* get only the .txt files
 
 'For Each obj In QueriedObj.Files
 ' If InList(fsO3.GetExtensionName(obj), fileFORMAT) Then
 
 '- if we find that there are text files there we set the destination folders

    'prepare another folder
   ' NewPath = NewPath + "\" + QueriedObj.Name
   ' If Not (fsO.Folderexists(NewPath)) Then
   '  Set datei = fsO.Createfolder(NewPath)
   ' End If

 
 For Each obj In QueriedObj.Files
  If UCase(fsO.GetExtensionName(obj)) = "TXT" Then
   ReDim Preserve PathsArray(UBound(PathsArray) + 1)
   PathsArray(UBound(PathsArray)) = fsO.GetAbsolutePathName(obj)
  End If
 Next obj
  
 For Each obj In QueriedObj.subFolders
 'MsgBox obj
  dummy = RetrieveAllTextFilePaths(obj, NewPath, UBound(PathsArray), PathsArray)
 Next

End Function

Function getAGIlist(ByVal FullPath As String, AGIlistColumn As Integer, IdentifierString As String)
Dim fsOpen
Dim Flagg As Integer
    Flagg = 0
Dim lineSPLITED, atLINE As String

    getAGIlist = "%"
    
    '*open the file and get the AGI
    Set fsOpen = fsO.OpenTextFile(FullPath)
    
    '- save as a % delimited string
   Do Until fsOpen.atEndofStream
    
    atLINE = fsOpen.readLine
    lineSPLITED = Split(atLINE, vbTab, , vbBinaryCompare)
   
    If Flagg = 1 And atLINE <> "" And (UBound(lineSPLITED) >= AGIlistColumn) Then
     getAGIlist = getAGIlist + lineSPLITED(AGIlistColumn) + "%"
    End If
    
    If UCase(lineSPLITED(AGIlistColumn)) = UCase(IdentifierString) Then Flagg = 1
   Loop


End Function
