# [page-viewer artifact, not part of the program] "You are on page 1 of 8"

import shutil

import os
import time
import PoseScorer
import Watgen5
import Babel
import PoseScorer
import PDBTools
import KDTree
import FGD

class Atom:
    """One ATOM/HETATM record of a PDB file, parsed by fixed column position.

    Attributes:
        X, Y, Z: coordinates parsed from the record as floats.
        Line: the record text exactly as it was passed in.
        Chain: the single character at 0-based index 21.
        ResidueName: the three characters at 0-based indices 17-19.
    """

    def __init__(self, line):
        """Parse the fixed-width fields of *line*.

        Args:
            line: one ATOM or HETATM record (trailing newline already removed
                by the callers in this file).

        Raises:
            ValueError: if a coordinate field cannot be converted to float.
        """
        # The PDB coordinate fields are 8 columns wide (1-based columns
        # 31-38, 39-46 and 47-54), i.e. 0-based slices 30:38, 38:46, 46:54.
        # Starting each slice one column later silently dropped the first
        # character, turning e.g. "-123.456" into +123.456.
        self.X = float(line[30:38])
        self.Y = float(line[38:46])
        self.Z = float(line[46:54])
        self.Line = line
        self.Chain = line[21:22]
        self.ResidueName = line[17:20]

class Drug:
    """Plain record describing one drug entry.

    ``Name`` and ``Brand`` are stored as given; ``Bank`` and ``Ligand`` are
    each stored as a one-element list wrapping the value passed in.
    """

    def __init__(self, Name, Brand, Bank, LigandID):
        """Store the four fields of a drug entry on the new instance."""
        self.Name, self.Brand = Name, Brand
        # Both identifiers are kept as lists; DrugDictionary iterates Ligand.
        self.Bank = [Bank]
        self.Ligand = [LigandID]

class Records:
    """One parsed row of the Watgen report.

    Attributes:
        File: first value passed to the constructor (``Filename``).
        Outcome: second value (``Outcome``).
        Rear: third value (``Rearrangement``).
    """

    def __init__(self, Filename, Outcome, Rearrangement):
        """Keep the three report columns under their short attribute names."""
        self.File = Filename
        self.Outcome = Outcome
        self.Rear = Rearrangement

#-------------------------------------------------------------------------------------------------------------------

def LoadCodons():
    """Rebuild the global ``CodonCheckList`` from ``codonslast.txt``.

    Every line of the file contributes the three characters found at 0-based
    positions 24-26. Entries that are empty or whitespace-only (short or blank
    lines) are skipped.

    Raises:
        OSError: if ``codonslast.txt`` cannot be opened in the current
            working directory.
    """
    global CodonCheckList
    CodonCheckList = []
    # The context manager closes the file even if reading fails part-way.
    with open("codonslast.txt", "r") as codon_file:
        for line in codon_file:
            ThreeLetterCode = line[24:27]
            # A 3-character slice can never equal a single " ", so test for
            # "nothing but whitespace" instead of comparing to one space.
            if ThreeLetterCode.strip():
                CodonCheckList.append(ThreeLetterCode)

def LoadDrugs():
    """Read ``druglast.txt`` and rebuild the ligand lookup via DrugDictionary.

    Each non-blank line is split on tabs and its first four columns are passed
    to ``Drug`` as name, brand, bank and ligand ID, in that order.

    Raises:
        OSError: if ``druglast.txt`` cannot be opened.
        IndexError: if a non-blank line has fewer than four tab-separated
            columns.
    """
    DrugCheckList = []
    # Context manager so the handle is released even when a row is malformed.
    with open("druglast.txt", "r") as drug_file:
        for line in drug_file:
            record = line.replace("\n", "")
            # A blank (e.g. trailing) line has no columns to index.
            if not record.strip():
                continue
            ary = record.split("\t")
            DrugCheckList.append(Drug(ary[0], ary[1], ary[2], ary[3]))
    DrugDictionary(DrugCheckList)

def DrugDictionary(listofdrugs):
    """Reset the global ``DrugCodes`` mapping of ligand ID -> drug name.

    Args:
        listofdrugs: iterable of objects exposing ``Ligand`` (an iterable of
            IDs) and ``Name``.

    When the same ligand ID appears more than once, the name seen first wins.
    """
    global DrugCodes
    DrugCodes = {}
    for entry in listofdrugs:
        for ligand_id in entry.Ligand:
            # setdefault only inserts when the key is still absent.
            DrugCodes.setdefault(ligand_id, entry.Name)

#-------------------------------------------------------------------------------------------------------------------

def PerformSortBatch(folderPath):
    """Run SplitPDB on every ``.pdb`` file directly inside *folderPath*.

    Side effects: rebinds the globals ``Folder`` (list of
    ``folderPath + "/" + name`` paths) and ``number`` (1-based position of the
    file currently being processed; 0 when nothing was processed).
    """
    global Folder
    global number
    Folder = []
    number = 0
    Folder.extend(
        folderPath + "/" + entry
        for entry in os.listdir(folderPath)
        if entry.endswith(".pdb")
    )
    # ``number`` is the global loop target, so it advances before each call.
    for number, path in enumerate(Folder, start=1):
        SplitPDB(path)

def SplitPDB(filename):
    """Sort one PDB file into the known-drug or unknown-drug folder.

    Collects the distinct residue names of all ATOM/HETATM records, prints a
    progress line using the globals ``number`` and ``Folder``, and hands the
    file to KnownDrugFileMover when DrugSearch finds a match, otherwise to
    UnknownDrugFileMover.

    Args:
        filename: path of the PDB file to inspect.
    """
    Residues = []
    # Context manager: the batch opens one file per call, so leaked handles
    # would otherwise pile up for the length of the run.
    with open(filename, "r") as pdb_file:
        for line in pdb_file:
            if (line[0:4] == "ATOM") or (line[0:6] == "HETATM"):
                # Atom() is still constructed so that a record with unparsable
                # coordinates fails here rather than in a later stage.
                residue = Atom(line.replace("\n", "")).ResidueName
                if residue not in Residues:
                    Residues.append(residue)
    print("Sorting File",number,"of",len(Folder),"\tFileName:",filename[-8:-4]+".pdb")
    if DrugSearch(Residues):
        KnownDrugFileMover(filename)
    else:
        UnknownDrugFileMover(filename)

def DrugSearch(residues):
    """Return True if any entry of *residues* is a key of global ``DrugCodes``.

    Returns False for an empty iterable.
    """
    return any(code in DrugCodes for code in residues)

def KnownDrugFileMover(filename):
    """Copy *filename* into the directory named by global ``ContainsDrugs``.

    The directory is created first when it does not exist.
    """
    # If the destination is not an existing directory, shutil.copy2 treats it
    # as a file name, so every copy would overwrite one single file.
    os.makedirs(ContainsDrugs, exist_ok=True)
    shutil.copy2(filename, ContainsDrugs)

def UnknownDrugFileMover(filename):
    """Copy *filename* into the directory named by global ``DoesntContainDrugs``.

    The directory is created first when it does not exist.
    """
    # If the destination is not an existing directory, shutil.copy2 treats it
    # as a file name, so every copy would overwrite one single file.
    os.makedirs(DoesntContainDrugs, exist_ok=True)
    shutil.copy2(filename, DoesntContainDrugs)

#-------------------------------------------------------------------------------------------------------------------
def PerformSplitBatch(folderPath):
    """Split every ``.pdb`` file in *folderPath* into drug/protein/water files.

    For each file, splitPDB separates the atoms and writeSplitFiles stores
    them in ``FolderOfAutomation/<file name without ".pdb">``.

    Args:
        folderPath: directory whose ``.pdb`` files are processed.
    """
    Folder = getPDBinFolder(folderPath)
    for position, filename in enumerate(Folder, start=1):
        print("Splitting File",position,"of",len(Folder),"\tFileName:",filename)
        drug_atoms, target_atoms, waters = splitPDB(folderPath+"/"+filename)
        newDirectoryName = "FolderOfAutomation/"+filename.replace(".pdb","")
        # makedirs also creates the parent "FolderOfAutomation" on a first
        # run, where os.mkdir would raise FileNotFoundError.
        os.makedirs(newDirectoryName, exist_ok=True)
        writeSplitFiles(drug_atoms, target_atoms, waters, newDirectoryName)

def getPDBinFolder(folderPath):
    """Return the bare names of the entries in *folderPath* ending in ".pdb".

    The names are returned in the order os.listdir yields them and are not
    joined with *folderPath*.
    """
    return [entry for entry in os.listdir(folderPath) if entry.endswith(".pdb")]

def splitPDB(OpenThisFile):
    """Separate one PDB file into a single drug copy, nearby chains and waters.

    ATOM/HETATM records are classified by residue name: keys of the global
    ``DrugCodes`` are drug atoms, "HOH" is water, and names in the global
    ``CodonCheckList`` are target atoms. Of the drug atoms, only one residue
    is kept: the first residue number seen on the chain of the first drug
    atom, restricted to alternate-location flag " " or "A". Target atoms are
    kept only for chains that determineChains reports within radius 10 of
    that drug.

    Args:
        OpenThisFile: path of the PDB file.

    Returns:
        (drug_atoms, retained_target_atoms, all_waters), three lists of Atom.

    Raises:
        IndexError: if the file has no atom whose residue name is in
            ``DrugCodes``.
    """
    AllWaters = []
    AllDrug = []
    CondensedInfo = []
    # Context manager so the handle is closed once the records are parsed.
    with open(OpenThisFile, "r") as pdb_file:
        for line in pdb_file:
            if (line[0:4] == "ATOM") or (line[0:6] == "HETATM"):
                newAtom = Atom(line.replace("\n", ""))
                if newAtom.ResidueName in DrugCodes:
                    AllDrug.append(newAtom)
                elif newAtom.ResidueName == "HOH":
                    AllWaters.append(newAtom)
                elif newAtom.ResidueName in CodonCheckList:
                    CondensedInfo.append(newAtom)

    if not AllDrug:
        # Same exception type that indexing AllDrug[0] would raise, but with
        # a message naming the offending file.
        raise IndexError("no atoms of a known drug found in " + OpenThisFile)

    DrugChain = AllDrug[0].Chain
    drug_atoms = []
    chosen_residue = None
    for atom in AllDrug:
        # Keep only the primary conformation (blank or "A" at index 16).
        if atom.Line[16:17] not in (" ", "A"):
            continue
        if atom.Chain != DrugChain:
            continue
        residue_number = atom.Line[22:26]
        if chosen_residue is None:
            # The first qualifying atom fixes which residue is "the" drug.
            chosen_residue = residue_number
        if residue_number == chosen_residue:
            drug_atoms.append(atom)

    ListOfChains = determineChains(drug_atoms, CondensedInfo, 10)
    RetainedTarget = [a for a in CondensedInfo if a.Chain in ListOfChains]
    return drug_atoms, RetainedTarget, AllWaters

def determineChains(Drug, Target, radius):
    """Return the chains having at least one atom within *radius* of the drug.

    Args:
        Drug: iterable of drug atoms accepted by Distance.
        Target: iterable of target atoms exposing ``Chain``.
        radius: inclusive distance cut-off.

    Returns:
        List of chain identifiers in order of first hit, each listed once.
    """
    chains = []
    for target_atom in Target:
        if target_atom.Chain in chains:
            continue
        # any() stops at the first drug atom in range; without stopping, the
        # same chain was appended once per drug atom within the radius.
        if any(Distance(drug_atom, target_atom) <= radius for drug_atom in Drug):
            chains.append(target_atom.Chain)
    return chains

def Distance(P1, P2):
    """Return the Euclidean distance between two points as a float.

    Both arguments must expose numeric ``X``, ``Y`` and ``Z`` attributes.
    """
    dx, dy, dz = P1.X - P2.X, P1.Y - P2.Y, P1.Z - P2.Z
    squared_length = dx**2 + dy**2 + dz**2
    return float(squared_length**0.5)

def writeSplitFiles(Drug,Target,AllWaters,folder = ""):
    """Write DrugStrand.pdb, ProteinStrand.pdb and AllWaters.pdb into *folder*.

    The drug atoms are reordered so that the file starts with the first three
    atoms whose ``Line[77:78]`` is "C". Atoms met before the third such carbon
    that are not carbons are held back and written right after those three;
    all later atoms keep their order. Whenever an atom is held back,
    ``folder[-5:-1]`` (taken after the trailing "/" is added) is recorded
    once in the global ``UsedNonC``.

    Args:
        Drug: atoms (objects with a ``Line`` string) of the drug.
        Target: atoms written unchanged to ProteinStrand.pdb.
        AllWaters: atoms written unchanged to AllWaters.pdb.
        folder: output directory; "" means the current directory.
    """
    if (folder != "") and (folder[-1] != "/"):
        folder = folder+"/"
    folder_tag = folder[-5:-1]

    leading_carbons = []
    held_back = []
    remainder = []
    for atom in Drug:
        if len(leading_carbons) < 3:
            if atom.Line[77:78] == "C":
                leading_carbons.append(atom)
            else:
                if folder_tag not in UsedNonC:
                    UsedNonC.append(folder_tag)
                held_back.append(atom)
        else:
            remainder.append(atom)

    # Building the order up front guarantees the held-back atoms are written
    # even when no atom follows the third carbon or there are fewer than
    # three carbons; flushing only on "the next atom" dropped them.
    ordered_drug = leading_carbons + held_back + remainder

    # Context managers close each file even if a write fails.
    with open(folder+"DrugStrand.pdb","w") as DrugStrand:
        DrugStrand.writelines(atom.Line+"\n" for atom in ordered_drug)
    with open(folder+"ProteinStrand.pdb","w") as ProteinStrand:
        ProteinStrand.writelines(atom.Line+"\n" for atom in Target)
    with open(folder+"AllWaters.pdb","w") as WaterStrand:
        WaterStrand.writelines(atom.Line+"\n" for atom in AllWaters)

#-------------------------------------------------------------------------------------------------------------------

def PerformWatgenBatch(folderPath):
    """Run Watgen5 in every sub-folder of *folderPath* and write a report.

    For each sub-folder the working directory is switched to it,
    ``Watgen5.runWatgen5`` is run on BabelOut.pdb / ProteinStrand.pdb with
    WatgenWaters.pdb as output name, the original working directory is
    restored, and WriteReport adds one row to ``WatGen5RR.tsv`` (created in
    the original working directory).

    Args:
        folderPath: directory holding one sub-folder per structure.
    """
    rootFolder = os.getcwd()
    # Plain files (e.g. OS metadata) cannot be entered with chdir; skip them.
    FolderList = [
        entry for entry in os.listdir(folderPath)
        if os.path.isdir(os.path.join(folderPath, entry))
    ]
    # Context manager: the report is closed even if a structure fails.
    with open("WatGen5RR.tsv","w") as Report:
        for position, filename in enumerate(FolderList, start=1):
            print("Watgen-ing File",position,"of",len(FolderList),"\tFileName:",filename)
            stem = "%s/%s/"%(folderPath,filename)
            os.chdir(stem)
            try:
                print(os.getcwd())
                Watgen5.runWatgen5("BabelOut.pdb","ProteinStrand.pdb","WatgenWaters.pdb")
            finally:
                # Always return, so an error does not strand the process
                # inside a structure folder.
                os.chdir(rootFolder)
            WriteReport(filename, Report)

def WriteReport(filename, Report):
    """Append one tab-separated row about *filename* to the open *Report*.

    The row is ``<filename>\\t<Success|Failure>\\t<RearrangedAtoms|--->\\n``.
    The outcome column follows the boolean returned by CheckFile; the last
    column says whether *filename* is listed in the global ``UsedNonC``.
    """
    outcome = "Success" if CheckFile(filename) else "Failure"
    rearranged = "RearrangedAtoms" if filename in UsedNonC else "---"
    Report.write("\t".join((filename, outcome, rearranged)) + "\n")

def CheckFile(filename):
    """Report whether ``FolderOfAutomation/<filename>/WatgenWaters.pdb`` has content.

    Returns:
        True when the file exists and contains at least one line; False when
        it is empty or does not exist.
    """
    path = "FolderOfAutomation/"+filename+"/WatgenWaters.pdb"
    try:
        # Reading a single line is enough to tell "empty" from "not empty",
        # and the context manager closes the handle.
        with open(path, "r") as Check:
            return Check.readline() != ""
    except FileNotFoundError:
        # No output file at all is reported as a failed run rather than
        # aborting the whole batch.
        return False

#-------------------------------------------------------------------------------------------------------------------

def PerformBabelBatch(folderPath):
    """Run ``Babel.runBabel`` inside every sub-folder of *folderPath*.

    In each sub-folder the call is
    ``Babel.runBabel("DrugStrand.pdb", "BabelOut.pdb", 7.4)``. The working
    directory in effect at entry is restored before returning, also when a
    run raises.

    Args:
        folderPath: directory (relative to the current working directory)
            holding one sub-folder per structure.
    """
    cwd = os.getcwd()
    rootFolder = cwd+"/"+folderPath
    # Plain files (e.g. OS metadata) cannot be entered with chdir; skip them.
    FolderList = [
        entry for entry in os.listdir(rootFolder)
        if os.path.isdir(rootFolder+"/"+entry)
    ]
    try:
        for position, filename in enumerate(FolderList, start=1):
            print("Adding protons to file",position,"of",len(FolderList),"\tFileName:",filename)
            os.chdir(rootFolder+"/"+filename)
            Babel.runBabel("DrugStrand.pdb","BabelOut.pdb",7.4)
    finally:
        # Restore the caller's directory even if a conversion fails.
        os.chdir(cwd)

#-------------------------------------------------------------------------------------------------------------------

def PerformAlignBatch(folderPath):
    """Call alignWaters for each entry of *folderPath* that Watgen passed.

    Only entries whose name is returned by SuccessfulDrugs are processed.
    alignWaters receives ``"<folderPath>/<entry>/"``.
    """
    Pass = SuccessfulDrugs()
    rootFolder = os.getcwd()
    FolderList = [entry for entry in os.listdir(folderPath) if entry in Pass]
    total = len(FolderList)
    for position, filename in enumerate(FolderList, start=1):
        print("Aligning File",position,"of",total,"\tFileName:",filename)
        alignWaters("{}/{}/".format(folderPath, filename))
        # Return to the directory that was current at entry.
        os.chdir(rootFolder)

def SuccessfulDrugs():
    """Return the first column of every "Success" row of ``WatGen5RR.tsv``.

    Rows are tab-separated; the second column is compared with "Success".
    Blank rows and rows with fewer than two columns are ignored.

    Raises:
        OSError: if ``WatGen5RR.tsv`` cannot be opened in the current
            working directory.
    """
    Pass = []
    # Context manager so the report handle is released after reading.
    with open("WatGen5RR.tsv","r") as report:
        for row in report:
            fields = row.replace("\n","").split("\t")
            # A blank or truncated row has no outcome column to inspect.
            if len(fields) >= 2 and fields[1] == "Success":
                Pass.append(fields[0])
    return Pass

def alignWaters(folderPath):
    """Align Watgen waters to the X-ray waters of one structure folder.

    Reads AllWaters.pdb, WatgenWaters.pdb and BabelOut.pdb from *folderPath*
    and writes the text produced by makeAlignmentString to Alignment.txt in
    the same folder.

    Args:
        folderPath: folder prefix, concatenated directly with the file names
            (callers in this file pass it with a trailing "/").
    """
    # X-ray waters: valid "HOH" atoms of type "O", renumbered 1..n in file
    # order; the number read from the file is kept as oldResidueNumber.
    XrayWaters = []
    with open(folderPath+"AllWaters.pdb",'r') as xray_file:
        for line in xray_file:
            newAtom = PDBTools.Atom(line)
            if newAtom.valid and (newAtom.residueType == "HOH") and (newAtom.atomType == "O"):
                newAtom.oldResidueNumber = newAtom.residueNumber
                newAtom.residueNumber = len(XrayWaters) + 1
                XrayWaters.append(newAtom)

    # Predicted waters: valid "WAT" atoms of type "O".
    WatgenWaters = []
    with open(folderPath+"WatgenWaters.pdb",'r') as watgen_file:
        for line in watgen_file:
            newAtom = PDBTools.Atom(line)
            if newAtom.valid and (newAtom.residueType == "WAT") and (newAtom.atomType == "O"):
                WatgenWaters.append(newAtom)

    # Ligand: every valid atom of BabelOut.pdb.
    LigandAtoms = []
    with open(folderPath+"BabelOut.pdb",'r') as ligand_file:
        for line in ligand_file:
            newAtom = PDBTools.Atom(line)
            if newAtom.valid:
                LigandAtoms.append(newAtom)
    Ligand = PDBTools.Peptide(LigandAtoms,1,[])

    alignment = PoseScorer.alignWater(XrayWaters,Ligand,WatgenWaters)
    alignStr = makeAlignmentString(XrayWaters,WatgenWaters,Ligand,alignment)
    with open(folderPath+"Alignment.txt",'w') as outf:
        outf.write(alignStr)

def makeAlignmentString(baseWaters,poseWaters,pose,align):
    """Format a water alignment as the text stored in Alignment.txt.

    Args:
        baseWaters: reference waters; each needs ``oldResidueNumber``.
        poseWaters: waters indexed in parallel with *align*; each needs a
            ``distance(other)`` method.
        pose: object whose ``atoms`` are loaded into a KDTree.
        align: sequence where ``align[j] > 0`` is the 1-based index into
            *baseWaters* matched to ``poseWaters[j]``; other values mean
            "unmatched".

    Returns:
        A string with one row per entry of *align*
        (``<j+1>\\t<old residue number>\\t<distance rounded to 2>`` or
        ``<j+1>\\t0``), then the alignment count, distance sum and average,
        then one tab-indented row per value returned by
        ``PoseScorer.missingWaters``.
    """
    poseTree = KDTree.KDTree.loadAtomArray(pose.atoms)
    pieces = ["Present in ligand structure:\n"]
    distance_sum = 0
    matched = 0
    for pose_number, base_number in enumerate(align, start=1):
        if base_number > 0:
            pose_water = poseWaters[pose_number - 1]
            base_water = baseWaters[base_number - 1]
            row = str(pose_number) + "\t" + str(base_water.oldResidueNumber)
            separation = pose_water.distance(base_water)
            distance_sum += separation
            matched += 1
            row += "\t" + str(round(separation, 2))
        else:
            row = str(pose_number) + "\t0"
        pieces.append(row + "\n")

    average = distance_sum / matched if matched > 0 else 0.0
    pieces.append("Number of alignments: " + str(matched) + "\n")
    pieces.append("Sum of alignment distances: " + str(distance_sum) + "\n")
    pieces.append("Average alignment distance: " + str(average) + "\n")
    pieces.append("Only Present in base:\n")

    expected = PoseScorer.expectedWaters(baseWaters,poseTree)
    missing = PoseScorer.missingWaters(expected,align)
    pieces.extend("\t" + str(m) + "\n" for m in missing)
    return "".join(pieces)
#-------------------------------------------------------------------------------------------------------------------

def PerformFGDBatch(folderPath):
    """Run ``FGD.Start("BabelOut.pdb")`` inside every sub-folder of *folderPath*.

    The working directory in effect at entry is restored before returning,
    also when a run raises.

    Args:
        folderPath: directory (relative to the current working directory)
            holding one sub-folder per structure.
    """
    cwd = os.getcwd()
    rootFolder = cwd+"/"+folderPath
    # Plain files (e.g. OS metadata) cannot be entered with chdir; skip them.
    FolderList = [
        entry for entry in os.listdir(rootFolder)
        if os.path.isdir(rootFolder+"/"+entry)
    ]
    try:
        for position, filename in enumerate(FolderList, start=1):
            print("Finding Functional Groups of file",position,"of",len(FolderList),"\tFileName:",filename)
            os.chdir(rootFolder+"/"+filename)
            FGD.Start("BabelOut.pdb")
    finally:
        # Restore the caller's directory even if a run fails.
        os.chdir(cwd)

#-------------------------------------------------------------------------------------------------------------------

def IndiWat(filename):
    """Run Watgen5 for the single structure folder ``FolderOfAutomation/<filename>``.

    The working directory is switched to that folder for the run and
    restored afterwards, so the function can be called repeatedly with
    relative paths.
    """
    previous = os.getcwd()
    os.chdir("FolderOfAutomation/"+filename)
    try:
        Watgen5.runWatgen5("BabelOut.pdb","ProteinStrand.pdb","WatgenWaters.pdb")
    finally:
        # Without this, a second call would look for FolderOfAutomation
        # inside the first structure's folder.
        os.chdir(previous)

#-------------------------------------------------------------------------------------------------------------------

# ---- Shared configuration and state -----------------------------------------
start = time.time()
ContainsDrugs = "PDB With Known Drug"               # destination used by KnownDrugFileMover
DoesntContainDrugs = "PDB With Unknown or No Drug"  # destination used by UnknownDrugFileMover
PDBFiles = "../PDB information"                     # not referenced by the code in this file
UsedNonC = []   # filled by writeSplitFiles, read by WriteReport
LoadDrugs()
LoadCodons()

# ---- Stage 1: sort the raw PDB files by whether they contain a known drug ----
PerformSortBatch("PDBInfo8")
print("Sort duration ",(time.time() - start))

# ---- Stage 2: split each known-drug file into drug / protein / waters --------
splitStart = time.time()
print ("\n")
PerformSplitBatch("PDB With Known Drug")
print("Split duration ",(time.time() - splitStart))
print ("\n")

# ---- Stage 3: Babel on every DrugStrand.pdb ----------------------------------
BabelStart = time.time()
print ("\n")
PerformBabelBatch("FolderOfAutomation")
print("Babel duration ",(time.time() - BabelStart))
print ("\n")

# ---- Stage 4: Watgen5 and its success/failure report -------------------------
watgenStart = time.time()
print ("\n")
PerformWatgenBatch("FolderOfAutomation")
print("Watgen duration ",(time.time() - watgenStart))
print ("\n")

# ---- Stage 5: align predicted waters with the X-ray waters -------------------
AlignStart = time.time()
print ("\n")
PerformAlignBatch("FolderOfAutomation")
print("Total Align duration ",(time.time() - AlignStart))
print ("\n")

# ---- Stage 6: functional-group detection -------------------------------------
FGDStart = time.time()
print ("\n")
PerformFGDBatch("FolderOfAutomation")
print("Total FGD duration ",(time.time() - FGDStart))
print ("\n")

print ("Total Routine duration ",(time.time() - start))


print ("Done")

# Single-structure rerun, kept for manual use:
#IndiWat("1CEB")

# The script deliberately never exits on its own (presumably to keep a console
# window open - confirm). Sleeping keeps that behaviour without pinning a CPU
# core the way an empty `while True: pass` loop does.
while True:
    time.sleep(1)