use Math::Trig; #USE THIS MODULE # # Iterative method to align using RMSD by translating AND rotating to predict best structure of a modified small molecule from a different cartesian coordinate to a original ligand's cartesian co-ordinate system. # Code by - Shourjya Sanyal. Jun 2012. Dublin Ireland. print "***************************** \n"; print "Iterative RMSD Translate Partially Matched Ligands Ver 0.1 \n"; print "***************************** \n"; open (DATA,'input.txt') || die("Could not open file!"); # load the file into an array chomp(@dataData = ); close (DATA); # print "scalar @dataData"; for ($line = 1; $line < scalar @dataData; $line++) { print "$dataData[$line] \n"; ($loc[$line], $idloc[$line])=split(/\s/,$dataData[$line]); $loc[$line] = $idloc[$line]; } # TEST FOR input.txt #($line = 1; $line < scalar @dataData; $line++) # { # print "$idloc[$line] \n"; # } $inpnum = scalar @dataData; print "Line in Input --- $inpnum \n"; # 23***************************** # From - NEWPDB.pdb # To - crf24.pdb open (FROM,'crf46.pdb') || die("Could not open file!"); # load the file into an array chomp(@dataFrom = ); close(FROM); open (TO,'crf24.pdb') || die("Could not open file!"); # load the file into an array chomp(@dataTo = ); close(TO); $inputFile = "NEWPDB.pdb"; $inputFile2 = "crf24.pdb"; # ***************************** print "*****************************\n"; # ***************************** # FIND CENTER OF MASS OF FROM - PDB1 print "\n"; $addcmx=0; $addcmy=0; $addcmz=0; $totmass=0; $num = 0.0;$mass = 0; for ($line = 0; $line < scalar @dataFrom; $line++) {for ($lineloc = 1; $lineloc < ($inpnum/2); $lineloc++) {if ($line == ($idloc[$lineloc]-1)) {print "@dataFrom[$line]\n"; $mydata = @dataFrom[$line]; @aa= split('',$mydata); $e = @aa[77]; #print "$e"; if ($e eq "N") {$mass = 14.006;} elsif ($e == 'C') {$mass = 12.010;} elsif ($e == 'O') {$mass = 5.999;} elsif ($e == 'H') {$mass = 1.007;} $num = join('',@aa[32..37]); $tot = $num * $mass; $addcmx = $addcmx + $tot; $num = join('',@aa[40..46]); $tot = $num * $mass; $addcmy = $addcmy + $tot; $num = join('',@aa[48..54]); $tot = $num * $mass; $addcmz = $addcmz + $tot; $totmass = $totmass + $mass; } } } print "$totmass $addcmx $addcmy $addcmz\n"; $centerx1 = $addcmx / $totmass; $centery1 = $addcmy / $totmass; $centerz1 = $addcmz / $totmass; print" FIND CENTER OF MASS OF - FROM PDB1\n"; print "*****************************\n"; # ***************************** # FIND CENTER OF MASS OF TO - PDB2 print "\n"; $addcmx=0; $addcmy=0; $addcmz=0; $totmass=0; $num = 0.0;$mass = 0; for ($line = 0; $line < scalar @dataTo; $line++) {for ($lineloc = ($inpnum/2)+1; $lineloc < $inpnum; $lineloc++) {if ($line == ($idloc[$lineloc]-1)) {print "@dataTo[$line]"; $mydata = @dataTo[$line]; @aa= split('',$mydata); $e = @aa[77]; #print "$e"; if ($e eq "N") {$mass = 14.006;} elsif ($e == 'C') {$mass = 12.010;} elsif ($e == 'O') {$mass = 5.999;} elsif ($e == 'H') {$mass = 1.007;} $num = join('',@aa[32..37]); $tot = $num * $mass; $addcmx = $addcmx + $tot; $num = join('',@aa[40..46]); $tot = $num * $mass; $addcmy = $addcmy + $tot; $num = join('',@aa[48..54]); $tot = $num * $mass; $addcmz = $addcmz + $tot; $totmass = $totmass + $mass; #print "$tot" } } } print "$totmass $addcmx $addcmy $addcmz\n"; $centerx2 = $addcmx / $totmass; $centery2 = $addcmy / $totmass; $centerz2 = $addcmz / $totmass; print" FIND CENTER OF MASS OF - TO PDB2\n"; print "*****************************\n"; # ***************************** # TRANSLATE COM OF FROM - PDB1 TO ORIGIN $addcmx=0; $addcmy=0; $addcmz=0; $atomnm=0; $num = 0.0; for ($line = 0; $line < scalar @dataFrom; $line++) {if ($dataFrom[$line] =~ m/^ATOM/) { $mydata = @dataFrom[$line]; @aa= split('',$mydata); $num = join('',@aa[32..37]); $addcmx = $num - $centerx1; $addcmx = substr($addcmx, 0, 6); $num = join('',@aa[40..46]); $addcmy = $num - $centery1; $addcmy = substr($addcmy, 0, 6); $num = join('',@aa[48..54]); $addcmz = $num - $centerz1; $addcmz = substr($addcmz, 0, 6); $mydata = join('',@aa[0..31],$addcmx,' ',$addcmy,' ',$addcmz,,' ',@aa[55..77]); @dataFrom[$line] = $mydata; print "@dataFrom[$line]\n"; } } print" Move COM TO ORIGIN FROM - PDB1\n"; print "*****************************\n"; # ***************************** # TRANSLATE COM OF To - PDB 2 TO ORIGIN print"\n"; $addcmx=0; $addcmy=0; $addcmz=0; $atomnm=0; $num = 0.0; for ($line = 0; $line < scalar @dataTo; $line++) {if ($dataTo[$line] =~ m/^HETATM/) { $mydata = @dataTo[$line]; @aa= split('',$mydata); $num = join('',@aa[32..37]); $addcmx = $num - $centerx2; $addcmx = substr($addcmx, 0, 6); $num = join('',@aa[40..46]); $addcmy = $num - $centery2; $addcmy = substr($addcmy, 0, 6); $num = join('',@aa[48..54]); $addcmz = $num - $centerz2; $addcmz = substr($addcmz, 0, 6); $mydata = join('',@aa[0..31],$addcmx,' ',$addcmy,' ',$addcmz,,' ',@aa[55..77]); @dataTo[$line] = $mydata; print "@dataTo[$line]\n"; } } print" Move COM TO ORIGIN TO - PDB2\n"; print "*****************************\n"; #***************************** # DO MONTE CARLO TO FIND LOWEST RMSD # ***************************** # CALCULATE RMSD BETWEEN TWO STURUCTURES print "\n"; $rmsdmin = 1000; $anglex = 0; $angley = 0;$anglez = 0;$thetax = 0; $thetay = 0; $thetaz = 0; for($thetax = 0 ; $thetax < 180 ; $thetax = $thetax + 5) {for($thetay = 0 ; $thetay < 180 ; $thetay = $thetay + 5) {for($thetaz = 0 ; $thetaz < 180 ; $thetaz = $thetaz + 5) { for ($lineloc = ($inpnum/2)+1; $lineloc < $inpnum; $lineloc++) {#print "@dataFrom[($idloc[$lineloc-($inpnum/2)]-1)] \n"; #print "@dataTo[($idloc[$lineloc]-1)] \n"; $mydata1 = @dataFrom[($idloc[$lineloc-($inpnum/2)]-1)]; @aa= split('',$mydata1); $numx1 = join('',@aa[32..37]); $numy1 = join('',@aa[40..46]); $numz1 = join('',@aa[48..54]); #Rotate Matrix $anglex = deg2rad($thetax); $angley = deg2rad($thetay); $anglez = deg2rad($thetaz); $numx2 = ($numx1 *(cos($angley)*cos($anglez))) + ($numy1*(-(cos($anglex)*sin($anglez))+(sin($anglex)*sin($angley)*cos($anglez)))) + ($numz1*((sin($anglex)*sin($anglex))+(cos($anglex)*sin($angley)*cos($anglez)))); $numy2 = ($numx1 *(cos($angley)*sin($anglez))) + ($numy1*((cos($anglex)*cos($anglez))+(sin($anglex)*sin($angley)*sin($anglez)))) + ($numz1*(-(sin($anglex)*cos($anglez))+(cos($anglex)*sin($angley)*sin($anglez)))); $numz2 = ($numx1 *(-sin($angley))) + ($numy1*(sin($anglex)*cos($angley))) + ($numz1*(cos($anglex)*cos($angley))); $mydata2 = @dataTo[($idloc[$lineloc]-1)]; @aa= split('',$mydata2); $numx3 = join('',@aa[32..37]); $numy3 = join('',@aa[40..46]); $numz3 = join('',@aa[48..54]); $sumdis = $sumdis + (($numx3-$numx2)**2) + (($numy3-$numy2)**2) + (($numz3-$numz2)**2); } $rmsd = ($sumdis/($inpnum/2))**0.5; $sumdis = 0; #print "\n Sum Dist = $sumdis \n"; print " RMSD of $thetax,$thetay,$thetaz is $rmsd \n"; if ($rmsdmin > $rmsd) {$rmsdmin = $rmsd; $thetaxmin = $thetax; $thetaymin = $thetay; $thetazmin = $thetaz; } } } } print "*****************************\n"; print" CALCULATE RMSD \n"; print " RMSD Least $thetaxmin,$thetaymin,$thetazmin is $rmsdmin \n"; print "*****************************\n"; # ***************************** # Translate BOTH FROM PDB1 and TO PDB 2 To ORIGINAL POSITION OF PDB2 # Translate FROM PDB1 ON TO PDB2 ORIGINAL $addcmx=0; $addcmy=0; $addcmz=0; $atomnm=0; $num = 0.0; for ($line = 0; $line < scalar @dataFrom; $line++) {if ($dataFrom[$line] =~ m/^ATOM/) { $mydata = @dataFrom[$line]; @aa= split('',$mydata); $numx1 = join('',@aa[32..37]); $numy1 = join('',@aa[40..46]); $numz1 = join('',@aa[48..54]); $thetax = $thetaxmin; $thetay = $thetaymin; $thetaz = $thetazmin; $anglex = deg2rad($thetax); $angley = deg2rad($thetay); $anglez = deg2rad($thetaz); $numx2 = ($numx1 *(cos($angley)*cos($anglez))) + ($numy1*(-(cos($anglex)*sin($anglez))+(sin($anglex)*sin($angley)*cos($anglez)))) + ($numz1*((sin($anglex)*sin($anglex))+(cos($anglex)*sin($angley)*cos($anglez)))); $numy2 = ($numx1 *(cos($angley)*sin($anglez))) + ($numy1*((cos($anglex)*cos($anglez))+(sin($anglex)*sin($angley)*sin($anglez)))) + ($numz1*(-(sin($anglex)*cos($anglez))+(cos($anglex)*sin($angley)*sin($anglez)))); $numz2 = ($numx1 *(-sin($angley))) + ($numy1*(sin($anglex)*cos($angley))) + ($numz1*(cos($anglex)*cos($angley))); $addcmx = $numx2 + $centerx2; $addcmx = substr($addcmx, 0, 6); $addcmy = $numy2 + $centery2; $addcmy = substr($addcmy, 0, 6); $addcmz = $numz2 + $centerz2; $addcmz = substr($addcmz, 0, 6); $mydata = join('',@aa[0..31],$addcmx,' ',$addcmy,' ',$addcmz,,' ',@aa[55..78]); @dataFrom[$line] = $mydata; print "@dataFrom[$line]\n"; } } print" Move COM TO PDB2 COM FOR : FROM - PDB1\n\n"; # Translate TO PDB2 ON TO PDB2 ORIGINAL $addcmx=0; $addcmy=0; $addcmz=0; $atomnm=0; $num = 0.0; for ($line = 0; $line < scalar @dataTo; $line++) {if ($dataTo[$line] =~ m/^HETATM/) { $mydata = @dataTo[$line]; @aa= split('',$mydata); $num = join('',@aa[32..37]); $addcmx = $num + $centerx2; $addcmx = substr($addcmx, 0, 6); $num = join('',@aa[40..46]); $addcmy = $num + $centery2; $addcmy = substr($addcmy, 0, 6); $num = join('',@aa[48..54]); $addcmz = $num + $centerz2; $addcmz = substr($addcmz, 0, 6); $mydata = join('',@aa[0..31],$addcmx,' ',$addcmy,' ',$addcmz,,' ',@aa[55..78]); @dataTo[$line] = $mydata; print "@dataTo[$line]\n"; } } print" Move COM TO PDB2 COM FOR : FROM - PDB1\n"; print" $centerx2 $centery2 $centerz2\n"; # ***************************** # CREATE OUTPUT FILE For Final Structures $outputFile = "translate_".$inputFile; open (FH, ">$outputFile"); foreach ($line = 0; $line < scalar @dataFrom; $line++) { print FH "@dataFrom[$line]\n"; } close FH; $outputFile = "translate_".$inputFile2; open (FH2, ">$outputFile"); foreach ($line = 0; $line < scalar @dataTo; $line++) { print FH2 "@dataTo[$line]\n"; } close FH2; # *****************************