use of de.bioforscher.jstructure.mathematics.Pair in project jstructure by JonStargaryen.
the class StructuralInformationService method process.
public void process(Chain referenceChain, String jobName, Path confoldPath, Path tmalignPath, Path outputDirectory, int numberOfThreads, double baselineFrequency) {
try {
ExecutorService executorService = Executors.newFixedThreadPool(numberOfThreads);
List<AminoAcid> aminoAcids = referenceChain.aminoAcids().collect(Collectors.toList());
int numberOfAminoAcids = aminoAcids.size();
ReconstructionContactMap fullMap = ReconstructionContactMap.createReconstructionContactMap(referenceChain, ContactDefinitionFactory.createAlphaCarbonContactDefinition(8.0));
List<Pair<AminoAcid, AminoAcid>> contacts = fullMap.getLongRangeContacts();
int numberOfLongRangeContacts = contacts.size();
String sequence = fullMap.getSequence();
String secondaryStructure = fullMap.getSecondaryStructureElements();
// report general statistics
logger.info("evaluating structural information of chain with {} residues and {} long-range contacts", numberOfAminoAcids, numberOfLongRangeContacts);
logger.info("present long-range contacts:{}{}", System.lineSeparator(), composeConsoleOutput(fullMap.getLongRangeContacts()));
logger.info("coverage per position:{}{}", System.lineSeparator(), aminoAcids.stream().map(fullMap::getNonLocalContactsOf).mapToInt(List::size).mapToObj(size -> {
if (size < 10) {
return String.valueOf(size);
} else {
return "*";
}
}).collect(Collectors.joining()));
// write reference structure
Path referenceChainStructurePath = Files.createTempFile("confoldservice-ref", ".pdb");
Files.write(referenceChainStructurePath, referenceChain.getPdbRepresentation().getBytes());
// create outputDirectory
Path reconstructionDirectory = outputDirectory.resolve(jobName);
if (!Files.exists(reconstructionDirectory)) {
Files.createDirectory(reconstructionDirectory);
}
// create sampling containers
List<Future<BaselineReconstruction>> baselineFutures = new ArrayList<>();
for (int iteration = 0; iteration < COVERAGE; iteration++) {
baselineFutures.add(executorService.submit(new BaselineReconstruction(iteration, referenceChainStructurePath, referenceChain, fullMap, sequence, secondaryStructure, baselineFrequency, confoldPath.toFile().getAbsolutePath(), tmalignPath.toFile().getAbsolutePath(), outputDirectory, jobName)));
}
List<BaselineReconstruction> baselineReconstructions = baselineFutures.stream().map(this::getBaselineFuture).collect(Collectors.toList());
// create toggling reconstructions
int numberOfCombinations = baselineReconstructions.size() * contacts.size();
logger.info("reconstructing individual maps by contact toggling - evaluating {} combinations", numberOfCombinations);
int counter = 1;
List<Future<ContactTogglingReconstruction>> contactToggleFutures = new ArrayList<>();
for (Pair<AminoAcid, AminoAcid> contact : contacts) {
for (BaselineReconstruction baselineReconstruction : baselineReconstructions) {
contactToggleFutures.add(executorService.submit(baselineReconstruction.createContactTogglingReconstruction(contact, counter, numberOfCombinations)));
counter++;
}
}
Path outputPath = outputDirectory.resolve(jobName + ".out");
FileWriter outputWriter = new FileWriter(outputPath.toFile());
contactToggleFutures.stream().map(this::getContactToggleFuture).filter(Optional::isPresent).map(Optional::get).map(contactTogglingReconstruction -> contactTogglingReconstruction.getContactToToggle() + "\t" + contactTogglingReconstruction.isContactWasRemoved() + "\t" + StandardFormat.format(contactTogglingReconstruction.getBaselineReconstruction().getAverageRmsd()) + "\t" + StandardFormat.format(contactTogglingReconstruction.getBaselineReconstruction().getAverageTmScore()) + "\t" + StandardFormat.format(contactTogglingReconstruction.getBaselineReconstruction().getAverageQ()) + "\t" + StandardFormat.format(contactTogglingReconstruction.getAverageRmsd()) + "\t" + StandardFormat.format(contactTogglingReconstruction.getAverageTmScore()) + "\t" + StandardFormat.format(contactTogglingReconstruction.getAverageQ()) + "\t" + StandardFormat.format(contactTogglingReconstruction.getDecreaseRmsd()) + "\t" + StandardFormat.format(contactTogglingReconstruction.getIncreaseTMScore()) + "\t" + StandardFormat.format(contactTogglingReconstruction.getIncreaseQ()) + System.lineSeparator()).forEach(line -> {
try {
outputWriter.write(line);
outputWriter.flush();
} catch (IOException e) {
throw new UncheckedIOException(e);
}
});
// clean up
outputWriter.close();
Files.delete(referenceChainStructurePath);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
use of de.bioforscher.jstructure.mathematics.Pair in project jstructure by JonStargaryen.
the class BaselineReconstruction method call.
@Override
public BaselineReconstruction call() throws Exception {
logger.info("submitting baseline reconstruction job with id {}", iteration);
// create sampling of full map
List<Pair<AminoAcid, AminoAcid>> fullContacts = fullMap.getLongRangeContacts();
Collections.shuffle(fullContacts);
int numberOfContactsToSelect = (int) (fullContacts.size() * baselineFrequency);
List<Pair<AminoAcid, AminoAcid>> sampledContacts = fullContacts.subList(0, numberOfContactsToSelect);
this.sampledMap = new ReconstructionContactMap(referenceChain.aminoAcids().collect(Collectors.toList()), sampledContacts, fullMap.getContactDefinition());
// reconstruct sampled baseline map
sampledReconstructions = new ConfoldServiceWorker(confoldPath, sequence, secondaryStructure, sampledMap.getCaspRRRepresentation(), fullMap.getConfoldRRType()).call().getChains();
// write reconstruction files
Path reconstructionDirectory = outputPath.resolve(jobname);
for (int i = 1; i <= sampledReconstructions.size(); i++) {
Files.write(reconstructionDirectory.resolve("baseline-reconstruction-" + iteration + "-" + i + ".pdb"), sampledReconstructions.get(i - 1).getPdbRepresentation().getBytes());
}
// score baseline models
computeBaselinePerformance(sampledReconstructions);
return this;
}
use of de.bioforscher.jstructure.mathematics.Pair in project jstructure by JonStargaryen.
the class A03_ReconstructByVariousStrategy method handleChain.
private static void handleChain(ExplorerChain explorerChain) {
logger.info("[{}] starting job", explorerChain.getStfId());
try {
Chain nativeChain = explorerChain.getChain();
Path nativeChainPath = Files.createTempFile("nativechain-", ".pdb");
Files.write(nativeChainPath, nativeChain.getPdbRepresentation().getBytes());
List<ContactStructuralInformation> contactStructuralInformation = explorerChain.getContacts();
// annotate with PLIP data
PLIPInteractionContainer plipInteractionContainer = nativeChain.getFeature(PLIPInteractionContainer.class);
for (ContactStructuralInformation csi : contactStructuralInformation) {
AminoAcid aminoAcid1 = nativeChain.select().residueNumber(csi.getResidueIdentifier1()).asAminoAcid();
AminoAcid aminoAcid2 = nativeChain.select().residueNumber(csi.getResidueIdentifier2()).asAminoAcid();
if (plipInteractionContainer.getHydrogenBonds().stream().anyMatch(hydrogenBond -> isContact(hydrogenBond, aminoAcid1, aminoAcid2))) {
csi.markAsHydrogenBond();
}
if (plipInteractionContainer.getHydrophobicInteractions().stream().anyMatch(hydrophobicInteraction -> isContact(hydrophobicInteraction, aminoAcid1, aminoAcid2))) {
csi.markAsHydrophobicInteraction();
}
}
int numberOfNativeContacts = contactStructuralInformation.size();
int numberOfContactsToSelect = (int) (numberOfNativeContacts * DEFAULT_COVERAGE);
List<ReconstructionContactMap> contactMaps = Stream.of(ReconstructionStrategyDefinition.values()).map(ReconstructionStrategyDefinition::getReconstructionStrategy).flatMap(reconstructionStrategy -> IntStream.range(0, REDUNDANCY).boxed().flatMap(i -> {
if (!reconstructionStrategy.isNegatable()) {
ReconstructionContactMap contactMap = reconstructionStrategy.composeReconstructionContactMap(nativeChain, contactStructuralInformation, numberOfContactsToSelect);
contactMap.setName(reconstructionStrategy.getName() + "-" + (i + 1));
return Stream.of(contactMap);
} else {
// short, long, hydrogen, and hydrophobic bins have to be negated explicitly to get comparable results
Pair<ReconstructionContactMap, ReconstructionContactMap> contactMapPair = reconstructionStrategy.composeReconstructionAndNegatedReconstructionContactMap(nativeChain, contactStructuralInformation, numberOfContactsToSelect);
contactMapPair.getLeft().setName(reconstructionStrategy.getName() + "-" + (i + 1));
contactMapPair.getRight().setName(reconstructionStrategy.getNegatedName() + "-" + (i + 1));
return Stream.of(contactMapPair.getLeft(), contactMapPair.getRight());
}
})).filter(reconstructionContactMap -> reconstructionContactMap.getNumberOfContacts() > 0).collect(Collectors.toList());
Map<String, List<Future<ReconstructionResult>>> reconstructionFutures = new HashMap<>();
for (ReconstructionContactMap contactMap : contactMaps) {
String name = contactMap.getName().split("-")[0];
logger.info("[{}] handling contact map definition {}", explorerChain.getStfId(), name);
if (!reconstructionFutures.containsKey(name)) {
reconstructionFutures.put(name, new ArrayList<>());
}
List<Future<ReconstructionResult>> bin = reconstructionFutures.get(name);
bin.add(executorService.submit(new ConfoldServiceWorker("/home/sb/programs/confold_v1.0/confold.pl", contactMap.getSequence(), contactMap.getSecondaryStructureElements(), contactMap.getCaspRRRepresentation(), contactMap.getConfoldRRType())));
}
for (Map.Entry<String, List<Future<ReconstructionResult>>> reconstructionFuture : reconstructionFutures.entrySet()) {
try {
String name = reconstructionFuture.getKey();
List<Chain> reconstructions = reconstructionFuture.getValue().stream().map(future -> {
try {
return future.get();
} catch (Exception e) {
throw new ComputationException(e);
}
}).map(ReconstructionResult::getChains).flatMap(Collection::stream).collect(Collectors.toList());
logger.info("[{}][{}] {} reconstructs in bin", explorerChain.getStfId(), name, reconstructions.size());
List<TMAlignAlignmentResult> alignmentResults = new ArrayList<>();
List<Path> tmpFiles = new ArrayList<>();
if (reconstructions.isEmpty()) {
throw new ComputationException("reconstruction did not yield any reconstructs");
}
for (Chain reconstructedChain : reconstructions) {
Path reconstructPath = Files.createTempFile("confoldservice-recon", ".pdb");
tmpFiles.add(reconstructPath);
Files.write(reconstructPath, reconstructedChain.getPdbRepresentation().getBytes());
alignmentResults.add(TM_ALIGN_SERVICE.process(new String[] { "/home/sb/programs/tmalign", nativeChainPath.toFile().getAbsolutePath(), reconstructPath.toFile().getAbsolutePath() }));
}
logger.info("[{}][{}] {} alignments in bin", explorerChain.getStfId(), name, alignmentResults.size());
if (alignmentResults.isEmpty()) {
throw new ComputationException("tmalign did not yield any alignments");
}
for (TMAlignAlignmentResult alignmentResult : alignmentResults) {
double rmsd = alignmentResult.getRootMeanSquareDeviation().getScore();
String line = explorerChain.getStfId() + "," + name + "," + rmsd;
logger.info("[{}][{}] {}", explorerChain.getStfId(), name, line);
fileWriter.write(line + System.lineSeparator());
fileWriter.flush();
}
// cleanup
for (Path tmpFile : tmpFiles) {
Files.delete(tmpFile);
}
} catch (IOException e) {
throw new ComputationException(e);
}
}
} catch (IOException | AlignmentException e) {
throw new ComputationException(e);
}
}
use of de.bioforscher.jstructure.mathematics.Pair in project jstructure by JonStargaryen.
the class A04_CreateRmsdVsCoveragePlot method handleChain.
private static void handleChain(ExplorerChain explorerChain) {
logger.info("handling chain {}", explorerChain.getStfId());
try {
Chain nativeChain = explorerChain.getChain();
Path nativeChainPath = Files.createTempFile("nativechain-", ".pdb");
Files.write(nativeChainPath, nativeChain.getPdbRepresentation().getBytes());
ReconstructionContactMap nativeContactMap = ReconstructionContactMap.createReconstructionContactMap(nativeChain, ContactDefinitionFactory.createAlphaCarbonContactDefinition(8.0));
List<AminoAcid> aminoAcids = nativeChain.getAminoAcids();
List<Pair<AminoAcid, AminoAcid>> contacts = nativeContactMap.getLongRangeContacts();
int numberNativeLongRangeContacts = contacts.size();
List<ReconstructionContactMap> reconstructionContactMaps = new ArrayList<>();
for (int coverage = 5; coverage <= 100; coverage = coverage + 5) {
int numberOfContactsToSelect = (int) Math.round(0.01 * coverage * numberNativeLongRangeContacts);
for (int run = 0; run < REDUNDANCY; run++) {
Collections.shuffle(contacts);
List<Pair<AminoAcid, AminoAcid>> selectedContacts = contacts.subList(0, numberOfContactsToSelect);
ReconstructionContactMap contactMap = new ReconstructionContactMap(aminoAcids, selectedContacts, nativeContactMap.getContactDefinition());
contactMap.setName("p" + coverage + "-" + (run + 1));
reconstructionContactMaps.add(contactMap);
}
}
Map<String, List<Future<ReconstructionResult>>> reconstructionFutures = new HashMap<>();
for (ReconstructionContactMap contactMap : reconstructionContactMaps) {
String name = contactMap.getName().split("-")[0];
logger.info("handling contact map with coverage {}", name);
if (!reconstructionFutures.containsKey(name)) {
reconstructionFutures.put(name, new ArrayList<>());
}
List<Future<ReconstructionResult>> bin = reconstructionFutures.get(name);
bin.add(executorService.submit(new ConfoldServiceWorker("/home/sb/programs/confold_v1.0/confold.pl", contactMap.getSequence(), contactMap.getSecondaryStructureElements(), contactMap.getCaspRRRepresentation(), nativeContactMap.getConfoldRRType())));
}
for (Map.Entry<String, List<Future<ReconstructionResult>>> reconstructionFuture : reconstructionFutures.entrySet()) {
try {
String name = reconstructionFuture.getKey();
List<Chain> reconstructions = reconstructionFuture.getValue().stream().map(future -> {
try {
return future.get();
} catch (Exception e) {
throw new ComputationException(e);
}
}).map(ReconstructionResult::getChains).flatMap(Collection::stream).collect(Collectors.toList());
List<TMAlignAlignmentResult> alignmentResults = new ArrayList<>();
List<Path> tmpFiles = new ArrayList<>();
for (Chain reconstructedChain : reconstructions) {
Path reconstructPath = Files.createTempFile("confoldservice-recon", ".pdb");
tmpFiles.add(reconstructPath);
Files.write(reconstructPath, reconstructedChain.getPdbRepresentation().getBytes());
alignmentResults.add(TM_ALIGN_SERVICE.process(new String[] { "/home/sb/programs/tmalign", nativeChainPath.toFile().getAbsolutePath(), reconstructPath.toFile().getAbsolutePath() }));
}
if (alignmentResults.isEmpty()) {
throw new ComputationException("tmalign did not yield any alignments");
}
for (TMAlignAlignmentResult alignmentResult : alignmentResults) {
double rmsd = alignmentResult.getRootMeanSquareDeviation().getScore();
String line = explorerChain.getStfId() + "," + name.replace("p", "") + "," + rmsd;
logger.info(line);
fileWriter.write(line + System.lineSeparator());
fileWriter.flush();
}
// cleanup
for (Path tmpFile : tmpFiles) {
Files.delete(tmpFile);
}
} catch (IOException e) {
throw new ComputationException(e);
}
}
} catch (IOException | AlignmentException e) {
throw new ComputationException(e);
}
}
use of de.bioforscher.jstructure.mathematics.Pair in project jstructure by JonStargaryen.
the class A04A_CreatePyMolRendering method handleChain.
private static void handleChain(ExplorerChain explorerChain) {
logger.info("handling chain {}", explorerChain.getStfId());
try {
Chain nativeChain = explorerChain.getChain();
Path nativeChainPath = Files.createTempFile("nativechain-", ".pdb");
Files.write(nativeChainPath, nativeChain.getPdbRepresentation().getBytes());
ReconstructionContactMap nativeContactMap = ReconstructionContactMap.createReconstructionContactMap(nativeChain, ContactDefinitionFactory.createAlphaCarbonContactDefinition(8.0));
List<AminoAcid> aminoAcids = nativeChain.getAminoAcids();
List<Pair<AminoAcid, AminoAcid>> contacts = nativeContactMap.getLongRangeContacts();
int numberNativeLongRangeContacts = contacts.size();
List<ReconstructionContactMap> reconstructionContactMaps = new ArrayList<>();
IntStream.of(5, 30, 100).forEach(coverage -> {
int numberOfContactsToSelect = (int) Math.round(0.01 * coverage * numberNativeLongRangeContacts);
for (int run = 0; run < REDUNDANCY; run++) {
Collections.shuffle(contacts);
List<Pair<AminoAcid, AminoAcid>> selectedContacts = contacts.subList(0, numberOfContactsToSelect);
ReconstructionContactMap contactMap = new ReconstructionContactMap(aminoAcids, selectedContacts, nativeContactMap.getContactDefinition());
contactMap.setName("p" + coverage + "-" + (run + 1));
reconstructionContactMaps.add(contactMap);
}
});
Map<String, List<Future<ReconstructionResult>>> reconstructionFutures = new HashMap<>();
for (ReconstructionContactMap contactMap : reconstructionContactMaps) {
String name = contactMap.getName().split("-")[0];
logger.info("handling contact map with coverage {}", name);
if (!reconstructionFutures.containsKey(name)) {
reconstructionFutures.put(name, new ArrayList<>());
}
List<Future<ReconstructionResult>> bin = reconstructionFutures.get(name);
bin.add(executorService.submit(new ConfoldServiceWorker("/home/sb/programs/confold_v1.0/confold.pl", contactMap.getSequence(), contactMap.getSecondaryStructureElements(), contactMap.getCaspRRRepresentation(), nativeContactMap.getConfoldRRType())));
}
for (Map.Entry<String, List<Future<ReconstructionResult>>> reconstructionFuture : reconstructionFutures.entrySet()) {
try {
String name = reconstructionFuture.getKey();
List<Chain> reconstructions = reconstructionFuture.getValue().stream().map(future -> {
try {
return future.get();
} catch (Exception e) {
throw new ComputationException(e);
}
}).map(ReconstructionResult::getChains).flatMap(Collection::stream).collect(Collectors.toList());
for (Chain reconstructedChain : reconstructions) {
Files.write(OUTPUT_PATH.resolve(name + ".pdb"), reconstructedChain.getPdbRepresentation().getBytes());
}
} catch (IOException e) {
throw new ComputationException(e);
}
}
} catch (IOException e) {
throw new ComputationException(e);
}
}
Aggregations