Use of de.bioforscher.jstructure.mathematics.Pair in project jstructure by JonStargaryen.
Class A01_CreateContactMaps, method handleFile.
/**
 * Processes one semicolon-separated entry and writes the sequence, secondary-structure and
 * contact-map ({@code .rr}) files derived from it.
 *
 * <p>The line is split on {@code ';'}: field 0 is the entry id used to locate
 * {@code pdb/<id>.pdb} and {@code <id>.xml} and to name all output files; field 2 is a
 * comma-separated list of experiment ids, optionally wrapped in square brackets. Field 1 is
 * not used here.
 *
 * <p>Files written:
 * <ul>
 *   <li>{@code reconstruction/fasta/<id>.fasta} and {@code reconstruction/sse/<id>.sse}</li>
 *   <li>{@code <id>-sampled-100-1.rr}: all contacts</li>
 *   <li>{@code <id>-efr-<percentage>-1.rr}: contacts between early folding residues</li>
 *   <li>{@code <id>-random-<i>-<j>.rr}: random i% of all contacts, 5 repetitions each</li>
 *   <li>{@code <id>-residues-<i>-<j>.rr}: contacts among a random i% of residues</li>
 *   <li>{@code <id>-interacting-<percentage>-<j>.rr}: contacts among residues chosen by
 *       {@code getInteractingResidues}, as many residues as there are early folding ones</li>
 *   <li>{@code <id>-interacting2-<i>-<j>.rr}: same, but for i% of all residues</li>
 * </ul>
 * where i runs over 5, 10, ..., 95 and j over 1..5.
 *
 * @param line the entry to process, formatted as described above
 * @throws java.util.NoSuchElementException if the parsed structure has no chain with amino acids
 */
private static void handleFile(String line) {
    String[] split = line.split(";");
    String stfId = split[0];
    List<Integer> experimentIds = Pattern.compile(",")
            .splitAsStream(split[2].replaceAll("\\[", "").replaceAll("]", ""))
            .map(Integer::valueOf)
            .collect(Collectors.toList());

    Structure structure = StructureParser.fromPath(BASE_PATH.resolve("pdb").resolve(stfId + ".pdb")).parse();
    // only the first chain containing amino acids is considered
    Chain chain = structure.chainsWithAminoAcids()
            .findFirst()
            .orElseThrow(() -> new java.util.NoSuchElementException("no chain with amino acids found for " + stfId));

    String sequence = chain.getAminoAcidSequence();
    String secondaryStructureString = chain.aminoAcids()
            .map(aminoAcid -> aminoAcid.getFeature(GenericSecondaryStructure.class))
            .map(GenericSecondaryStructure::getSecondaryStructure)
            .map(SecondaryStructureType::getReducedRepresentation)
            .collect(Collectors.joining())
            .toUpperCase();

    Start2FoldConstants.write(BASE_PATH.resolve("reconstruction").resolve("fasta").resolve(stfId + ".fasta"),
            ">" + stfId + System.lineSeparator() + sequence);
    Start2FoldConstants.write(BASE_PATH.resolve("reconstruction").resolve("sse").resolve(stfId + ".sse"),
            ">" + stfId + System.lineSeparator() + secondaryStructureString);

    // must run before the Start2FoldResidueAnnotation feature is read below
    Start2FoldXmlParser.parseSpecificExperiment(chain,
            Start2FoldConstants.XML_DIRECTORY.resolve(stfId + ".xml"),
            experimentIds);

    List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());
    List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
            .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
            .collect(Collectors.toList());

    List<Pair<AminoAcid, AminoAcid>> contacts = SetOperations.unorderedPairsOf(aminoAcids)
            .filter(pair -> areNonCovalentGroups(pair.getLeft(), pair.getRight()))
            .filter(pair -> ProteinGraphFactory.InteractionScheme.CALPHA8.areInContact(pair.getLeft(), pair.getRight()))
            .collect(Collectors.toList());
    List<Pair<AminoAcid, AminoAcid>> earlyFoldingContacts = contactsWithin(contacts, earlyFoldingResidues);

    String percentage = StandardFormat.formatToInteger(100 * earlyFoldingContacts.size() / (double) contacts.size());
    System.out.println("fraction of EFR contacts is " + percentage + "%: " + earlyFoldingContacts.size() + " " + contacts.size());

    Start2FoldConstants.write(MAP_PATH.resolve(stfId + "-sampled-100-1.rr"), composeRRString(contacts, sequence));
    Start2FoldConstants.write(MAP_PATH.resolve(stfId + "-efr-" + percentage + "-1.rr"), composeRRString(earlyFoldingContacts, sequence));

    // create samplings of random contacts
    for (int i = 5; i < 100; i = i + 5) {
        int numberOfContactsToSelect = (int) (i / (double) 100 * contacts.size());
        for (int j = 1; j < 6; j++) {
            // shuffling in place and taking the head yields a random subset of the requested size
            Collections.shuffle(contacts);
            List<Pair<AminoAcid, AminoAcid>> selectedContacts = contacts.subList(0, numberOfContactsToSelect);
            Start2FoldConstants.write(MAP_PATH.resolve(stfId + "-random-" + i + "-" + j + ".rr"),
                    composeRRString(selectedContacts, sequence));
        }
    }

    // create samplings of random residues
    for (int i = 5; i < 100; i = i + 5) {
        int numberOfResiduesToSelect = (int) (i / (double) 100 * aminoAcids.size());
        for (int j = 1; j < 6; j++) {
            Collections.shuffle(aminoAcids);
            List<AminoAcid> selectedAminoAcids = aminoAcids.subList(0, numberOfResiduesToSelect);
            Start2FoldConstants.write(MAP_PATH.resolve(stfId + "-residues-" + i + "-" + j + ".rr"),
                    composeRRString(contactsWithin(contacts, selectedAminoAcids), sequence));
        }
    }

    // create samplings of comparable nature of EFR contacts
    for (int j = 1; j < 6; j++) {
        int numberOfResiduesToSelect = earlyFoldingResidues.size();
        List<AminoAcid> interactingResidues = getInteractingResidues(aminoAcids, contacts, numberOfResiduesToSelect);
        Start2FoldConstants.write(MAP_PATH.resolve(stfId + "-interacting-" + percentage + "-" + j + ".rr"),
                composeRRString(contactsWithin(contacts, interactingResidues), sequence));
    }

    // create bin samplings of comparable nature of EFR contacts
    for (int i = 5; i < 100; i = i + 5) {
        int numberOfResiduesToSelect = (int) (i / (double) 100 * aminoAcids.size());
        for (int j = 1; j < 6; j++) {
            List<AminoAcid> interactingResidues = getInteractingResidues(aminoAcids, contacts, numberOfResiduesToSelect);
            // the bin size i is part of the file name; using the EFR percentage here made
            // every bin overwrite the files of the previous one
            Start2FoldConstants.write(MAP_PATH.resolve(stfId + "-interacting2-" + i + "-" + j + ".rr"),
                    composeRRString(contactsWithin(contacts, interactingResidues), sequence));
        }
    }
}

/**
 * Returns the contacts whose left and right partner are both contained in {@code residues},
 * in the order in which they occur in {@code contacts}.
 */
private static List<Pair<AminoAcid, AminoAcid>> contactsWithin(List<Pair<AminoAcid, AminoAcid>> contacts,
                                                               List<AminoAcid> residues) {
    return contacts.stream()
            .filter(contact -> residues.contains(contact.getLeft()) && residues.contains(contact.getRight()))
            .collect(Collectors.toList());
}
Aggregations