use of org.biojava.nbio.core.sequence.ProteinSequence in project jstructure by JonStargaryen.
The snippet below is the method handleBindingSite of the class UniProtMutationsForBindingSite.
/**
 * Maps the residues of one binding site onto the UniProt sequence and reports every entry of the
 * effects file ({@code EFFECTS_TSV}) that refers to one of the mapped positions.
 *
 * <p>Steps: (1) look up the UniProt id for the binding site's PDB id and chain in the effects file,
 * (2) align the UniProt sequence with the sequence of the original PDB chain, (3) pair each binding
 * site residue of the renumbered chain with the closest (centroid distance) residue of the same
 * three-letter code in the original chain, (4) translate the position in the original chain to an
 * index in the UniProt sequence via the alignment and (5) append all matching effects-file lines to
 * {@code output}. Residues that cannot be mapped are recorded in {@code warnings}.
 *
 * @param bindingSite the binding site to process; its {@code pdbId}, {@code chainId},
 *                    {@code residues}, {@code clazz}, {@code aa} and {@code mode} fields are read
 * @throws java.util.NoSuchElementException if the effects file has no line for the PDB id and chain,
 *                                          or if the original chain contains no residue of the
 *                                          required type for a binding site residue
 */
private static void handleBindingSite(BindingSite bindingSite) {
    // any line for this pdbId/chainId carries the UniProt id in its third column
    String[] effectsEntry = AARSConstants.lines(EFFECTS_TSV)
            .map(line -> line.split(DEL))
            .filter(split -> split[0].equals(bindingSite.pdbId))
            .filter(split -> split[1].equals(bindingSite.chainId))
            .findAny()
            .orElseThrow(() -> new java.util.NoSuchElementException("no entry for " + bindingSite.pdbId + "_" + bindingSite.chainId + " in effects file"));
    String uniProtId = effectsEntry[2];

    // load original, full structure
    Chain originalChain = ProteinParser.source(bindingSite.pdbId)
            .parse()
            .select()
            .chainName(bindingSite.chainId)
            .asChain();
    String pdbSequence = originalChain.getAminoAcidSequence();
    String uniProtSequence = loadUniProtSequence(uniProtId);

    // align sequences: UniProt is the query, the PDB chain is the target
    SequencePair<ProteinSequence, AminoAcidCompound> alignment = needle(uniProtSequence, pdbSequence);
    System.out.println(bindingSite);
    System.out.println(alignment);

    // load renumbered, but not transformed chain
    // NOTE(review): absolute, machine-specific path - consider making the base directory configurable
    Chain renumberedChain = ProteinParser.source(Paths.get("/home/bittrich/git/aars_analysis/data/msa/" + classToProcess + "/renumbered_structures/" + bindingSite.pdbId + "_renum.pdb"))
            .parse()
            .select()
            .chainName(bindingSite.chainId)
            .asChain();

    // numeric residue numbers of the binding site
    List<Integer> residueIndices = bindingSite.residues.stream()
            .map(Group::getResidueNumber)
            .map(ResidueNumber::getResidueNumber)
            .collect(Collectors.toList());

    // key: renumbered, transformed binding site group - value: original group in PDB chain
    Map<Group, Group> groupMapping = renumberedChain.aminoAcids()
            // compare the numeric value: the list holds Integers, so testing the ResidueNumber object
            // itself against it would not match
            .filter(aminoAcid -> residueIndices.contains(aminoAcid.getResidueNumber().getResidueNumber()))
            .collect(Collectors.toMap(Function.identity(),
                    // map each group to the entity in the not renumbered structure
                    renumberedGroup -> originalChain.select()
                            .groupName(renumberedGroup.getThreeLetterCode())
                            .asFilteredGroups()
                            .min(Comparator.comparingDouble(originalGroup -> originalGroup.calculate().centroid().distanceFast(renumberedGroup.calculate().centroid())))
                            .orElseThrow(() -> new java.util.NoSuchElementException("no " + renumberedGroup.getThreeLetterCode() + " in original chain " + bindingSite.pdbId + "_" + bindingSite.chainId + " to pair with " + renumberedGroup.getIdentifier()))));

    // determine sequence position in sequence alignment - rarely these indices do not match
    groupMapping.forEach((renumberedGroup, originalGroup) -> {
        // the alignment is addressed with 1-based positions, hence the + 1
        int residueIndex = originalChain.getGroups().indexOf(originalGroup) + 1;
        System.out.print("mapped: " + renumberedGroup.getIdentifier() + " -> " + originalGroup.getIdentifier() + " -> uniprot ");
        try {
            int indexInUniProt = alignment.getIndexInQueryForTargetAt(residueIndex);
            System.out.println(alignment.getCompoundInQueryAt(indexInUniProt).getLongName().toUpperCase() + "-" + indexInUniProt);
            String indexToFind = String.valueOf(indexInUniProt);
            AARSConstants.lines(EFFECTS_TSV)
                    .map(line -> line.split("\t"))
                    .filter(split -> split[0].equals(bindingSite.pdbId))
                    .filter(split -> split[1].equals(bindingSite.chainId))
                    .filter(split -> refersToPosition(split, indexToFind))
                    .forEach(split -> {
                        String outputLine = bindingSite.pdbId + DEL + bindingSite.chainId + DEL + split[2] + DEL + bindingSite.clazz + DEL + bindingSite.aa + DEL + bindingSite.mode + DEL + renumberedGroup.getResidueNumber() + DEL + originalGroup.getResidueNumber() + DEL + split[3] + DEL + split[4] + DEL + split[5] + DEL + split[6] + DEL + split[7] + System.lineSeparator();
                        System.out.println(outputLine);
                        output.append(outputLine);
                    });
        } catch (ArrayIndexOutOfBoundsException e) {
            // the position lies outside of the alignment - record the failure and continue
            System.out.println("failed!");
            warnings.append("#could not map ").append(originalGroup.getIdentifier()).append(" in ").append(bindingSite.pdbId).append("_").append(bindingSite.chainId).append(" to UniProt sequence").append(System.lineSeparator());
        }
    });
}
Aggregations