use of org.biojava.nbio.core.sequence.compound.AminoAcidCompound in project jstructure by JonStargaryen.
the class UniProtMutationsForBindingSite method handleBindingSite.
/**
 * Maps the residues of a binding site onto positions of the associated UniProt sequence and collects
 * all rows of the effects file that refer to these positions.
 * <p>
 * Matching rows are appended to {@code output}; residues whose position cannot be resolved in the
 * alignment are reported in {@code warnings}. Progress is echoed to standard output.
 *
 * @param bindingSite the binding site whose residues are mapped
 * @throws java.util.NoSuchElementException if the effects file has no row for the PDB id and chain id
 *         of the binding site, or if the original chain contains no group sharing the three-letter
 *         code of a binding site residue
 */
private static void handleBindingSite(BindingSite bindingSite) {
    // the UniProt id is the third column of any effects row matching this PDB id and chain id
    String uniProtId = AARSConstants.lines(EFFECTS_TSV)
            .map(line -> line.split(DEL))
            .filter(split -> split[0].equals(bindingSite.pdbId))
            .filter(split -> split[1].equals(bindingSite.chainId))
            .findAny()
            .orElseThrow(() -> new java.util.NoSuchElementException("no entry for " + bindingSite.pdbId + "_" + bindingSite.chainId + " in effects file"))[2];

    // load original, full structure
    Chain originalChain = ProteinParser.source(bindingSite.pdbId)
            .parse()
            .select()
            .chainName(bindingSite.chainId)
            .asChain();
    String pdbSequence = originalChain.getAminoAcidSequence();
    String uniProtSequence = loadUniProtSequence(uniProtId);

    // align sequences - UniProt sequence is the query, PDB sequence the target
    SequencePair<ProteinSequence, AminoAcidCompound> alignment = needle(uniProtSequence, pdbSequence);
    System.out.println(bindingSite);
    System.out.println(alignment);

    // load renumbered, but not transformed chain
    // NOTE(review): absolute, machine-specific path - consider making the base directory configurable
    Chain renumberedChain = ProteinParser.source(Paths.get("/home/bittrich/git/aars_analysis/data/msa/" + classToProcess + "/renumbered_structures/" + bindingSite.pdbId + "_renum.pdb"))
            .parse()
            .select()
            .chainName(bindingSite.chainId)
            .asChain();

    // numeric residue numbers of the binding site residues
    List<Integer> residueIndices = bindingSite.residues.stream()
            .map(Group::getResidueNumber)
            .map(ResidueNumber::getResidueNumber)
            .collect(Collectors.toList());

    // key: renumbered, transformed binding site group - value: original group in PDB chain
    Map<Group, Group> groupMapping = renumberedChain.aminoAcids()
            // compare the numeric residue number: the list holds Integers, so passing the ResidueNumber
            // object itself to contains() would never match
            .filter(aminoAcid -> residueIndices.contains(aminoAcid.getResidueNumber().getResidueNumber()))
            .collect(Collectors.toMap(Function.identity(),
                    // map each group to the entity in the not renumbered structure: the group with the
                    // same three-letter code whose centroid is closest
                    renumberedGroup -> originalChain.select()
                            .groupName(renumberedGroup.getThreeLetterCode())
                            .asFilteredGroups()
                            .min(Comparator.comparingDouble(originalGroup -> originalGroup.calculate().centroid().distanceFast(renumberedGroup.calculate().centroid())))
                            .orElseThrow(() -> new java.util.NoSuchElementException("no group named " + renumberedGroup.getThreeLetterCode() + " in " + bindingSite.pdbId + "_" + bindingSite.chainId))));

    // rows of the effects file for this chain - read once instead of once per mapped residue
    List<String[]> effectRows = AARSConstants.lines(EFFECTS_TSV)
            .map(line -> line.split("\t"))
            .filter(split -> split[0].equals(bindingSite.pdbId))
            .filter(split -> split[1].equals(bindingSite.chainId))
            .collect(Collectors.toList());

    // determine sequence position in sequence alignment - rarely these indices do not match
    for (Map.Entry<Group, Group> entry : groupMapping.entrySet()) {
        // alignment indices start at 1, list indices at 0
        int residueIndex = originalChain.getGroups().indexOf(entry.getValue()) + 1;
        System.out.print("mapped: " + entry.getKey().getIdentifier() + " -> " + entry.getValue().getIdentifier() + " -> uniprot ");
        try {
            int indexInUniProt = alignment.getIndexInQueryForTargetAt(residueIndex);
            // NOTE(review): getCompoundInQueryAt may expect an alignment column rather than a sequence
            // position - only the echoed residue name would be affected; confirm against BioJava
            System.out.println(alignment.getCompoundInQueryAt(indexInUniProt).getLongName().toUpperCase() + "-" + indexInUniProt);
            String indexToFind = String.valueOf(indexInUniProt);
            effectRows.stream()
                    .filter(split -> refersToPosition(split, indexToFind))
                    .forEach(split -> {
                        String outputLine = bindingSite.pdbId + DEL + bindingSite.chainId + DEL + split[2] + DEL + bindingSite.clazz + DEL + bindingSite.aa + DEL + bindingSite.mode + DEL + entry.getKey().getResidueNumber() + DEL + entry.getValue().getResidueNumber() + DEL + split[3] + DEL + split[4] + DEL + split[5] + DEL + split[6] + DEL + split[7] + System.lineSeparator();
                        System.out.println(outputLine);
                        output.append(outputLine);
                    });
        } catch (ArrayIndexOutOfBoundsException e) {
            // position lies outside of the alignment (or the effects row is too short)
            System.out.println("failed!");
            warnings.append("#could not map ").append(entry.getValue().getIdentifier()).append(" in ").append(bindingSite.pdbId).append("_").append(bindingSite.chainId).append(" to UniProt sequence").append(System.lineSeparator());
        }
    }
}
use of org.biojava.nbio.core.sequence.compound.AminoAcidCompound in project jstructure by JonStargaryen.
the class Start2FoldXmlParser method assignValues.
/**
 * Transfers the protection level of an experiment onto the matching residues of a chain.
 * <p>
 * The experiment sequence (query) is globally aligned to the sequence of the chain (target) and each
 * experiment residue is mapped through this alignment. Residues which cannot be mapped are logged and
 * skipped.
 *
 * @param experiment the experiment providing sequence, residues and protection level
 * @param chain the chain whose amino acids receive the protection level entries
 * @throws IllegalArgumentException if a {@code CompoundNotFoundException} occurs while creating the
 *         sequences to align
 */
private static void assignValues(Experiment experiment, Chain chain) {
    String pdbSequence = chain.getAminoAcidSequence();
    String experimentSequence = experiment.getSequence();
    // align sequences to ensure correct mapping
    SequencePair<ProteinSequence, AminoAcidCompound> pair = null;
    try {
        pair = Alignments.getPairwiseAlignment(new ProteinSequence(experimentSequence), new ProteinSequence(pdbSequence), Alignments.PairwiseSequenceAlignerType.GLOBAL, new SimpleGapPenalty(), SubstitutionMatrixHelper.getBlosum62());
        List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());
        for (Experiment.Residue residue : experiment.getResidues()) {
            try {
                int pdbIndex;
                if (residue.getCode().equals("P") && residue.getIndex() == 1) {
                    // super-russian fix for STF0017 where the alignment should match theoretically
                    pdbIndex = 0;
                } else {
                    // alignment indices are 1-based on both sides: query with the residue index as is
                    // and shift the result to address the 0-based list of amino acids
                    pdbIndex = pair.getIndexInTargetForQueryAt(residue.getIndex()) - 1;
                }
                AminoAcid aminoAcid = aminoAcids.get(pdbIndex);
                // ignore: STF0034 (cannot align)
                // assign experiment-specific protection level to residue
                aminoAcid.getFeature(Start2FoldResidueAnnotation.class).addProtectionLevelEntry(Stream.of(ProtectionLevel.values()).filter(pl -> pl == experiment.getProtectionLevel()).findFirst().get());
            } catch (Exception e) {
                // residue not present in structure - e.g. for STF0031 and STF0032
                logger.warn("alignment:{}{}", System.lineSeparator(), pair.toString());
                logger.warn("failed to map residue {}-{}", residue.getCode(), residue.getIndex(), e);
            }
        }
    } catch (CompoundNotFoundException e) {
        throw new IllegalArgumentException(e);
    }
}
use of org.biojava.nbio.core.sequence.compound.AminoAcidCompound in project jstructure by JonStargaryen.
the class A01_SingleLinkageClusterer method computeNeedlemanWunschSimilarity.
/**
 * Compute the Needleman-Wunsch (global) alignment between 2 sequences and report the sequence identity.
 * The identity is also printed to standard output.
 *
 * @param entry1 the reference sequence
 * @param entry2 the query sequence
 * @return the fraction of identically aligned positions, as reported by the alignment
 * @throws IllegalArgumentException if a {@code CompoundNotFoundException} occurs while creating the
 *         sequences to align
 */
private double computeNeedlemanWunschSimilarity(Chain entry1, Chain entry2) {
    try {
        ProteinSequence sequence1 = new ProteinSequence(entry1.getAminoAcidSequence());
        ProteinSequence sequence2 = new ProteinSequence(entry2.getAminoAcidSequence());
        SequencePair<ProteinSequence, AminoAcidCompound> pair = Alignments.getPairwiseAlignment(sequence1, sequence2, Alignments.PairwiseSequenceAlignerType.GLOBAL, new SimpleGapPenalty(), SubstitutionMatrixHelper.getBlosum62());
        // evaluate the identity once and reuse it for both reporting and the return value
        double identity = pair.getPercentageOfIdentity();
        System.out.println(identity);
        return identity;
    } catch (CompoundNotFoundException e) {
        throw new IllegalArgumentException(e);
    }
}
use of org.biojava.nbio.core.sequence.compound.AminoAcidCompound in project jstructure by JonStargaryen.
the class Start2FoldXmlParser method assignValues.
/**
 * Transfers the protection level of an experiment onto the matching residues of a chain.
 * <p>
 * The experiment sequence (query) is globally aligned to the sequence of the chain (target) and each
 * experiment residue is mapped through this alignment. Residues which cannot be mapped are logged and
 * skipped.
 *
 * @param experiment the experiment providing sequence, residues and protection level
 * @param chain the chain whose amino acids receive the protection level entries
 * @throws IllegalArgumentException if a {@code CompoundNotFoundException} occurs while creating the
 *         sequences to align
 */
private static void assignValues(Experiment experiment, Chain chain) {
    String pdbSequence = chain.getAminoAcidSequence();
    String experimentSequence = experiment.getSequence();
    // align sequences to ensure correct mapping
    SequencePair<ProteinSequence, AminoAcidCompound> pair = null;
    try {
        pair = Alignments.getPairwiseAlignment(new ProteinSequence(experimentSequence), new ProteinSequence(pdbSequence), Alignments.PairwiseSequenceAlignerType.GLOBAL, new SimpleGapPenalty(), SubstitutionMatrixHelper.getBlosum62());
        logger.debug("alignment:{}{}", System.lineSeparator(), pair.toString());
        List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());
        for (Experiment.Residue residue : experiment.getResidues()) {
            try {
                int pdbIndex;
                if (residue.getCode().equals("P") && residue.getIndex() == 1) {
                    // super-russian fix for STF0017 where the alignment should match theoretically
                    pdbIndex = 0;
                } else {
                    // alignment indices are 1-based on both sides: query with the residue index as is
                    // and shift the result to address the 0-based list of amino acids
                    pdbIndex = pair.getIndexInTargetForQueryAt(residue.getIndex()) - 1;
                }
                AminoAcid aminoAcid = aminoAcids.get(pdbIndex);
                logger.debug("mapped experiment {}-{} onto PDB {}-{}", residue.getCode(), residue.getIndex(), aminoAcid.getOneLetterCode(), aminoAcid.getResidueIdentifier());
                // ignore: STF0034 (cannot align)
                // assign experiment-specific protection level to residue
                aminoAcid.getFeature(Start2FoldResidueAnnotation.class).addProtectionLevelEntry(Stream.of(ProtectionLevel.values()).filter(pl -> pl == experiment.getProtectionLevel()).findFirst().get());
            } catch (Exception e) {
                // residue not present in structure - e.g. for STF0031 and STF0032
                logger.warn("alignment:{}{}", System.lineSeparator(), pair.toString());
                logger.warn("failed to map residue {}-{}", residue.getCode(), residue.getIndex(), e);
            }
        }
    } catch (CompoundNotFoundException e) {
        throw new IllegalArgumentException(e);
    }
}
use of org.biojava.nbio.core.sequence.compound.AminoAcidCompound in project jstructure by JonStargaryen.
the class Start2FoldXmlParser method assignValuesForStrong.
/**
 * Marks the residues of a chain which are referenced by an experiment with
 * {@code ProtectionLevel.STRONG}.
 * <p>
 * The experiment sequence (query) is globally aligned to the sequence of the chain (target) and each
 * experiment residue is mapped through this alignment. Residues which cannot be mapped are logged and
 * skipped.
 *
 * @param experiment the experiment providing sequence and residues
 * @param chain the chain whose amino acids receive the protection level entries
 * @throws IllegalArgumentException if a {@code CompoundNotFoundException} occurs while creating the
 *         sequences to align
 */
private static void assignValuesForStrong(Experiment experiment, Chain chain) {
    String pdbSequence = chain.getAminoAcidSequence();
    String experimentSequence = experiment.getSequence();
    // align sequences to ensure correct mapping
    SequencePair<ProteinSequence, AminoAcidCompound> pair = null;
    try {
        pair = Alignments.getPairwiseAlignment(new ProteinSequence(experimentSequence), new ProteinSequence(pdbSequence), Alignments.PairwiseSequenceAlignerType.GLOBAL, new SimpleGapPenalty(), SubstitutionMatrixHelper.getBlosum62());
        List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());
        for (Experiment.Residue residue : experiment.getResidues()) {
            try {
                int pdbIndex;
                if (residue.getCode().equals("P") && residue.getIndex() == 1) {
                    // super-russian fix for STF0017 where the alignment should match theoretically
                    pdbIndex = 0;
                } else {
                    // alignment indices are 1-based on both sides: query with the residue index as is
                    // and shift the result to address the 0-based list of amino acids
                    pdbIndex = pair.getIndexInTargetForQueryAt(residue.getIndex()) - 1;
                }
                AminoAcid aminoAcid = aminoAcids.get(pdbIndex);
                // assign experiment-specific protection level to residue
                aminoAcid.getFeature(Start2FoldResidueAnnotation.class).addProtectionLevelEntry(ProtectionLevel.STRONG);
            } catch (Exception e) {
                // residue not present in structure - e.g. for STF0031 and STF0032
                logger.warn("alignment:{}{}", System.lineSeparator(), pair.toString());
                logger.warn("failed to map residue {}-{}", residue.getCode(), residue.getIndex(), e);
            }
        }
    } catch (CompoundNotFoundException e) {
        throw new IllegalArgumentException(e);
    }
}
Aggregations