use of com.compomics.util.experiment.biology.modifications.ModificationProvider in project peptide-shaker by compomics.
the class ModificationLocalizationScorer method scorePTMs.
/**
 * Scores PTMs in a protein match.
 *
 * The confident and ambiguous modification sites of every validated peptide
 * carrying variable modifications are translated to protein coordinates
 * using the positions returned by the peptide's protein mapping for the
 * leading accession of the protein match. Ambiguous groups containing a site
 * where a modification of the same mass is confidently localized are
 * dropped, and a group overlapping with the representative site of a
 * previous group for a modification of the same mass is merged into that
 * previous group. The resulting scores are attached to the protein match.
 *
 * @param identification The identification object containing the matches.
 * @param proteinMatch The protein match.
 * @param identificationParameters The identification parameters.
 * @param scorePeptides If true, peptides will be scored as well.
 * @param modificationProvider The modification provider to use.
 * @param waitingHandler The waiting handler to use, ignored if null.
 */
public void scorePTMs(Identification identification, ProteinMatch proteinMatch, IdentificationParameters identificationParameters, boolean scorePeptides, ModificationProvider modificationProvider, WaitingHandler waitingHandler) {

    // Site on protein -> names of the modifications confidently localized there.
    HashMap<Integer, ArrayList<String>> confidentSites = new HashMap<>();
    // Representative site on protein -> (site on protein -> names of the possible modifications).
    HashMap<Integer, HashMap<Integer, HashSet<String>>> ambiguousSites = new HashMap<>();

    for (long peptideKey : proteinMatch.getPeptideMatchesKeys()) {

        PeptideMatch peptideMatch = identification.getPeptideMatch(peptideKey);
        Peptide peptide = peptideMatch.getPeptide();
        PSParameter psParameter = (PSParameter) peptideMatch.getUrParam(PSParameter.dummy);

        // Only validated peptides carrying variable modifications contribute.
        if (psParameter.getMatchValidationLevel().isValidated() && peptide.getNVariableModifications() > 0) {

            PSModificationScores peptideScores = (PSModificationScores) peptideMatch.getUrParam(PSModificationScores.dummy);

            // Score the peptide if it has no scores yet or if rescoring was requested.
            if (peptideScores == null || scorePeptides) {
                scorePTMs(identification, peptideMatch, identificationParameters, modificationProvider, waitingHandler);
                peptideScores = (PSModificationScores) peptideMatch.getUrParam(PSModificationScores.dummy);
            }

            if (peptideScores != null) {

                int[] peptideStart = peptide.getProteinMapping().get(proteinMatch.getLeadingAccession());

                // Confident sites, one entry per occurrence of the peptide on the protein.
                for (int confidentSite : peptideScores.getConfidentSites()) {
                    for (int peptideTempStart : peptideStart) {

                        int siteOnProtein = peptideTempStart + confidentSite - 1;
                        ArrayList<String> modificationsAtSite = confidentSites.get(siteOnProtein);

                        if (modificationsAtSite == null) {
                            modificationsAtSite = new ArrayList<>();
                            confidentSites.put(siteOnProtein, modificationsAtSite);
                        }

                        for (String modName : peptideScores.getConfidentModificationsAt(confidentSite)) {
                            if (!modificationsAtSite.contains(modName)) {
                                modificationsAtSite.add(modName);
                            }
                        }
                    }
                }

                // Ambiguous groups, indexed by their representative site.
                for (int representativeSite : peptideScores.getRepresentativeSites()) {

                    HashMap<Integer, HashSet<String>> peptideAmbiguousSites = peptideScores.getAmbiguousModificationsAtRepresentativeSite(representativeSite);

                    for (int peptideTempStart : peptideStart) {

                        int proteinRepresentativeSite = peptideTempStart + representativeSite - 1;
                        HashMap<Integer, HashSet<String>> proteinAmbiguousSites = ambiguousSites.get(proteinRepresentativeSite);

                        if (proteinAmbiguousSites == null) {
                            proteinAmbiguousSites = new HashMap<>(peptideAmbiguousSites.size());
                            ambiguousSites.put(proteinRepresentativeSite, proteinAmbiguousSites);
                        }

                        for (int peptideSite : peptideAmbiguousSites.keySet()) {
                            int siteOnProtein = peptideTempStart + peptideSite - 1;
                            proteinAmbiguousSites.put(siteOnProtein, peptideAmbiguousSites.get(peptideSite));
                        }
                    }
                }
            }
        }
    }

    // remove ambiguous sites where a confident was found and merge overlapping groups
    PSModificationScores proteinScores = new PSModificationScores();

    // Work on sorted copies of the key sets so that groups can be removed from the map while iterating.
    ArrayList<Integer> representativeSites = new ArrayList<>(ambiguousSites.keySet());
    Collections.sort(representativeSites);

    for (Integer representativeSite : representativeSites) {

        HashMap<Integer, HashSet<String>> secondarySitesMap = ambiguousSites.get(representativeSite);
        ArrayList<Integer> secondarySites = new ArrayList<>(secondarySitesMap.keySet());

        for (int secondarySite : secondarySites) {

            ArrayList<String> confidentModifications = confidentSites.get(secondarySite);

            if (confidentModifications != null) {

                // Modifications are considered the same when their masses are equal.
                boolean sameModification = confidentModifications.stream()
                        .map(modName -> modificationProvider.getModification(modName))
                        .anyMatch(confidentModification -> secondarySitesMap.get(secondarySite).stream()
                        .map(modName -> modificationProvider.getModification(modName))
                        .anyMatch(secondaryModification -> secondaryModification.getMass() == confidentModification.getMass()));

                if (sameModification) {
                    // A confident localization exists within this group, the whole group is dropped.
                    ambiguousSites.remove(representativeSite);
                    break;
                }
            }

            if (secondarySite != representativeSite) {

                ArrayList<Integer> tempRepresentativeSites = new ArrayList<>(ambiguousSites.keySet());
                Collections.sort(tempRepresentativeSites);

                for (Integer previousSite : tempRepresentativeSites) {

                    // Only groups with a smaller representative site are candidates for merging.
                    if (previousSite >= representativeSite) {
                        break;
                    }

                    if (previousSite == secondarySite) {

                        HashMap<Integer, HashSet<String>> previousSites = ambiguousSites.get(previousSite);
                        HashSet<String> previousModifications = previousSites.get(previousSite);

                        boolean sameModification = previousModifications.stream()
                                .map(modName -> modificationProvider.getModification(modName))
                                .anyMatch(previousModification -> secondarySitesMap.get(secondarySite).stream()
                                .map(modName -> modificationProvider.getModification(modName))
                                .anyMatch(secondaryModification -> secondaryModification.getMass() == previousModification.getMass()));

                        if (sameModification) {

                            // Merge the current group into the previous one. The check must be made
                            // on the site being copied: testing secondarySite (equal to previousSite,
                            // which is always a key of the previous group) would never add anything.
                            for (int tempSecondarySite : secondarySitesMap.keySet()) {
                                if (!previousSites.containsKey(tempSecondarySite)) {
                                    previousSites.put(tempSecondarySite, secondarySitesMap.get(tempSecondarySite));
                                }
                            }

                            ambiguousSites.remove(representativeSite);
                        }
                    }
                }
            }
        }
    }

    for (int confidentSite : confidentSites.keySet()) {
        for (String modName : confidentSites.get(confidentSite)) {
            proteinScores.addConfidentModificationSite(modName, confidentSite);
        }
    }

    for (int representativeSite : ambiguousSites.keySet()) {
        proteinScores.addAmbiguousModificationSites(representativeSite, ambiguousSites.get(representativeSite));
    }

    proteinMatch.addUrParam(proteinScores);
}
use of com.compomics.util.experiment.biology.modifications.ModificationProvider in project peptide-shaker by compomics.
the class ModificationLocalizationScorer method attachProbabilisticScore.
/**
 * Attaches the selected probabilistic modification score.
 *
 * The variable modifications of the best peptide assumption are grouped by
 * mass. For every mass, the selected probabilistic score is computed, its
 * per-site values are mapped back to one of the searched modifications of
 * that mass, and stored in the modification scores of the spectrum match.
 * The spectrum match is then updated in the identification object. Nothing
 * is scored or updated when the peptide carries no variable modification.
 *
 * @param spectrumMatch The spectrum match studied, the score will be
 * calculated for the best assumption only.
 * @param sequenceProvider The protein sequence provider to use.
 * @param spectrumProvider The spectrum provider to use.
 * @param modificationProvider The modification provider to use.
 * @param identificationParameters The identification parameters.
 * @param peptideSpectrumAnnotator The peptide spectrum annotator to use.
 * @param identification The identification object containing the matches.
 *
 * @throws IllegalArgumentException if PhosphoRS returns no score or if a
 * scored site cannot be mapped to a searched modification
 */
private void attachProbabilisticScore(SpectrumMatch spectrumMatch, SequenceProvider sequenceProvider, SpectrumProvider spectrumProvider, ModificationProvider modificationProvider, IdentificationParameters identificationParameters, PeptideSpectrumAnnotator peptideSpectrumAnnotator, Identification identification) {

    SearchParameters searchParameters = identificationParameters.getSearchParameters();
    AnnotationParameters annotationParameters = identificationParameters.getAnnotationParameters();
    ModificationLocalizationParameters scoringParameters = identificationParameters.getModificationLocalizationParameters();
    SequenceMatchingParameters sequenceMatchingParameters = identificationParameters.getSequenceMatchingParameters();
    SequenceMatchingParameters modificationSequenceMatchingParameters = scoringParameters.getSequenceMatchingParameters();
    ModificationParameters modificationParameters = searchParameters.getModificationParameters();

    PSModificationScores modificationScores = (PSModificationScores) spectrumMatch.getUrParam(PSModificationScores.dummy);

    // Create and attach a scores container only when the match has none yet. Existing scores
    // are kept and completed; a missing container would otherwise cause a NullPointerException
    // when the probabilistic scores are stored below.
    if (modificationScores == null) {
        modificationScores = new PSModificationScores();
        spectrumMatch.addUrParam(modificationScores);
    }

    // Modification mass -> searched, not fixed modifications of that mass.
    HashMap<Double, ArrayList<Modification>> modificationsMap = new HashMap<>(1);

    PeptideAssumption bestPeptideAssumption = spectrumMatch.getBestPeptideAssumption();
    Peptide peptide = bestPeptideAssumption.getPeptide();

    for (ModificationMatch modificationMatch : peptide.getVariableModifications()) {

        Modification refMod = modificationProvider.getModification(modificationMatch.getModification());
        double modMass = refMod.getMass();

        if (!modificationsMap.containsKey(modMass)) {
            ArrayList<Modification> modifications = modificationFactory.getSameMassNotFixedModifications(modMass, searchParameters).stream()
                    .map(modification -> modificationProvider.getModification(modification))
                    .collect(Collectors.toCollection(ArrayList::new));
            modificationsMap.put(modMass, modifications);
        }
    }

    if (!modificationsMap.isEmpty()) {

        String spectrumFile = spectrumMatch.getSpectrumFile();
        String spectrumTitle = spectrumMatch.getSpectrumTitle();
        Spectrum spectrum = spectrumProvider.getSpectrum(spectrumFile, spectrumTitle);
        SpecificAnnotationParameters specificAnnotationParameters = annotationParameters.getSpecificAnnotationParameters(spectrumFile, spectrumTitle, bestPeptideAssumption, modificationParameters, sequenceProvider, modificationSequenceMatchingParameters, peptideSpectrumAnnotator);

        for (double modMass : modificationsMap.keySet()) {

            ArrayList<Modification> candidateModifications = modificationsMap.get(modMass);
            HashMap<Integer, Double> scores = null;

            if (scoringParameters.getSelectedProbabilisticScore() == ModificationLocalizationScore.PhosphoRS) {

                scores = PhosphoRS.getSequenceProbabilities(peptide, candidateModifications, modificationParameters, spectrum, sequenceProvider, annotationParameters, specificAnnotationParameters, scoringParameters.isProbabilisticScoreNeutralLosses(), sequenceMatchingParameters, modificationSequenceMatchingParameters, peptideSpectrumAnnotator);

                if (scores == null) {
                    // Most likely a compatibility issue with utilities
                    throw new IllegalArgumentException("An error occurred while scoring spectrum " + spectrumTitle + " of file " + spectrumFile + " with PhosphoRS.");
                }
            }

            if (scores != null) {

                // remap to searched modifications
                String peptideSequence = peptide.getSequence();

                for (int site : scores.keySet()) {

                    // Reset for every site so that a mapping found for a previous site cannot
                    // hide a failure to map the current one.
                    Modification mappedModification = null;

                    if (site == 0) {

                        // N-term mod
                        for (Modification modification : candidateModifications) {
                            if (modification.getModificationType().isNTerm()) {
                                mappedModification = modification;
                                break;
                            }
                        }

                        if (mappedModification == null) {
                            throw new IllegalArgumentException("Could not map the PTM of mass " + modMass + " on the N-terminus of the peptide " + peptideSequence + ".");
                        }

                    } else if (site == peptideSequence.length() + 1) {

                        // C-term mod
                        for (Modification modification : candidateModifications) {
                            if (modification.getModificationType().isCTerm()) {
                                mappedModification = modification;
                                break;
                            }
                        }

                        if (mappedModification == null) {
                            throw new IllegalArgumentException("Could not map the PTM of mass " + modMass + " on the C-terminus of the peptide " + peptideSequence + ".");
                        }

                    } else {

                        // Site within the sequence: the first modification of this mass is used.
                        if (!candidateModifications.isEmpty()) {
                            mappedModification = candidateModifications.get(0);
                        }

                        if (mappedModification == null) {
                            throw new IllegalArgumentException("Could not map the PTM of mass " + modMass + " at site " + site + " in peptide " + peptideSequence + ".");
                        }
                    }

                    String modName = mappedModification.getName();
                    ModificationScoring modificationScoring = modificationScores.getModificationScoring(modName);

                    if (modificationScoring == null) {
                        modificationScoring = new ModificationScoring(modName);
                        modificationScores.addModificationScoring(modName, modificationScoring);
                    }

                    modificationScoring.setProbabilisticScore(site, scores.get(site));
                }
            }
        }

        identification.updateObject(spectrumMatch.getKey(), spectrumMatch);
    }
}
use of com.compomics.util.experiment.biology.modifications.ModificationProvider in project peptide-shaker by compomics.
the class ProteinProcessor method processProteins.
/**
 * Scores the PTMs of all protein matches contained in an identification
 * object, estimates spectrum counting and summary statistics.
 *
 * The protein matches are processed by a fixed pool of worker runnables
 * sharing a single protein matches iterator. Once all workers are done, the
 * values gathered by each worker are combined and, if metrics are provided,
 * saved there together with the protein keys sorted according to the merged
 * order maps of the workers.
 *
 * @param modificationLocalizationScorer The modification localization
 * scorer to use.
 * @param metrics If provided, metrics on proteins will be saved while
 * iterating the matches.
 * @param modificationProvider The modification provider to use.
 * @param waitingHandler The handler displaying feedback to the user.
 * @param exceptionHandler The exception handler to use.
 * @param processingParameters The processing parameters.
 *
 * @throws java.lang.InterruptedException exception thrown if a thread gets
 * interrupted
 * @throws java.util.concurrent.TimeoutException exception thrown if the
 * operation times out
 */
public void processProteins(ModificationLocalizationScorer modificationLocalizationScorer, Metrics metrics, ModificationProvider modificationProvider, WaitingHandler waitingHandler, ExceptionHandler exceptionHandler, ProcessingParameters processingParameters) throws InterruptedException, TimeoutException {

    waitingHandler.setWaitingText("Scoring Protein Modification Localization. Please Wait...");

    int max = identification.getProteinIdentification().size();

    waitingHandler.setSecondaryProgressCounterIndeterminate(false);
    waitingHandler.setMaxSecondaryProgressCounter(max);

    // validate the proteins
    int nThreads = processingParameters.getnThreads();
    ExecutorService pool = Executors.newFixedThreadPool(nThreads);
    ProteinMatchesIterator proteinMatchesIterator = identification.getProteinMatchesIterator(waitingHandler);
    ArrayList<ProteinRunnable> runnables = new ArrayList<>(nThreads);

    // One runnable per thread, all consuming the same iterator.
    for (int i = 0; i < nThreads; i++) {
        ProteinRunnable runnable = new ProteinRunnable(proteinMatchesIterator, modificationLocalizationScorer, modificationProvider, waitingHandler, exceptionHandler);
        pool.submit(runnable);
        runnables.add(runnable);
    }

    if (waitingHandler.isRunCanceled()) {
        pool.shutdownNow();
    } else {
        pool.shutdown();
    }

    // One minute per protein match, and at least one minute so that an empty identification
    // does not report a timeout before the workers had a chance to terminate.
    long timeoutMinutes = Math.max(1, identification.getProteinIdentification().size());

    if (!pool.awaitTermination(timeoutMinutes, TimeUnit.MINUTES)) {
        throw new TimeoutException("Protein matches validation timed out. Please contact the developers.");
    }

    waitingHandler.setSecondaryProgressCounterIndeterminate(true);

    if (metrics != null) {

        // Maxima are taken over the per-thread maxima, counts are summed over the threads.
        metrics.setMaxSpectrumCounting(runnables.stream().mapToDouble(ProteinRunnable::getMaxSpectrumCounting).max().orElse(0.0));
        metrics.setnValidatedProteins(runnables.stream().mapToInt(ProteinRunnable::getnValidatedProteins).sum());
        metrics.setnConfidentProteins(runnables.stream().mapToInt(ProteinRunnable::getnConfidentProteins).sum());
        metrics.setMaxNPeptides(runnables.stream().mapToInt(ProteinRunnable::getMaxPeptides).max().orElse(0));
        metrics.setMaxNPsms(runnables.stream().mapToInt(ProteinRunnable::getMaxPsms).max().orElse(0));
        metrics.setMaxMW(runnables.stream().mapToDouble(ProteinRunnable::getMaxMW).max().orElse(0.0));
        metrics.setMaxProteinAccessionLength(runnables.stream().mapToInt(ProteinRunnable::getMaxProteinAccessionLength).max().orElse(0));

        // Merge the three-level order maps of all threads into sorted maps.
        TreeMap<Double, TreeMap<Integer, TreeMap<Integer, TreeSet<Long>>>> orderMap1 = new TreeMap<>();

        for (ProteinRunnable runnable : runnables) {

            HashMap<Double, HashMap<Integer, HashMap<Integer, HashSet<Long>>>> threadMap1 = runnable.getOrderMap();

            for (Entry<Double, HashMap<Integer, HashMap<Integer, HashSet<Long>>>> entry1 : threadMap1.entrySet()) {

                TreeMap<Integer, TreeMap<Integer, TreeSet<Long>>> orderMap2 = orderMap1.computeIfAbsent(entry1.getKey(), key -> new TreeMap<>());

                for (Entry<Integer, HashMap<Integer, HashSet<Long>>> entry2 : entry1.getValue().entrySet()) {

                    TreeMap<Integer, TreeSet<Long>> orderMap3 = orderMap2.computeIfAbsent(entry2.getKey(), key -> new TreeMap<>());

                    for (Entry<Integer, HashSet<Long>> entry3 : entry2.getValue().entrySet()) {
                        orderMap3.computeIfAbsent(entry3.getKey(), key -> new TreeSet<>()).addAll(entry3.getValue());
                    }
                }
            }
        }

        // Flatten the sorted maps into the ordered array of protein keys.
        long[] proteinKeys = orderMap1.values().stream()
                .flatMap(map -> map.values().stream())
                .flatMap(map -> map.values().stream())
                .flatMap(set -> set.stream())
                .mapToLong(a -> a)
                .toArray();

        metrics.setProteinKeys(proteinKeys);
    }
}
Aggregations