Search in sources :

Example 1 with GroupSimplification

Use of eu.isas.peptideshaker.protein_inference.GroupSimplification in the project peptide-shaker by compomics.

In the class PeptideShaker, the method createProject:

/**
 * Creates a PeptideShaker project by running the identification processing
 * steps in sequence: PSM scoring (when needed), assumption and PSM
 * probability estimation, and then, depending on the project type, the
 * peptide and protein level steps, followed by validation and modification
 * localization scoring.
 *
 * The objects database is committed after every step. The waiting handler
 * is polled for cancellation between steps; when the run is canceled the
 * method returns immediately, without attaching the maps to the
 * identification and without setting the waiting handler to finished.
 *
 * @param identificationParameters the identification parameters
 * @param processingParameters the processing parameters
 * @param spectrumCountingParameters the spectrum counting parameters
 * @param spectrumProvider the spectrum provider
 * @param projectDetails the project details
 * @param projectType the project type
 * @param waitingHandler the handler displaying feedback to the user
 * @param setWaitingHandlerFinshedWhenDone if true, the waiting handler is
 * set to finished when the project is created
 * @param exceptionHandler handler for exceptions
 *
 * @throws java.lang.InterruptedException exception thrown if a thread gets
 * interrupted
 * @throws java.util.concurrent.TimeoutException exception thrown if a
 * process times out
 * @throws java.io.IOException if an exception occurs when parsing files
 */
public void createProject(IdentificationParameters identificationParameters, ProcessingParameters processingParameters, SpectrumCountingParameters spectrumCountingParameters, SpectrumProvider spectrumProvider, ProjectDetails projectDetails, ProjectType projectType, WaitingHandler waitingHandler, boolean setWaitingHandlerFinshedWhenDone, ExceptionHandler exceptionHandler) throws InterruptedException, TimeoutException, IOException {
    // The project type never changes during the run, so the two level checks
    // used throughout the method are evaluated once.
    final boolean peptideLevel = projectType == ProjectType.peptide || projectType == ProjectType.protein;
    final boolean proteinLevel = projectType == ProjectType.protein;
    identification.getObjectsDB().commit();
    identificationFeaturesGenerator = new IdentificationFeaturesGenerator(identification, identificationParameters, sequenceProvider, spectrumProvider, metrics, spectrumCountingParameters);
    matchesValidator = new MatchesValidator(new TargetDecoyMap(), new TargetDecoyMap(), new TargetDecoyMap());
    if (waitingHandler.isRunCanceled()) {
        return;
    }
    PsmScoringParameters psmScoringPreferences = identificationParameters.getPsmScoringParameters();
    FastaParameters fastaParameters = identificationParameters.getFastaParameters();
    FastaSummary fastaSummary = FastaSummary.getSummary(projectDetails.getFastaFile(), fastaParameters, waitingHandler);
    // set the background species
    identificationParameters.getGeneParameters().setBackgroundSpeciesFromFastaSummary(fastaSummary);
    // PSM scoring, only when the scoring parameters require it for the algorithms used
    ArrayList<Integer> usedAlgorithms = projectDetails.getIdentificationAlgorithms();
    if (psmScoringPreferences.isScoringNeeded(usedAlgorithms)) {
        waitingHandler.appendReport("Estimating PSM scores.", true, true);
        PsmScorer psmScorer = new PsmScorer(fastaParameters, sequenceProvider, spectrumProvider);
        psmScorer.estimateIntermediateScores(identification, inputMap, processingParameters, identificationParameters, waitingHandler, exceptionHandler);
        if (psmScoringPreferences.isTargetDecoyNeededForPsmScoring(usedAlgorithms)) {
            if (fastaParameters.isTargetDecoy()) {
                waitingHandler.appendReport("Estimating intermediate scores probabilities.", true, true);
                psmScorer.estimateIntermediateScoreProbabilities(identification, inputMap, processingParameters, waitingHandler);
            } else {
                waitingHandler.appendReport("No decoy sequences found. Impossible to estimate intermediate scores probabilities.", true, true);
            }
        }
        waitingHandler.appendReport("Scoring PSMs.", true, true);
        psmScorer.scorePsms(identification, inputMap, processingParameters, identificationParameters, waitingHandler);
    }
    identification.getObjectsDB().commit();
    System.gc();
    // assumption probabilities
    if (fastaParameters.isTargetDecoy()) {
        waitingHandler.appendReport("Computing assumptions probabilities.", true, true);
    } else {
        waitingHandler.appendReport("Importing assumptions scores.", true, true);
    }
    inputMap.estimateProbabilities(waitingHandler);
    waitingHandler.increasePrimaryProgressCounter();
    if (waitingHandler.isRunCanceled()) {
        return;
    }
    identification.getObjectsDB().commit();
    System.gc();
    // PSM processing
    waitingHandler.appendReport("Saving assumptions probabilities, selecting best match, scoring modification localization.", true, true);
    PsmProcessor psmProcessor = new PsmProcessor(identification);
    psmProcessor.processPsms(inputMap, identificationParameters, matchesValidator, modificationLocalizationScorer, sequenceProvider, spectrumProvider, modificationFactory, proteinCount, processingParameters.getnThreads(), waitingHandler, exceptionHandler);
    waitingHandler.increasePrimaryProgressCounter();
    if (waitingHandler.isRunCanceled()) {
        return;
    }
    identification.getObjectsDB().commit();
    System.gc();
    // PSM probabilities
    waitingHandler.appendReport("Computing PSM probabilities.", true, true);
    matchesValidator.getPsmMap().estimateProbabilities(waitingHandler);
    if (waitingHandler.isRunCanceled()) {
        return;
    }
    identification.getObjectsDB().commit();
    System.gc();
    // peptide inference, only for peptide and protein projects
    if (peptideLevel) {
        PeptideInference peptideInference = new PeptideInference();
        ModificationLocalizationParameters modificationScoringPreferences = identificationParameters.getModificationLocalizationParameters();
        if (modificationScoringPreferences.getAlignNonConfidentModifications()) {
            waitingHandler.appendReport("Resolving peptide inference issues.", true, true);
            peptideInference.peptideInference(identification, identificationParameters, sequenceProvider, modificationFactory, waitingHandler);
            waitingHandler.increasePrimaryProgressCounter();
            if (waitingHandler.isRunCanceled()) {
                return;
            }
        }
        identification.getObjectsDB().commit();
        System.gc();
    }
    // the report line and the waiting title depend on what gets built for this project type
    String reportTxt = "Saving probabilities";
    String waitingTitle = "Saving Probabilities.";
    switch(projectType) {
        case psm:
            reportTxt += ".";
            break;
        case peptide:
            reportTxt += ", building peptides.";
            waitingTitle += " Building Peptides.";
            break;
        default:
            reportTxt += ", building peptides and proteins.";
            waitingTitle += " Building Peptides and Proteins.";
    }
    waitingHandler.appendReport(reportTxt, true, true);
    waitingHandler.setWaitingText(waitingTitle + " Please Wait...");
    attachSpectrumProbabilitiesAndBuildPeptidesAndProteins(sequenceProvider, identificationParameters.getSequenceMatchingParameters(), projectType, fastaParameters, waitingHandler);
    waitingHandler.increasePrimaryProgressCounter();
    if (waitingHandler.isRunCanceled()) {
        return;
    }
    identification.getObjectsDB().commit();
    System.gc();
    if (peptideLevel) {
        // peptide maps and probabilities
        waitingHandler.appendReport("Generating peptide map.", true, true);
        matchesValidator.fillPeptideMaps(identification, metrics, waitingHandler, identificationParameters, sequenceProvider, spectrumProvider);
        if (waitingHandler.isRunCanceled()) {
            return;
        }
        identification.getObjectsDB().commit();
        System.gc();
        waitingHandler.appendReport("Computing peptide probabilities.", true, true);
        matchesValidator.getPeptideMap().estimateProbabilities(waitingHandler);
        if (waitingHandler.isRunCanceled()) {
            return;
        }
        identification.getObjectsDB().commit();
        System.gc();
        waitingHandler.appendReport("Saving peptide probabilities.", true, true);
        matchesValidator.attachPeptideProbabilities(identification, fastaParameters, waitingHandler);
        waitingHandler.increasePrimaryProgressCounter();
        if (waitingHandler.isRunCanceled()) {
            return;
        }
        identification.getObjectsDB().commit();
        System.gc();
        if (proteinLevel) {
            // optional protein group simplification
            if (identificationParameters.getProteinInferenceParameters().getSimplifyGroups()) {
                waitingHandler.appendReport("Simplifying protein groups.", true, true);
                GroupSimplification groupSimplification = new GroupSimplification();
                groupSimplification.removeRedundantGroups(identification, identificationParameters, sequenceProvider, proteinDetailsProvider, waitingHandler);
                waitingHandler.increasePrimaryProgressCounter();
                if (waitingHandler.isRunCanceled()) {
                    return;
                }
            }
            identification.getObjectsDB().commit();
            System.gc();
            // protein inference
            ProteinInference proteinInference = new ProteinInference();
            waitingHandler.appendReport("Mapping shared peptides.", true, true);
            proteinInference.distributeSharedPeptides(identification, waitingHandler);
            waitingHandler.increasePrimaryProgressCounter();
            if (waitingHandler.isRunCanceled()) {
                return;
            }
            identification.getObjectsDB().commit();
            System.gc();
            waitingHandler.appendReport("Generating protein map.", true, true);
            matchesValidator.fillProteinMap(identification, spectrumProvider, waitingHandler);
            waitingHandler.increasePrimaryProgressCounter();
            if (waitingHandler.isRunCanceled()) {
                return;
            }
            identification.getObjectsDB().commit();
            System.gc();
            waitingHandler.appendReport("Selecting leading proteins, inferring peptide and protein inference status.", true, true);
            proteinInference.inferPiStatus(identification, metrics, matchesValidator.getProteinMap(), identificationParameters, sequenceProvider, proteinDetailsProvider, waitingHandler);
            waitingHandler.increasePrimaryProgressCounter();
            if (waitingHandler.isRunCanceled()) {
                return;
            }
            identification.getObjectsDB().commit();
            System.gc();
            // protein probabilities
            waitingHandler.appendReport("Computing protein probabilities.", true, true);
            matchesValidator.getProteinMap().estimateProbabilities(waitingHandler);
            if (waitingHandler.isRunCanceled()) {
                return;
            }
            identification.getObjectsDB().commit();
            System.gc();
            waitingHandler.appendReport("Saving protein probabilities.", true, true);
            matchesValidator.attachProteinProbabilities(identification, sequenceProvider, fastaParameters, metrics, waitingHandler, identificationParameters.getFractionParameters());
            waitingHandler.increasePrimaryProgressCounter();
            if (waitingHandler.isRunCanceled()) {
                return;
            }
            identification.getObjectsDB().commit();
            System.gc();
        }
    }
    // validation; only the report line depends on the target/decoy and FDR settings
    if (fastaParameters.isTargetDecoy()) {
        IdMatchValidationParameters idMatchValidationParameters = identificationParameters.getIdValidationParameters();
        if (idMatchValidationParameters.getDefaultPsmFDR() == 1 && idMatchValidationParameters.getDefaultPeptideFDR() == 1 && idMatchValidationParameters.getDefaultProteinFDR() == 1) {
            waitingHandler.appendReport("Validating identifications at 1% FDR, quality control of matches.", true, true);
        } else {
            waitingHandler.appendReport("Validating identifications, quality control of matches.", true, true);
        }
    } else {
        waitingHandler.appendReport("Quality control of matches.", true, true);
    }
    matchesValidator.validateIdentifications(identification, metrics, inputMap, waitingHandler, exceptionHandler, identificationFeaturesGenerator, sequenceProvider, proteinDetailsProvider, spectrumProvider, geneMaps, identificationParameters, projectType, processingParameters);
    waitingHandler.increasePrimaryProgressCounter();
    if (waitingHandler.isRunCanceled()) {
        return;
    }
    identification.getObjectsDB().commit();
    System.gc();
    if (peptideLevel) {
        // modification localization at the peptide level
        waitingHandler.appendReport("Scoring PTMs in peptides.", true, true);
        modificationLocalizationScorer.scorePeptidePtms(identification, modificationFactory, waitingHandler, identificationParameters);
        waitingHandler.increasePrimaryProgressCounter();
        if (waitingHandler.isRunCanceled()) {
            return;
        }
        identification.getObjectsDB().commit();
        System.gc();
        if (proteinLevel) {
            // spectrum counting scaling factors
            waitingHandler.appendReport("Estimating spectrum counting scaling values.", true, true);
            ScalingFactorsEstimators scalingFactors = new ScalingFactorsEstimators(spectrumCountingParameters);
            scalingFactors.estimateScalingFactors(identification, metrics, sequenceProvider, identificationFeaturesGenerator, waitingHandler, exceptionHandler, processingParameters);
            waitingHandler.increasePrimaryProgressCounter();
            if (waitingHandler.isRunCanceled()) {
                return;
            }
            identification.getObjectsDB().commit();
            System.gc();
            // modification localization at the protein level and summary metrics
            waitingHandler.appendReport("Scoring PTMs in proteins, gathering summary metrics.", true, true);
            ProteinProcessor proteinProcessor = new ProteinProcessor(identification, identificationParameters, identificationFeaturesGenerator, sequenceProvider);
            proteinProcessor.processProteins(modificationLocalizationScorer, metrics, modificationFactory, waitingHandler, exceptionHandler, processingParameters);
            waitingHandler.increasePrimaryProgressCounter();
            if (waitingHandler.isRunCanceled()) {
                return;
            }
            identification.getObjectsDB().commit();
            System.gc();
        }
    }
    // report the duration and attach the input, PSM, peptide and protein maps to the identification
    projectCreationDuration.end();
    String report = "Identification processing completed (" + projectCreationDuration.toString() + ").";
    waitingHandler.appendReport(report, true, true);
    waitingHandler.appendReportEndLine();
    waitingHandler.appendReportEndLine();
    identification.addUrParam(new PSMaps(inputMap, matchesValidator.getPsmMap(), matchesValidator.getPeptideMap(), matchesValidator.getProteinMap()));
    if (setWaitingHandlerFinshedWhenDone) {
        waitingHandler.setRunFinished();
    }
}
Also used : ProteinInference(eu.isas.peptideshaker.protein_inference.ProteinInference) IdentificationFeaturesGenerator(com.compomics.util.experiment.identification.features.IdentificationFeaturesGenerator) PeptideInference(com.compomics.util.experiment.identification.peptide_inference.PeptideInference) MatchesValidator(eu.isas.peptideshaker.validation.MatchesValidator) ScalingFactorsEstimators(com.compomics.util.experiment.quantification.spectrumcounting.ScalingFactorsEstimators) PSMaps(eu.isas.peptideshaker.scoring.PSMaps) IdMatchValidationParameters(com.compomics.util.parameters.identification.advanced.IdMatchValidationParameters) FastaSummary(com.compomics.util.experiment.io.biology.protein.FastaSummary) PsmScorer(eu.isas.peptideshaker.scoring.psm_scoring.PsmScorer) TargetDecoyMap(eu.isas.peptideshaker.scoring.targetdecoy.TargetDecoyMap) PsmScoringParameters(com.compomics.util.parameters.identification.advanced.PsmScoringParameters) ModificationLocalizationParameters(com.compomics.util.parameters.identification.advanced.ModificationLocalizationParameters) ProteinProcessor(eu.isas.peptideshaker.processing.ProteinProcessor) GroupSimplification(eu.isas.peptideshaker.protein_inference.GroupSimplification) FastaParameters(com.compomics.util.experiment.io.biology.protein.FastaParameters) PsmProcessor(eu.isas.peptideshaker.processing.PsmProcessor)

Aggregations

IdentificationFeaturesGenerator (com.compomics.util.experiment.identification.features.IdentificationFeaturesGenerator)1 PeptideInference (com.compomics.util.experiment.identification.peptide_inference.PeptideInference)1 FastaParameters (com.compomics.util.experiment.io.biology.protein.FastaParameters)1 FastaSummary (com.compomics.util.experiment.io.biology.protein.FastaSummary)1 ScalingFactorsEstimators (com.compomics.util.experiment.quantification.spectrumcounting.ScalingFactorsEstimators)1 IdMatchValidationParameters (com.compomics.util.parameters.identification.advanced.IdMatchValidationParameters)1 ModificationLocalizationParameters (com.compomics.util.parameters.identification.advanced.ModificationLocalizationParameters)1 PsmScoringParameters (com.compomics.util.parameters.identification.advanced.PsmScoringParameters)1 ProteinProcessor (eu.isas.peptideshaker.processing.ProteinProcessor)1 PsmProcessor (eu.isas.peptideshaker.processing.PsmProcessor)1 GroupSimplification (eu.isas.peptideshaker.protein_inference.GroupSimplification)1 ProteinInference (eu.isas.peptideshaker.protein_inference.ProteinInference)1 PSMaps (eu.isas.peptideshaker.scoring.PSMaps)1 PsmScorer (eu.isas.peptideshaker.scoring.psm_scoring.PsmScorer)1 TargetDecoyMap (eu.isas.peptideshaker.scoring.targetdecoy.TargetDecoyMap)1 MatchesValidator (eu.isas.peptideshaker.validation.MatchesValidator)1