Search in sources :

Example 1 with MassToFingerprintGroupListCollection

use of de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupListCollection in project MetFragRelaunched by ipb-halle.

the class AutomatedLossFingerprintAnnotationScoreInitialiser method initScoreParameters.

/**
 * Reads the loss-to-fingerprint annotations that are relevant for the current peak list and
 * stores the result in {@code settings}.
 * <p>
 * Nothing is done when {@code LOSS_TO_FINGERPRINT_GROUP_LIST_COLLECTION_NAME} is already set to a
 * non-null value. Otherwise the annotation data is read either from the file configured under
 * {@code FINGERPRINT_LOSS_ANNOTATION_FILE_NAME} or from the bundled
 * {@code loss_annotations_pos.txt} / {@code loss_annotations_neg.txt} resource. Only annotation
 * masses that match a mass difference derived from the peak list are kept. On success the method
 * sets {@code LOSS_MASSES_FOUND_PEAKLIST_NAME} and
 * {@code LOSS_TO_FINGERPRINT_GROUP_LIST_COLLECTION_NAME}; the three count values
 * ({@code LOSS_FINGERPRINT_DENOMINATOR_COUNT_NAME}, {@code LOSS_FINGERPRINT_MATCHED_TUPLE_COUNT_NAME},
 * {@code LOSS_FINGERPRINT_NON_MATCHED_TUPLE_COUNT_NAME}) are set when a {@code SUMMARY} line is read.
 * <p>
 * The annotation reader is always closed, also when parsing fails.
 *
 * @param settings settings object that is read from and written to
 * @throws java.io.FileNotFoundException if the bundled annotation resource cannot be located
 * @throws java.io.IOException if the annotation source is empty or cannot be read
 * @throws Exception for any other failure while parsing or initialising
 */
@Override
public void initScoreParameters(Settings settings) throws Exception {
    // already initialised: nothing to do
    if (settings.containsKey(VariableNames.LOSS_TO_FINGERPRINT_GROUP_LIST_COLLECTION_NAME) && settings.get(VariableNames.LOSS_TO_FINGERPRINT_GROUP_LIST_COLLECTION_NAME) != null) {
        return;
    }
    MassToFingerprintGroupListCollection lossToFingerprintGroupListCollection = new MassToFingerprintGroupListCollection();
    java.util.LinkedList<Double> lossMassesFound = new java.util.LinkedList<Double>();
    DefaultPeakList peakList = (DefaultPeakList) settings.get(VariableNames.PEAK_LIST_NAME);
    Double mzppm = (Double) settings.get(VariableNames.RELATIVE_MASS_DEVIATION_NAME);
    Double mzabs = (Double) settings.get(VariableNames.ABSOLUTE_MASS_DEVIATION_NAME);
    BufferedReader breader = null;
    try {
        if (settings.containsKey(VariableNames.FINGERPRINT_LOSS_ANNOTATION_FILE_NAME) && settings.get(VariableNames.FINGERPRINT_LOSS_ANNOTATION_FILE_NAME) != null) {
            breader = new BufferedReader(new FileReader(new File((String) settings.get(VariableNames.FINGERPRINT_LOSS_ANNOTATION_FILE_NAME))));
        } else {
            String filename = "loss_annotations_neg.txt";
            if ((Boolean) settings.get(VariableNames.IS_POSITIVE_ION_MODE_NAME)) {
                filename = "loss_annotations_pos.txt";
            }
            java.io.InputStream is = AutomatedPeakFingerprintAnnotationScoreInitialiser.class.getResourceAsStream("/" + filename);
            // getResourceAsStream returns null for a missing resource; fail with a clear message
            if (is == null) {
                throw new java.io.FileNotFoundException("Loss annotation resource not found on classpath: /" + filename);
            }
            breader = new java.io.BufferedReader(new java.io.InputStreamReader(is));
        }
        Double neutralPrecursorMass = (Double) settings.get(VariableNames.PRECURSOR_NEUTRAL_MASS_NAME);
        Double adductMass = Constants.getIonisationTypeMassCorrection(Constants.ADDUCT_NOMINAL_MASSES.indexOf((Integer) settings.get(VariableNames.PRECURSOR_ION_MODE_NAME)), (Boolean) settings.get(VariableNames.IS_POSITIVE_ION_MODE_NAME));
        java.util.ArrayList<Double> massDifferences = this.calculatePeakDifferences(peakList, neutralPrecursorMass, adductMass);
        java.util.ArrayList<Double> uniqueMassDifferences = this.calculateUniquePeakDifferences(massDifferences, mzppm, mzabs);
        String line;
        int numMatchedObservationsMerged = 0;
        java.util.HashMap<Double, MassToFingerprintGroupList> mergedFingerprintGroupLists = new java.util.HashMap<Double, MassToFingerprintGroupList>();
        this.setPseudoCountValues(settings);
        // first add non-matched masses with dummy fingerprint "0"
        // these masses are listed in the first line of the annotation file
        String firstLine = breader.readLine();
        if (firstLine == null) {
            throw new java.io.IOException("Loss annotation file is empty");
        }
        String nonMatchedMassesString = firstLine.trim();
        int numNonMatchElements = 0;
        int numNonMatchOccurrences = 0;
        if (!nonMatchedMassesString.equals("NA")) {
            // entries are separated by ";"
            String[] tmp = nonMatchedMassesString.split(";");
            numNonMatchElements = tmp.length;
            for (int k = 0; k < tmp.length; k++) {
                // each entry is split on whitespace into the mass and an optional count
                String[] tmp2 = tmp[k].split("\\s+");
                int count = 1;
                Double newMass = Double.parseDouble(tmp2[0]);
                // if a count is present use it, otherwise keep the default of 1
                if (tmp2.length == 2) {
                    count = Integer.parseInt(tmp2[1]);
                }
                // sum up the non-matched occurrences
                numNonMatchOccurrences += count;
                // check whether newMass is also present among the losses found in the peak list
                Double matchedMass = this.containsMass(newMass, uniqueMassDifferences, mzabs, mzppm);
                // not present and already larger than the largest peak mass: stop here
                if (matchedMass == null && newMass > peakList.getMaximumMassValue()) {
                    break;
                }
                if (matchedMass != null) {
                    // loss is present in the peak list: prepare a dummy-fingerprint element for it
                    FingerprintGroup group = new FingerprintGroup(1.0);
                    group.setNumberObserved(count);
                    group.setFingerprint("0");
                    if (mergedFingerprintGroupLists.containsKey(matchedMass)) {
                        // mass was already inserted
                        MassToFingerprintGroupList currentGroupList = mergedFingerprintGroupLists.get(matchedMass);
                        FingerprintGroup curGroup = currentGroupList.getElementByFingerprint(group.getFingerprint());
                        if (curGroup == null) {
                            // fingerprint not yet inserted for this mass: simply add it
                            currentGroupList.addElement(group);
                        } else {
                            // already present: one distinct non-matched element less
                            numNonMatchElements--;
                            // merge the values into the existing element
                            curGroup.setNumberObserved(curGroup.getNumberObserved() + group.getNumberObserved());
                            curGroup.setProbability(curGroup.getProbability() + group.getProbability());
                        }
                    } else {
                        // mass not yet present: create a new list for it
                        MassToFingerprintGroupList currentGroupList = new MassToFingerprintGroupList(matchedMass);
                        currentGroupList.addElement(group);
                        mergedFingerprintGroupLists.put(matchedMass, currentGroupList);
                    }
                }
            }
        }
        // now add loss-fingerprint assignments which were annotated in the training
        while ((line = breader.readLine()) != null) {
            line = line.trim();
            if (line.length() == 0) {
                continue;
            }
            if (line.startsWith("#")) {
                continue;
            }
            if (line.startsWith("SUMMARY")) {
                String[] tmp = line.split("\\s+");
                // sum of overall occurrences
                settings.set(VariableNames.LOSS_FINGERPRINT_DENOMINATOR_COUNT_NAME, Double.parseDouble(tmp[2]) + numNonMatchOccurrences);
                // number of different matched tuples, reduced by the ones merged so far
                settings.set(VariableNames.LOSS_FINGERPRINT_MATCHED_TUPLE_COUNT_NAME, Double.parseDouble(tmp[1]) - numMatchedObservationsMerged);
                // number of different non-matched tuples
                settings.set(VariableNames.LOSS_FINGERPRINT_NON_MATCHED_TUPLE_COUNT_NAME, (double) numNonMatchElements);
                continue;
            }
            String[] tmp = line.split("\\s+");
            Double loss = Double.parseDouble(tmp[0]);
            // check whether the current annotated loss is also present in the peak list
            Double matchedMass = this.containsMass(loss, uniqueMassDifferences, mzabs, mzppm);
            if (matchedMass != null) {
                // create fingerprint groups from the annotation entry
                FingerprintGroup[] groups = this.getFingerprintGroup(tmp);
                if (mergedFingerprintGroupLists.containsKey(matchedMass)) {
                    // mass is already present
                    MassToFingerprintGroupList currentGroupList = mergedFingerprintGroupLists.get(matchedMass);
                    for (int i = 0; i < groups.length; i++) {
                        FingerprintGroup curGroup = currentGroupList.getElementByFingerprint(groups[i].getFingerprint());
                        if (curGroup == null) {
                            // fingerprint not yet inserted for this mass: simply add it
                            currentGroupList.addElement(groups[i]);
                        } else {
                            // merged with an existing entry: count it to adapt the number of unique tuples
                            // (fingerprints of size 1 are excluded from this count)
                            if (curGroup.getFingerprint().getSize() != 1) {
                                numMatchedObservationsMerged++;
                            }
                            // merge the loss-fingerprint assignment values
                            curGroup.setNumberObserved(curGroup.getNumberObserved() + groups[i].getNumberObserved());
                            curGroup.setProbability(curGroup.getProbability() + groups[i].getProbability());
                        }
                    }
                } else {
                    // mass not yet present: create a new list for it
                    MassToFingerprintGroupList currentGroupList = new MassToFingerprintGroupList(matchedMass);
                    for (int i = 0; i < groups.length; i++) {
                        currentGroupList.addElement(groups[i]);
                    }
                    mergedFingerprintGroupLists.put(matchedMass, currentGroupList);
                }
            }
        }
        for (MassToFingerprintGroupList groupList : mergedFingerprintGroupLists.values()) {
            lossToFingerprintGroupListCollection.addElementSorted(groupList);
        }
        // store all mass differences (losses) of the peak list that have an annotation entry
        for (Double massDifference : massDifferences) {
            if (lossToFingerprintGroupListCollection.getElementByPeak(massDifference, mzppm, mzabs) != null) {
                lossMassesFound.add(massDifference);
            }
        }
    } finally {
        // release the file/resource handle on success and on failure
        if (breader != null) {
            breader.close();
        }
    }
    settings.set(VariableNames.LOSS_MASSES_FOUND_PEAKLIST_NAME, lossMassesFound);
    settings.set(VariableNames.LOSS_TO_FINGERPRINT_GROUP_LIST_COLLECTION_NAME, lossToFingerprintGroupListCollection);
}
Also used : MassToFingerprintsHashMap(de.ipbhalle.metfraglib.substructure.MassToFingerprintsHashMap) MassToFingerprintGroupListCollection(de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupListCollection) DefaultPeakList(de.ipbhalle.metfraglib.list.DefaultPeakList) MassToFingerprintGroupList(de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupList) FileReader(java.io.FileReader) FingerprintGroup(de.ipbhalle.metfraglib.substructure.FingerprintGroup) Fingerprint(de.ipbhalle.metfraglib.fingerprint.Fingerprint) BufferedReader(java.io.BufferedReader) BufferedReader(java.io.BufferedReader) File(java.io.File)

Example 2 with MassToFingerprintGroupListCollection

use of de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupListCollection in project MetFragRelaunched by ipb-halle.

the class AutomatedLossFingerprintAnnotationScoreInitialiser method postProcessScoreParameters.

/**
 * Derives the loss match list of every successfully processed candidate and turns the annotation
 * counts of the loss collection into probabilities.
 * <p>
 * For each successful process the first scored candidate is taken. Losses are computed between
 * pairs of matched peaks whose fragments are in a real-substructure relation, and between the
 * ion mass and each matched peak. Losses with an entry in the annotation collection are stored
 * with the fingerprint of their difference fragment. Fingerprints that the annotation does not
 * contain for the matching mass are collected separately. The resulting list is stored on the
 * candidate as property {@code "LossMatchList"}.
 * <p>
 * Afterwards the denominator value is written to
 * {@code LOSS_FINGERPRINT_DENOMINATOR_VALUE_NAME} and joint/conditional probabilities as well as
 * the alpha and beta probabilities are set on every group list of the collection.
 *
 * @param settings settings object providing processes, deviations, pseudo counts and the loss collection
 * @throws AtomTypeNotKnownFromInputListException propagated from the called fragment/fingerprint routines
 * @throws Exception on any other failure
 */
public void postProcessScoreParameters(Settings settings) throws AtomTypeNotKnownFromInputListException, Exception {
    CombinedSingleCandidateMetFragProcess[] processes = (CombinedSingleCandidateMetFragProcess[]) settings.get(VariableNames.METFRAG_PROCESSES_NAME);
    // loss/fingerprint pairs that do not occur in the training annotation
    MassToFingerprintsHashMap unseenLossFingerprints = new MassToFingerprintsHashMap();
    MassToFingerprintGroupListCollection annotatedLosses = (MassToFingerprintGroupListCollection) settings.get(VariableNames.LOSS_TO_FINGERPRINT_GROUP_LIST_COLLECTION_NAME);
    Double mzppm = (Double) settings.get(VariableNames.RELATIVE_MASS_DEVIATION_NAME);
    Double mzabs = (Double) settings.get(VariableNames.ABSOLUTE_MASS_DEVIATION_NAME);
    int precursorIonMode = (Integer) settings.get(VariableNames.PRECURSOR_ION_MODE_NAME);
    boolean positiveMode = (Boolean) settings.get(VariableNames.IS_POSITIVE_ION_MODE_NAME);
    double adductMass = Constants.getIonisationTypeMassCorrection(Constants.ADDUCT_NOMINAL_MASSES.indexOf(precursorIonMode), positiveMode);
    double neutralMass = (Double) settings.get(VariableNames.PRECURSOR_NEUTRAL_MASS_NAME);
    double ionMass = MathTools.round(neutralMass + adductMass);
    Fingerprint fingerprint = new Fingerprint((String) settings.get(VariableNames.FINGERPRINT_TYPE_NAME));
    for (CombinedSingleCandidateMetFragProcess process : processes) {
        // only successfully finished runs contribute
        if (!process.wasSuccessful()) {
            continue;
        }
        ICandidate candidate = process.getScoredPrecursorCandidates()[0];
        java.util.ArrayList<MassFingerprintMatch> candidateLosses = new java.util.ArrayList<MassFingerprintMatch>();
        MatchList peakMatches = candidate.getMatchList();
        if (peakMatches != null) {
            candidate.initialisePrecursorCandidate();
            for (int first = 0; first < peakMatches.getNumberElements(); first++) {
                IMatch firstMatch = peakMatches.getElement(first);
                IFragment firstFragment = firstMatch.getBestMatchedFragment();
                double firstPeakMass = firstMatch.getMatchedPeak().getMass();
                // losses between the current peak and every later peak of the match list
                for (int second = first + 1; second < peakMatches.getNumberElements(); second++) {
                    IMatch secondMatch = peakMatches.getElement(second);
                    double secondPeakMass = secondMatch.getMatchedPeak().getMass();
                    IFragment secondFragment = secondMatch.getBestMatchedFragment();
                    if (!secondFragment.isRealSubStructure(firstFragment)) {
                        continue;
                    }
                    double pairLoss = MathTools.round(secondPeakMass - firstPeakMass);
                    // skip losses without an annotation entry
                    if (annotatedLosses.getElementByPeak(pairLoss, mzppm, mzabs) == null) {
                        continue;
                    }
                    IFragment pairLossFragment = secondFragment.getDifferenceFragment(candidate.getPrecursorMolecule(), firstFragment);
                    if (pairLossFragment == null) {
                        continue;
                    }
                    IAtomContainer pairLossContainer = fingerprint.getNormalizedAtomContainer(candidate.getPrecursorMolecule(), pairLossFragment);
                    candidateLosses.add(new MassFingerprintMatch(pairLoss, fingerprint.getNormalizedFastBitArrayFingerprint(pairLossContainer)));
                }
                // same procedure for the loss between the precursor ion and the current peak
                double precursorLoss = MathTools.round(ionMass - firstPeakMass);
                if (annotatedLosses.getElementByPeak(precursorLoss, mzppm, mzabs) != null) {
                    IFragment precursorLossFragment = firstFragment.getDifferenceFragment(candidate.getPrecursorMolecule());
                    if (precursorLossFragment != null) {
                        IAtomContainer precursorLossContainer = fingerprint.getNormalizedAtomContainer(candidate.getPrecursorMolecule(), precursorLossFragment);
                        candidateLosses.add(new MassFingerprintMatch(precursorLoss, fingerprint.getNormalizedFastBitArrayFingerprint(precursorLossContainer)));
                    }
                }
            }
        }
        for (MassFingerprintMatch candidateLoss : candidateLosses) {
            MassToFingerprintGroupList annotatedGroupList = annotatedLosses.getElementByPeak(candidateLoss.getMass(), mzppm, mzabs);
            // losses without an annotation entry need no further consideration
            if (annotatedGroupList == null) {
                continue;
            }
            FastBitArray lossFingerprint = candidateLoss.getFingerprint();
            // fingerprints that are not annotated for this mass go to the unseen collection
            if (!annotatedGroupList.containsFingerprint(lossFingerprint)) {
                unseenLossFingerprints.addFingerprint(candidateLoss.getMass(), lossFingerprint);
            }
        }
        java.util.LinkedList<?> peakListLosses = (java.util.LinkedList<?>) settings.get(VariableNames.LOSS_MASSES_FOUND_PEAKLIST_NAME);
        // add the losses this candidate did not assign (see addNonExplainedLosses)
        this.addNonExplainedLosses(peakListLosses, candidateLosses);
        candidate.setProperty("LossMatchList", candidateLosses);
    }
    // pseudo counts
    double alpha = (double) settings.get(VariableNames.LOSS_FINGERPRINT_ANNOTATION_ALPHA_VALUE_NAME);
    double beta = (double) settings.get(VariableNames.LOSS_FINGERPRINT_ANNOTATION_BETA_VALUE_NAME);
    // tuple counts read from the settings
    double seenMatchedCount = (double) settings.get(VariableNames.LOSS_FINGERPRINT_MATCHED_TUPLE_COUNT_NAME);
    double seenNonMatchedCount = (double) settings.get(VariableNames.LOSS_FINGERPRINT_NON_MATCHED_TUPLE_COUNT_NAME);
    // tuple counts of the unseen collection built above
    double unseenMatchedCount = unseenLossFingerprints.getOverallMatchedSize();
    double unseenNonMatchedCount = unseenLossFingerprints.getOverallNonMatchedSize();
    double frequencySum = (double) settings.get(VariableNames.LOSS_FINGERPRINT_DENOMINATOR_COUNT_NAME);
    // denominator of P(f,m)
    double denominatorValue = frequencySum + alpha * (seenMatchedCount + unseenMatchedCount) + beta * (seenNonMatchedCount + unseenNonMatchedCount);
    settings.set(VariableNames.LOSS_FINGERPRINT_DENOMINATOR_VALUE_NAME, denominatorValue);
    double alphaProbability = alpha / denominatorValue;
    double betaProbability = beta / denominatorValue;
    for (int listIndex = 0; listIndex < annotatedLosses.getNumberElements(); listIndex++) {
        MassToFingerprintGroupList groupList = annotatedLosses.getElement(listIndex);
        double massTotal = 0.0;
        double seenJointSum = 0.0;
        // joint probability P(f,m) for every annotated fingerprint of this mass
        for (int groupIndex = 0; groupIndex < groupList.getNumberElements(); groupIndex++) {
            FingerprintGroup group = groupList.getElement(groupIndex);
            // fingerprints of size 1 get the beta pseudo count, all others alpha
            double pseudoCount = group.getFingerprint().getSize() != 1 ? alpha : beta;
            group.setJointProbability((group.getProbability() + pseudoCount) / denominatorValue);
            seenJointSum += group.getJointProbability();
        }
        // share of fingerprints not annotated for this mass, plus one beta share
        double unseenJointSum = alphaProbability * unseenLossFingerprints.getSizeMatched(groupList.getPeakmz());
        unseenJointSum += betaProbability;
        massTotal += seenJointSum;
        massTotal += unseenJointSum;
        // conditional probability P(f|m)
        for (int groupIndex = 0; groupIndex < groupList.getNumberElements(); groupIndex++) {
            FingerprintGroup group = groupList.getElement(groupIndex);
            group.setConditionalProbability_sp(group.getJointProbability() / massTotal);
        }
        groupList.setAlphaProb(alphaProbability / massTotal);
        groupList.setBetaProb(betaProbability / massTotal);
        groupList.setProbabilityToConditionalProbability_sp();
        groupList.calculateSumProbabilites();
    }
}
Also used : IMatch(de.ipbhalle.metfraglib.interfaces.IMatch) IAtomContainer(org.openscience.cdk.interfaces.IAtomContainer) MassToFingerprintGroupListCollection(de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupListCollection) ICandidate(de.ipbhalle.metfraglib.interfaces.ICandidate) MassToFingerprintGroupList(de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupList) FastBitArray(de.ipbhalle.metfraglib.FastBitArray) MassFingerprintMatch(de.ipbhalle.metfraglib.match.MassFingerprintMatch) Fingerprint(de.ipbhalle.metfraglib.fingerprint.Fingerprint) MatchList(de.ipbhalle.metfraglib.list.MatchList) CombinedSingleCandidateMetFragProcess(de.ipbhalle.metfraglib.process.CombinedSingleCandidateMetFragProcess) Fingerprint(de.ipbhalle.metfraglib.fingerprint.Fingerprint) MassToFingerprintsHashMap(de.ipbhalle.metfraglib.substructure.MassToFingerprintsHashMap) IFragment(de.ipbhalle.metfraglib.interfaces.IFragment)

Example 3 with MassToFingerprintGroupListCollection

use of de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupListCollection in project MetFragRelaunched by ipb-halle.

the class AutomatedPeakFingerprintAnnotationScore method singlePostCalculate.

/**
 * Computes the peak fingerprint annotation score of the current candidate as a sum of
 * log-probabilities, one term per group list of the peak annotation collection.
 * <p>
 * The probability used for each term is recorded together with a type code:
 * 1 = annotated fingerprint, 2 = fingerprint of size 1 or dummy entry "0",
 * 3 = alpha probability, 4 = beta probability. The score, the number of type-1 terms and
 * a string built by {@code getProbTypeString} are stored as candidate properties; the
 * {@code "PeakMatchList"} property is removed afterwards.
 */
@Override
public void singlePostCalculate() {
    this.value = 0.0;
    MassToFingerprintGroupListCollection annotatedPeaks = (MassToFingerprintGroupListCollection) this.settings.get(VariableNames.PEAK_TO_FINGERPRINT_GROUP_LIST_COLLECTION_NAME);
    java.util.ArrayList<?> candidatePeakMatches = (java.util.ArrayList<?>) this.candidate.getProperty("PeakMatchList");
    int matches = 0;
    java.util.ArrayList<Double> termMasses = new java.util.ArrayList<Double>();
    java.util.ArrayList<Double> termProbabilities = new java.util.ArrayList<Double>();
    // found - 1; non-found - 2 (fp="0"); alpha - 3; beta - 4
    java.util.ArrayList<Integer> termTypes = new java.util.ArrayList<Integer>();
    for (int index = 0; index < annotatedPeaks.getNumberElements(); index++) {
        MassToFingerprintGroupList groupList = annotatedPeaks.getElement(index);
        Double peakMass = groupList.getPeakmz();
        MassFingerprintMatch peakMatch = getMatchByMass(candidatePeakMatches, peakMass);
        double probability;
        int type;
        if (peakMatch != null) {
            FastBitArray matchFingerprint = new FastBitArray(peakMatch.getFingerprint());
            double annotatedProbability = groupList.getMatchingProbability(matchFingerprint);
            if (annotatedProbability != 0.0) {
                // a non-zero probability was returned for this fingerprint
                probability = annotatedProbability;
                if (matchFingerprint.getSize() != 1) {
                    matches++;
                    type = 1;
                } else {
                    type = 2;
                }
            } else if (matchFingerprint.equals(new FastBitArray("0")) && groupList.getElementByFingerprint(matchFingerprint) == null) {
                // dummy fingerprint without an own entry
                probability = groupList.getBetaProb();
                type = 4;
            } else {
                probability = groupList.getAlphaProb();
                type = 3;
            }
        } else {
            // no match of the candidate for this mass: fall back to the dummy entry if there is one
            FingerprintGroup dummyGroup = groupList.getElementByFingerprint(new FastBitArray("0"));
            if (dummyGroup != null) {
                probability = dummyGroup.getProbability();
                type = 2;
            } else {
                probability = groupList.getBetaProb();
                type = 4;
            }
        }
        this.value += Math.log(probability);
        termProbabilities.add(probability);
        termTypes.add(type);
        termMasses.add(peakMass);
    }
    if (annotatedPeaks.getNumberElements() == 0) {
        this.value = 0.0;
    }
    this.candidate.setProperty("AutomatedPeakFingerprintAnnotationScore_Matches", matches);
    this.candidate.setProperty("AutomatedPeakFingerprintAnnotationScore", this.value);
    this.candidate.setProperty("AutomatedPeakFingerprintAnnotationScore_Probtypes", this.getProbTypeString(termProbabilities, termTypes, termMasses));
    this.candidate.removeProperty("PeakMatchList");
}
Also used : MassToFingerprintGroupListCollection(de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupListCollection) MassToFingerprintGroupList(de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupList) FastBitArray(de.ipbhalle.metfraglib.FastBitArray) FingerprintGroup(de.ipbhalle.metfraglib.substructure.FingerprintGroup) MassFingerprintMatch(de.ipbhalle.metfraglib.match.MassFingerprintMatch)

Example 4 with MassToFingerprintGroupListCollection

use of de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupListCollection in project MetFragRelaunched by ipb-halle.

the class AutomatedLossFingerprintAnnotationScore method singlePostCalculate.

/**
 * Computes the loss fingerprint annotation score of the current candidate as a sum of
 * log-probabilities, one term per entry of the candidate's {@code "LossMatchList"} property.
 * <p>
 * The probability used for each term is recorded together with a type code:
 * 1 = annotated fingerprint, 2 = annotated fingerprint of size 1 (dummy),
 * 3 = alpha probability, 4 = beta probability. The score, the number of terms with a non-zero
 * matching probability and a string built by {@code getProbTypeString} are stored as candidate
 * properties; the {@code "LossMatchList"} property is removed afterwards.
 */
@Override
public void singlePostCalculate() {
    this.value = 0.0;
    MassToFingerprintGroupListCollection annotatedLosses = (MassToFingerprintGroupListCollection) this.settings.get(VariableNames.LOSS_TO_FINGERPRINT_GROUP_LIST_COLLECTION_NAME);
    // working copy of all losses found in the peak list
    java.util.LinkedList<?> remainingPeakListLosses = (java.util.LinkedList<?>) ((java.util.LinkedList<?>) this.settings.get(VariableNames.LOSS_MASSES_FOUND_PEAKLIST_NAME)).clone();
    int matches = 0;
    Double mzppm = (Double) settings.get(VariableNames.RELATIVE_MASS_DEVIATION_NAME);
    Double mzabs = (Double) settings.get(VariableNames.ABSOLUTE_MASS_DEVIATION_NAME);
    // loss match list of the current candidate
    java.util.ArrayList<?> candidateLosses = (java.util.ArrayList<?>) this.candidate.getProperty("LossMatchList");
    java.util.ArrayList<Double> termMasses = new java.util.ArrayList<Double>();
    java.util.ArrayList<Double> termProbabilities = new java.util.ArrayList<Double>();
    // found - 1; non-found - 2 (fp="0"); alpha - 3; beta - 4
    java.util.ArrayList<Integer> termTypes = new java.util.ArrayList<Integer>();
    for (Object entry : candidateLosses) {
        MassFingerprintMatch lossMatch = (MassFingerprintMatch) entry;
        // NOTE(review): indexOf yields -1 for a mass missing from the working copy, which makes
        // remove(int) throw; presumably every candidate loss is expected to be listed - confirm
        int position = remainingPeakListLosses.indexOf(lossMatch.getMass());
        remainingPeakListLosses.remove(position);
        MassToFingerprintGroupList groupList = annotatedLosses.getElementByPeak(lossMatch.getMass(), mzppm, mzabs);
        FastBitArray lossFingerprint = new FastBitArray(lossMatch.getFingerprint());
        double annotatedProbability = groupList.getMatchingProbability(lossFingerprint);
        boolean dummyFingerprint = lossFingerprint.getSize() == 1;
        double probability;
        int type;
        if (annotatedProbability != 0.0) {
            // a non-zero probability was returned for this fingerprint
            matches++;
            probability = annotatedProbability;
            type = dummyFingerprint ? 2 : 1;
        } else if (dummyFingerprint) {
            probability = groupList.getBetaProb();
            type = 4;
        } else {
            probability = groupList.getAlphaProb();
            type = 3;
        }
        this.value += Math.log(probability);
        termProbabilities.add(probability);
        termTypes.add(type);
        termMasses.add(lossMatch.getMass());
    }
    if (annotatedLosses.getNumberElements() == 0) {
        this.value = 0.0;
    }
    this.candidate.setProperty("AutomatedLossFingerprintAnnotationScore_Matches", matches);
    this.candidate.setProperty("AutomatedLossFingerprintAnnotationScore", this.value);
    this.candidate.setProperty("AutomatedLossFingerprintAnnotationScore_Probtypes", this.getProbTypeString(termProbabilities, termTypes, termMasses));
    this.candidate.removeProperty("LossMatchList");
}
Also used : MassToFingerprintGroupListCollection(de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupListCollection) MassToFingerprintGroupList(de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupList) FastBitArray(de.ipbhalle.metfraglib.FastBitArray) MassFingerprintMatch(de.ipbhalle.metfraglib.match.MassFingerprintMatch)

Example 5 with MassToFingerprintGroupListCollection

use of de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupListCollection in project MetFragRelaunched by ipb-halle.

the class AutomatedPeakFingerprintAnnotationScoreInitialiser method postProcessScoreParameters.

/**
 * Finalises the peak-fingerprint annotation probabilities after the candidates have been processed.
 * <p>
 * First pass: for every successful process, the first scored candidate's peaks (matched peaks
 * followed by non-explained peaks) are paired with a fingerprint and stored on the candidate as
 * property {@code "PeakMatchList"}. Peak/fingerprint pairs not contained in the group list read
 * from the settings are collected as "unseen" background (F_u).
 * <p>
 * Second pass: the denominator of P(f,m) is computed from the counts stored in the settings plus
 * the unseen counts, written back to the settings, and used to set joint and conditional
 * probabilities as well as the alpha/beta probabilities on every group list.
 *
 * @param settings settings object providing the processes, the peak list, the fingerprint type,
 *                 the peak-to-fingerprint group list collection and the count/alpha/beta values;
 *                 receives the computed denominator value
 * @throws Exception declared by the signature; no exception is thrown explicitly here, so anything
 *                   raised by the called helpers propagates unchanged
 */
public void postProcessScoreParameters(Settings settings) throws Exception {
    CombinedSingleCandidateMetFragProcess[] processes = (CombinedSingleCandidateMetFragProcess[]) settings.get(VariableNames.METFRAG_PROCESSES_NAME);
    // to determine F_u
    MassToFingerprintsHashMap peakMassToFingerprints = new MassToFingerprintsHashMap();
    MassToFingerprintGroupListCollection peakToFingerprintGroupListCollection = (MassToFingerprintGroupListCollection) settings.get(VariableNames.PEAK_TO_FINGERPRINT_GROUP_LIST_COLLECTION_NAME);
    Fingerprint fingerprint = new Fingerprint((String) settings.get(VariableNames.FINGERPRINT_TYPE_NAME));
    DefaultPeakList peakList = (DefaultPeakList) settings.get(VariableNames.PEAK_LIST_NAME);
    for (CombinedSingleCandidateMetFragProcess scmfp : processes) {
        // only candidates of successful single runs are considered
        if (!scmfp.wasSuccessful())
            continue;
        ICandidate candidate = scmfp.getScoredPrecursorCandidates()[0];
        MatchList matchlist = candidate.getMatchList();
        java.util.LinkedList<Double> nonExplainedPeaks = this.getNonExplainedPeaks(peakList, matchlist);
        java.util.ArrayList<MassFingerprintMatch> peakMatchlist = new java.util.ArrayList<MassFingerprintMatch>();
        // a missing match list means there are no matched peaks; all further index
        // arithmetic uses this guarded value so a null match list is never dereferenced
        int numberMatchedPeaks = matchlist == null ? 0 : matchlist.getNumberElements();
        candidate.initialisePrecursorCandidate();
        // non-explained peaks are consumed strictly in order, so an iterator replaces
        // the index-based access on the linked list
        java.util.Iterator<Double> nonExplainedPeakIterator = nonExplainedPeaks.iterator();
        int numberPeaks = numberMatchedPeaks + nonExplainedPeaks.size();
        for (int j = 0; j < numberPeaks; j++) {
            FastBitArray currentFingerprint = null;
            Double mass = null;
            if (j < numberMatchedPeaks) {
                // valid match (peak-fragment assignment): fingerprint of the best matched fragment
                IMatch match = matchlist.getElement(j);
                mass = match.getMatchedPeak().getMass();
                IFragment frag = match.getBestMatchedFragment();
                try {
                    IAtomContainer conPre = fingerprint.getNormalizedAtomContainer(candidate.getPrecursorMolecule(), frag);
                    currentFingerprint = fingerprint.getNormalizedFastBitArrayFingerprint(conPre);
                } catch (InvalidSmilesException e) {
                    // NOTE(review): on failure currentFingerprint stays null and is still
                    // passed on below - confirm that downstream code tolerates this
                    e.printStackTrace();
                } catch (CDKException e) {
                    // NOTE(review): same as above, the fingerprint remains null
                    e.printStackTrace();
                }
            } else {
                // non-matched peak: represented by the one-bit dummy fingerprint
                mass = nonExplainedPeakIterator.next();
                currentFingerprint = new FastBitArray("0");
            }
            MassToFingerprintGroupList peakToFingerprintGroupList = peakToFingerprintGroupListCollection.getElementByPeak(mass);
            // peaks without a group list in the collection are ignored
            if (peakToFingerprintGroupList == null)
                continue;
            peakMatchlist.add(new MassFingerprintMatch(mass, currentFingerprint));
            // check whether fingerprint was observed for current peak mass in the training data
            if (!peakToFingerprintGroupList.containsFingerprint(currentFingerprint)) {
                // if not add the fingerprint to background by addFingerprint function
                // addFingerprint checks also whether fingerprint was already added
                peakMassToFingerprints.addFingerprint(mass, currentFingerprint);
            }
        }
        candidate.setProperty("PeakMatchList", peakMatchlist);
    }
    // f_s (matched)
    double f_seen_matched = (double) settings.get(VariableNames.PEAK_FINGERPRINT_MATCHED_TUPLE_COUNT_NAME);
    // f_s (non-matched)
    double f_seen_non_matched = (double) settings.get(VariableNames.PEAK_FINGERPRINT_NON_MATCHED_TUPLE_COUNT_NAME);
    // f_u (matched)
    double f_unseen_matched = peakMassToFingerprints.getOverallMatchedSize();
    // f_u (non-matched)
    double f_unseen_non_matched = peakMassToFingerprints.getOverallNonMatchedSize();
    // alpha
    double alpha = (double) settings.get(VariableNames.PEAK_FINGERPRINT_ANNOTATION_ALPHA_VALUE_NAME);
    // beta
    double beta = (double) settings.get(VariableNames.PEAK_FINGERPRINT_ANNOTATION_BETA_VALUE_NAME);
    // \sum_N \sum_Ln 1
    double sumFingerprintFrequencies = (double) settings.get(VariableNames.PEAK_FINGERPRINT_DENOMINATOR_COUNT_NAME);
    // set value for denominator of P(f,m)
    double denominatorValue = sumFingerprintFrequencies + alpha * (f_seen_matched + f_unseen_matched) + beta * (f_seen_non_matched + f_unseen_non_matched);
    settings.set(VariableNames.PEAK_FINGERPRINT_DENOMINATOR_VALUE_NAME, denominatorValue);
    // P(f,m) F_u
    double alphaProbability = alpha / denominatorValue;
    // p(f,m) not annotated
    double betaProbability = beta / denominatorValue;
    for (int i = 0; i < peakToFingerprintGroupListCollection.getNumberElements(); i++) {
        MassToFingerprintGroupList groupList = peakToFingerprintGroupListCollection.getElement(i);
        // sum_f P(f,m) over the observed fingerprints (F_s)
        // at this stage getProbability() returns the absolute counts from the annotation files
        double sumFsProbabilities = 0.0;
        for (int ii = 0; ii < groupList.getNumberElements(); ii++) {
            // first calculate P(f,m); fingerprints of size 1 are the dummy fingerprint and use beta
            if (groupList.getElement(ii).getFingerprint().getSize() != 1)
                groupList.getElement(ii).setJointProbability((groupList.getElement(ii).getProbability() + alpha) / denominatorValue);
            else
                groupList.getElement(ii).setJointProbability((groupList.getElement(ii).getProbability() + beta) / denominatorValue);
            sumFsProbabilities += groupList.getElement(ii).getJointProbability();
        }
        // sum of probabilities for un-observed fingerprints of the current mass, plus one beta term
        double sumFuProbabilities = alphaProbability * peakMassToFingerprints.getSizeMatched(groupList.getPeakmz());
        sumFuProbabilities += betaProbability;
        double sum_f = sumFsProbabilities + sumFuProbabilities;
        for (int ii = 0; ii < groupList.getNumberElements(); ii++) {
            // second calculate P(f|m)
            groupList.getElement(ii).setConditionalProbability_sp(groupList.getElement(ii).getJointProbability() / sum_f);
        }
        groupList.setAlphaProb(alphaProbability / sum_f);
        groupList.setBetaProb(betaProbability / sum_f);
        groupList.setProbabilityToConditionalProbability_sp();
        groupList.calculateSumProbabilites();
    }
}
Also used : IMatch(de.ipbhalle.metfraglib.interfaces.IMatch) Fingerprint(de.ipbhalle.metfraglib.fingerprint.Fingerprint) IAtomContainer(org.openscience.cdk.interfaces.IAtomContainer) MatchList(de.ipbhalle.metfraglib.list.MatchList) CDKException(org.openscience.cdk.exception.CDKException) MassToFingerprintGroupListCollection(de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupListCollection) CombinedSingleCandidateMetFragProcess(de.ipbhalle.metfraglib.process.CombinedSingleCandidateMetFragProcess) DefaultPeakList(de.ipbhalle.metfraglib.list.DefaultPeakList) ICandidate(de.ipbhalle.metfraglib.interfaces.ICandidate) Fingerprint(de.ipbhalle.metfraglib.fingerprint.Fingerprint) MassToFingerprintGroupList(de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupList) MassToFingerprintsHashMap(de.ipbhalle.metfraglib.substructure.MassToFingerprintsHashMap) IFragment(de.ipbhalle.metfraglib.interfaces.IFragment) FastBitArray(de.ipbhalle.metfraglib.FastBitArray) InvalidSmilesException(org.openscience.cdk.exception.InvalidSmilesException) MassFingerprintMatch(de.ipbhalle.metfraglib.match.MassFingerprintMatch)

Aggregations

MassToFingerprintGroupList (de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupList)8 MassToFingerprintGroupListCollection (de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupListCollection)8 FastBitArray (de.ipbhalle.metfraglib.FastBitArray)6 FingerprintGroup (de.ipbhalle.metfraglib.substructure.FingerprintGroup)5 Fingerprint (de.ipbhalle.metfraglib.fingerprint.Fingerprint)4 ICandidate (de.ipbhalle.metfraglib.interfaces.ICandidate)4 MassFingerprintMatch (de.ipbhalle.metfraglib.match.MassFingerprintMatch)4 MassToFingerprintsHashMap (de.ipbhalle.metfraglib.substructure.MassToFingerprintsHashMap)4 File (java.io.File)4 DefaultPeakList (de.ipbhalle.metfraglib.list.DefaultPeakList)3 LocalCSVDatabase (de.ipbhalle.metfraglib.database.LocalCSVDatabase)2 LocalPSVDatabase (de.ipbhalle.metfraglib.database.LocalPSVDatabase)2 MultipleHeadersFoundInInputDatabaseException (de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException)2 IDatabase (de.ipbhalle.metfraglib.interfaces.IDatabase)2 IFragment (de.ipbhalle.metfraglib.interfaces.IFragment)2 IMatch (de.ipbhalle.metfraglib.interfaces.IMatch)2 CandidateList (de.ipbhalle.metfraglib.list.CandidateList)2 MatchList (de.ipbhalle.metfraglib.list.MatchList)2 CombinedSingleCandidateMetFragProcess (de.ipbhalle.metfraglib.process.CombinedSingleCandidateMetFragProcess)2 Settings (de.ipbhalle.metfraglib.settings.Settings)2