use of de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupListCollection in project MetFragRelaunched by ipb-halle.
the class AutomatedLossFingerprintAnnotationScoreInitialiser method initScoreParameters.
/**
 * Builds the loss-to-fingerprint annotation collection for the current peak list and stores it in
 * {@code settings}.
 *
 * <p>The method returns immediately when
 * {@code VariableNames.LOSS_TO_FINGERPRINT_GROUP_LIST_COLLECTION_NAME} is already set to a non-null
 * value. Otherwise it reads the annotation data either from the file configured under
 * {@code VariableNames.FINGERPRINT_LOSS_ANNOTATION_FILE_NAME} or, if none is configured, from the
 * classpath resource {@code loss_annotations_pos.txt} / {@code loss_annotations_neg.txt} (chosen by
 * ion mode). Only annotation entries whose mass matches a mass difference derived from the peak
 * list are kept.
 *
 * <p>Settings written: the denominator count, the matched and non-matched tuple counts (only when
 * a line starting with {@code SUMMARY} is encountered, using the counters as they stand at that
 * line), the list of loss masses found in the peak list, and the resulting collection.
 *
 * @param settings the settings to read the peak list, mass deviations and ion mode from, and to
 *        write the results to
 * @throws Exception if the annotation source cannot be found, opened or read, if it is empty, or
 *         if one of its numeric fields cannot be parsed
 */
@Override
public void initScoreParameters(Settings settings) throws Exception {
    // the collection is built only once; an already initialised entry is left untouched
    if (settings.containsKey(VariableNames.LOSS_TO_FINGERPRINT_GROUP_LIST_COLLECTION_NAME)
            && settings.get(VariableNames.LOSS_TO_FINGERPRINT_GROUP_LIST_COLLECTION_NAME) != null) {
        return;
    }
    MassToFingerprintGroupListCollection lossToFingerprintGroupListCollection = new MassToFingerprintGroupListCollection();
    DefaultPeakList peakList = (DefaultPeakList) settings.get(VariableNames.PEAK_LIST_NAME);
    Double mzppm = (Double) settings.get(VariableNames.RELATIVE_MASS_DEVIATION_NAME);
    Double mzabs = (Double) settings.get(VariableNames.ABSOLUTE_MASS_DEVIATION_NAME);
    java.util.LinkedList<Double> lossMassesFound = new java.util.LinkedList<Double>();

    // pick the annotation source: a user supplied file wins over the bundled resource
    BufferedReader annotationSource;
    if (settings.containsKey(VariableNames.FINGERPRINT_LOSS_ANNOTATION_FILE_NAME)
            && settings.get(VariableNames.FINGERPRINT_LOSS_ANNOTATION_FILE_NAME) != null) {
        annotationSource = new BufferedReader(new FileReader(new File((String) settings.get(VariableNames.FINGERPRINT_LOSS_ANNOTATION_FILE_NAME))));
    } else {
        String filename = "loss_annotations_neg.txt";
        if ((Boolean) settings.get(VariableNames.IS_POSITIVE_ION_MODE_NAME)) {
            filename = "loss_annotations_pos.txt";
        }
        java.io.InputStream is = AutomatedPeakFingerprintAnnotationScoreInitialiser.class.getResourceAsStream("/" + filename);
        // getResourceAsStream signals a missing resource with null; fail with a clear message
        // instead of a NullPointerException inside InputStreamReader
        if (is == null) {
            throw new java.io.FileNotFoundException("Loss annotation resource not found on classpath: /" + filename);
        }
        annotationSource = new java.io.BufferedReader(new java.io.InputStreamReader(is));
    }

    // try-with-resources guarantees the reader is closed even when parsing fails half-way
    try (BufferedReader breader = annotationSource) {
        Double neutralPrecursorMass = (Double) settings.get(VariableNames.PRECURSOR_NEUTRAL_MASS_NAME);
        Double adductMass = Constants.getIonisationTypeMassCorrection(Constants.ADDUCT_NOMINAL_MASSES.indexOf((Integer) settings.get(VariableNames.PRECURSOR_ION_MODE_NAME)), (Boolean) settings.get(VariableNames.IS_POSITIVE_ION_MODE_NAME));
        java.util.ArrayList<Double> massDifferences = this.calculatePeakDifferences(peakList, neutralPrecursorMass, adductMass);
        java.util.ArrayList<Double> uniqueMassDifferences = this.calculateUniquePeakDifferences(massDifferences, mzppm, mzabs);
        // number of annotated (mass, fingerprint) entries that were merged into an existing entry
        int numMatchedObservationsMerged = 0;
        // annotation entries keyed by the matched peak-list loss mass
        java.util.HashMap<Double, MassToFingerprintGroupList> mergedFingerprintGroupLists = new java.util.HashMap<Double, MassToFingerprintGroupList>();
        this.setPseudoCountValues(settings);

        // The first line holds the masses that were not matched in the training data (or "NA").
        // They are added with the dummy fingerprint "0".
        String firstLine = breader.readLine();
        if (firstLine == null) {
            throw new java.io.IOException("Loss annotation data is empty: missing the line with non-matched masses");
        }
        String nonMatchedMassesString = firstLine.trim();
        int numNonMatchElements = 0;
        int numNonMatchOccurrences = 0;
        if (!nonMatchedMassesString.equals("NA")) {
            // masses are separated by ";"
            String[] tmp = nonMatchedMassesString.split(";");
            numNonMatchElements = tmp.length;
            for (int k = 0; k < tmp.length; k++) {
                // each entry is a mass optionally followed by a count; fields are split on whitespace
                // NOTE(review): an earlier comment described the entry format as mass[:counts] split
                // by ":" - confirm the separator against the annotation files
                String[] tmp2 = tmp[k].split("\\s+");
                int count = 1;
                Double newMass = Double.parseDouble(tmp2[0]);
                // use the explicit count when present, otherwise the default of 1
                if (tmp2.length == 2) {
                    count = Integer.parseInt(tmp2[1]);
                }
                numNonMatchOccurrences += count;
                // check whether the mass is also present among the losses of our peak list
                Double matchedMass = this.containsMass(newMass, uniqueMassDifferences, mzabs, mzppm);
                // not present and already beyond the largest peak mass: stop scanning this line
                if (matchedMass == null && newMass > peakList.getMaximumMassValue()) {
                    break;
                }
                if (matchedMass == null) {
                    continue;
                }
                FingerprintGroup group = new FingerprintGroup(1.0);
                group.setNumberObserved(count);
                group.setFingerprint("0");
                if (mergedFingerprintGroupLists.containsKey(matchedMass)) {
                    MassToFingerprintGroupList currentGroupList = mergedFingerprintGroupLists.get(matchedMass);
                    FingerprintGroup curGroup = currentGroupList.getElementByFingerprint(group.getFingerprint());
                    if (curGroup == null) {
                        // fingerprint not yet stored for this mass
                        currentGroupList.addElement(group);
                    } else {
                        // merged into an existing entry, so one distinct non-matched element less
                        numNonMatchElements--;
                        curGroup.setNumberObserved(curGroup.getNumberObserved() + group.getNumberObserved());
                        curGroup.setProbability(curGroup.getProbability() + group.getProbability());
                    }
                } else {
                    // first entry for this mass
                    MassToFingerprintGroupList currentGroupList = new MassToFingerprintGroupList(matchedMass);
                    currentGroupList.addElement(group);
                    mergedFingerprintGroupLists.put(matchedMass, currentGroupList);
                }
            }
        }

        // remaining lines: loss-fingerprint assignments annotated in the training data
        String line;
        while ((line = breader.readLine()) != null) {
            line = line.trim();
            // skip blank lines and comments
            if (line.length() == 0 || line.startsWith("#")) {
                continue;
            }
            if (line.startsWith("SUMMARY")) {
                String[] tmp = line.split("\\s+");
                // sum of overall occurrences
                settings.set(VariableNames.LOSS_FINGERPRINT_DENOMINATOR_COUNT_NAME, Double.parseDouble(tmp[2]) + numNonMatchOccurrences);
                // number of different matched tuples, corrected by the entries merged so far
                settings.set(VariableNames.LOSS_FINGERPRINT_MATCHED_TUPLE_COUNT_NAME, Double.parseDouble(tmp[1]) - numMatchedObservationsMerged);
                // number of different non-matched tuples
                settings.set(VariableNames.LOSS_FINGERPRINT_NON_MATCHED_TUPLE_COUNT_NAME, (double) numNonMatchElements);
                continue;
            }
            String[] tmp = line.split("\\s+");
            Double loss = Double.parseDouble(tmp[0]);
            // only annotation entries whose loss also occurs in the peak list are relevant
            Double matchedMass = this.containsMass(loss, uniqueMassDifferences, mzabs, mzppm);
            if (matchedMass == null) {
                continue;
            }
            FingerprintGroup[] groups = this.getFingerprintGroup(tmp);
            if (mergedFingerprintGroupLists.containsKey(matchedMass)) {
                MassToFingerprintGroupList currentGroupList = mergedFingerprintGroupLists.get(matchedMass);
                for (int i = 0; i < groups.length; i++) {
                    FingerprintGroup curGroup = currentGroupList.getElementByFingerprint(groups[i].getFingerprint());
                    if (curGroup == null) {
                        // fingerprint not yet stored for this mass
                        currentGroupList.addElement(groups[i]);
                    } else {
                        // a merged real fingerprint (size != 1, i.e. not the dummy) reduces the
                        // number of unique matched tuples
                        if (curGroup.getFingerprint().getSize() != 1) {
                            numMatchedObservationsMerged++;
                        }
                        curGroup.setNumberObserved(curGroup.getNumberObserved() + groups[i].getNumberObserved());
                        curGroup.setProbability(curGroup.getProbability() + groups[i].getProbability());
                    }
                }
            } else {
                // first entry for this mass
                MassToFingerprintGroupList currentGroupList = new MassToFingerprintGroupList(matchedMass);
                for (int i = 0; i < groups.length; i++) {
                    currentGroupList.addElement(groups[i]);
                }
                mergedFingerprintGroupLists.put(matchedMass, currentGroupList);
            }
        }

        // move the merged lists into the (sorted) result collection
        for (MassToFingerprintGroupList groupList : mergedFingerprintGroupLists.values()) {
            lossToFingerprintGroupListCollection.addElementSorted(groupList);
        }
        // remember every peak-list mass difference that has an annotation entry
        for (int i = 0; i < massDifferences.size(); i++) {
            if (lossToFingerprintGroupListCollection.getElementByPeak(massDifferences.get(i), mzppm, mzabs) != null) {
                lossMassesFound.add(massDifferences.get(i));
            }
        }
    }
    settings.set(VariableNames.LOSS_MASSES_FOUND_PEAKLIST_NAME, lossMassesFound);
    settings.set(VariableNames.LOSS_TO_FINGERPRINT_GROUP_LIST_COLLECTION_NAME, lossToFingerprintGroupListCollection);
}
use of de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupListCollection in project MetFragRelaunched by ipb-halle.
the class AutomatedLossFingerprintAnnotationScoreInitialiser method postProcessScoreParameters.
/**
 * Collects the neutral-loss fingerprint matches of every successfully processed candidate and
 * turns the annotation counts of the loss collection into probabilities.
 *
 * <p>For each candidate the resulting match list is stored as candidate property
 * {@code "LossMatchList"}. The denominator of the joint probability is written to
 * {@code VariableNames.LOSS_FINGERPRINT_DENOMINATOR_VALUE_NAME}.
 *
 * @param settings the settings providing the processes, the loss annotation collection, the mass
 *        deviations, the ion mode and the pseudo counts
 * @throws AtomTypeNotKnownFromInputListException declared by the signature; raised by callees
 * @throws Exception declared by the signature; raised by callees
 */
public void postProcessScoreParameters(Settings settings) throws AtomTypeNotKnownFromInputListException, Exception {
    CombinedSingleCandidateMetFragProcess[] processes = (CombinedSingleCandidateMetFragProcess[]) settings.get(VariableNames.METFRAG_PROCESSES_NAME);
    // (loss mass, fingerprint) pairs produced by candidates but absent from the annotation data
    MassToFingerprintsHashMap unseenLossFingerprints = new MassToFingerprintsHashMap();
    MassToFingerprintGroupListCollection annotatedLosses = (MassToFingerprintGroupListCollection) settings.get(VariableNames.LOSS_TO_FINGERPRINT_GROUP_LIST_COLLECTION_NAME);
    Double mzppm = (Double) settings.get(VariableNames.RELATIVE_MASS_DEVIATION_NAME);
    Double mzabs = (Double) settings.get(VariableNames.ABSOLUTE_MASS_DEVIATION_NAME);
    int ionmode = (Integer) settings.get(VariableNames.PRECURSOR_ION_MODE_NAME);
    boolean ispositive = (Boolean) settings.get(VariableNames.IS_POSITIVE_ION_MODE_NAME);
    double adductMass = Constants.getIonisationTypeMassCorrection(Constants.ADDUCT_NOMINAL_MASSES.indexOf(ionmode), ispositive);
    double precursorMass = (Double) settings.get(VariableNames.PRECURSOR_NEUTRAL_MASS_NAME);
    double ionmass = MathTools.round(precursorMass + adductMass);
    Fingerprint fingerprint = new Fingerprint((String) settings.get(VariableNames.FINGERPRINT_TYPE_NAME));

    for (CombinedSingleCandidateMetFragProcess process : processes) {
        // unsuccessful runs contribute nothing
        if (!process.wasSuccessful()) {
            continue;
        }
        ICandidate candidate = process.getScoredPrecursorCandidates()[0];
        java.util.ArrayList<MassFingerprintMatch> lossMatchlist = new java.util.ArrayList<MassFingerprintMatch>();
        MatchList matchlist = candidate.getMatchList();
        if (matchlist != null) {
            candidate.initialisePrecursorCandidate();
            for (int lower = 0; lower < matchlist.getNumberElements(); lower++) {
                IMatch lowerMatch = matchlist.getElement(lower);
                IFragment lowerFragment = lowerMatch.getBestMatchedFragment();
                double lowerPeakMass = lowerMatch.getMatchedPeak().getMass();

                // losses between this fragment and every later fragment that contains it
                for (int upper = lower + 1; upper < matchlist.getNumberElements(); upper++) {
                    IMatch upperMatch = matchlist.getElement(upper);
                    double upperPeakMass = upperMatch.getMatchedPeak().getMass();
                    IFragment upperFragment = upperMatch.getBestMatchedFragment();
                    if (!upperFragment.isRealSubStructure(lowerFragment)) {
                        continue;
                    }
                    double pairLoss = MathTools.round(upperPeakMass - lowerPeakMass);
                    // losses without an annotation entry are of no interest
                    if (annotatedLosses.getElementByPeak(pairLoss, mzppm, mzabs) == null) {
                        continue;
                    }
                    IFragment lostPart = upperFragment.getDifferenceFragment(candidate.getPrecursorMolecule(), lowerFragment);
                    if (lostPart == null) {
                        continue;
                    }
                    IAtomContainer lostAtoms = fingerprint.getNormalizedAtomContainer(candidate.getPrecursorMolecule(), lostPart);
                    lossMatchlist.add(new MassFingerprintMatch(pairLoss, fingerprint.getNormalizedFastBitArrayFingerprint(lostAtoms)));
                }

                // the loss between the precursor ion and this fragment
                double precursorLoss = MathTools.round(ionmass - lowerPeakMass);
                if (annotatedLosses.getElementByPeak(precursorLoss, mzppm, mzabs) != null) {
                    IFragment lostPart = lowerFragment.getDifferenceFragment(candidate.getPrecursorMolecule());
                    if (lostPart != null) {
                        IAtomContainer lostAtoms = fingerprint.getNormalizedAtomContainer(candidate.getPrecursorMolecule(), lostPart);
                        lossMatchlist.add(new MassFingerprintMatch(precursorLoss, fingerprint.getNormalizedFastBitArrayFingerprint(lostAtoms)));
                    }
                }
            }
        }

        // register fingerprints the annotation data has not seen for the respective loss mass
        for (MassFingerprintMatch lossMatch : lossMatchlist) {
            MassToFingerprintGroupList annotatedGroupList = annotatedLosses.getElementByPeak(lossMatch.getMass(), mzppm, mzabs);
            if (annotatedGroupList == null) {
                continue;
            }
            FastBitArray candidateFingerprint = lossMatch.getFingerprint();
            if (!annotatedGroupList.containsFingerprint(candidateFingerprint)) {
                // addFingerprint itself takes care of fingerprints that were added before
                unseenLossFingerprints.addFingerprint(lossMatch.getMass(), candidateFingerprint);
            }
        }

        // pad with the losses this candidate did not explain so that the loss match lists of
        // all candidates have the same length
        java.util.LinkedList<?> lossMassesFoundInPeakList = (java.util.LinkedList<?>) settings.get(VariableNames.LOSS_MASSES_FOUND_PEAKLIST_NAME);
        this.addNonExplainedLosses(lossMassesFoundInPeakList, lossMatchlist);
        candidate.setProperty("LossMatchList", lossMatchlist);
    }

    // pseudo counts
    double alpha = (double) settings.get(VariableNames.LOSS_FINGERPRINT_ANNOTATION_ALPHA_VALUE_NAME);
    double beta = (double) settings.get(VariableNames.LOSS_FINGERPRINT_ANNOTATION_BETA_VALUE_NAME);
    // tuple counts seen in the annotation data
    double seenMatched = (double) settings.get(VariableNames.LOSS_FINGERPRINT_MATCHED_TUPLE_COUNT_NAME);
    double seenNonMatched = (double) settings.get(VariableNames.LOSS_FINGERPRINT_NON_MATCHED_TUPLE_COUNT_NAME);
    // tuple counts only produced by the candidates
    double unseenMatched = unseenLossFingerprints.getOverallMatchedSize();
    double unseenNonMatched = unseenLossFingerprints.getOverallNonMatchedSize();
    // overall number of observations
    double sumFingerprintFrequencies = (double) settings.get(VariableNames.LOSS_FINGERPRINT_DENOMINATOR_COUNT_NAME);

    // denominator of P(f,m)
    double denominatorValue = sumFingerprintFrequencies + alpha * (seenMatched + unseenMatched) + beta * (seenNonMatched + unseenNonMatched);
    settings.set(VariableNames.LOSS_FINGERPRINT_DENOMINATOR_VALUE_NAME, denominatorValue);
    // P(f,m) of an unseen fingerprint and of a non-annotated one
    double alphaProbability = alpha / denominatorValue;
    double betaProbability = beta / denominatorValue;

    for (int listIndex = 0; listIndex < annotatedLosses.getNumberElements(); listIndex++) {
        MassToFingerprintGroupList groupList = annotatedLosses.getElement(listIndex);

        // joint probabilities P(f,m) of the seen fingerprints; getProbability() still holds the
        // absolute count here. A fingerprint of size 1 is the dummy and gets beta instead of alpha.
        double seenSum = 0.0;
        for (int g = 0; g < groupList.getNumberElements(); g++) {
            double pseudoCount = groupList.getElement(g).getFingerprint().getSize() != 1 ? alpha : beta;
            groupList.getElement(g).setJointProbability((groupList.getElement(g).getProbability() + pseudoCount) / denominatorValue);
            seenSum += groupList.getElement(g).getJointProbability();
        }

        // probability mass of the unseen fingerprints of this mass plus the single beta term
        double unseenSum = alphaProbability * unseenLossFingerprints.getSizeMatched(groupList.getPeakmz());
        unseenSum += betaProbability;

        double normaliser = 0.0;
        normaliser += seenSum;
        normaliser += unseenSum;

        // conditional probabilities P(f|m)
        for (int g = 0; g < groupList.getNumberElements(); g++) {
            groupList.getElement(g).setConditionalProbability_sp(groupList.getElement(g).getJointProbability() / normaliser);
        }
        groupList.setAlphaProb(alphaProbability / normaliser);
        groupList.setBetaProb(betaProbability / normaliser);
        groupList.setProbabilityToConditionalProbability_sp();
        groupList.calculateSumProbabilites();
    }
}
use of de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupListCollection in project MetFragRelaunched by ipb-halle.
the class AutomatedPeakFingerprintAnnotationScore method singlePostCalculate.
/**
 * Computes the peak fingerprint annotation score of the current candidate as the sum of the
 * natural logarithms of one probability per annotated peak mass.
 *
 * <p>For every annotated peak the probability and its type are chosen as follows
 * (1 = found, 2 = dummy fingerprint "0", 3 = alpha, 4 = beta). The score, the number of type-1
 * matches and a textual summary are stored as candidate properties; the property
 * {@code "PeakMatchList"} is removed afterwards.
 */
@Override
public void singlePostCalculate() {
    this.value = 0.0;
    MassToFingerprintGroupListCollection annotatedPeaks = (MassToFingerprintGroupListCollection) this.settings.get(VariableNames.PEAK_TO_FINGERPRINT_GROUP_LIST_COLLECTION_NAME);
    java.util.ArrayList<?> peakMatchList = (java.util.ArrayList<?>) this.candidate.getProperty("PeakMatchList");
    int matches = 0;
    // parallel lists: one mass, probability and type per annotated peak
    java.util.ArrayList<Double> matchMasses = new java.util.ArrayList<Double>();
    java.util.ArrayList<Double> matchProb = new java.util.ArrayList<Double>();
    java.util.ArrayList<Integer> matchType = new java.util.ArrayList<Integer>();

    for (int peakIndex = 0; peakIndex < annotatedPeaks.getNumberElements(); peakIndex++) {
        MassToFingerprintGroupList groupList = annotatedPeaks.getElement(peakIndex);
        Double currentMass = groupList.getPeakmz();
        MassFingerprintMatch currentMatch = getMatchByMass(peakMatchList, currentMass);

        double probability;
        int type;
        if (currentMatch == null) {
            // the candidate has no entry for this peak: fall back to the dummy fingerprint
            FingerprintGroup dummyGroup = groupList.getElementByFingerprint(new FastBitArray("0"));
            if (dummyGroup == null) {
                probability = groupList.getBetaProb();
                type = 4;
            } else {
                probability = dummyGroup.getProbability();
                type = 2;
            }
        } else {
            FastBitArray currentFingerprint = new FastBitArray(currentMatch.getFingerprint());
            double matchingProbability = groupList.getMatchingProbability(currentFingerprint);
            if (matchingProbability != 0.0) {
                // fingerprint is known for this peak mass
                probability = matchingProbability;
                if (currentFingerprint.getSize() != 1) {
                    matches++;
                    type = 1;
                } else {
                    type = 2;
                }
            } else if (currentFingerprint.equals(new FastBitArray("0")) && groupList.getElementByFingerprint(currentFingerprint) == null) {
                // dummy fingerprint that is not part of the annotation for this mass
                probability = groupList.getBetaProb();
                type = 4;
            } else {
                probability = groupList.getAlphaProb();
                type = 3;
            }
        }

        this.value += Math.log(probability);
        matchProb.add(probability);
        matchType.add(type);
        matchMasses.add(currentMass);
    }

    if (annotatedPeaks.getNumberElements() == 0) {
        this.value = 0.0;
    }
    this.candidate.setProperty("AutomatedPeakFingerprintAnnotationScore_Matches", matches);
    this.candidate.setProperty("AutomatedPeakFingerprintAnnotationScore", this.value);
    this.candidate.setProperty("AutomatedPeakFingerprintAnnotationScore_Probtypes", this.getProbTypeString(matchProb, matchType, matchMasses));
    this.candidate.removeProperty("PeakMatchList");
}
use of de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupListCollection in project MetFragRelaunched by ipb-halle.
the class AutomatedLossFingerprintAnnotationScore method singlePostCalculate.
/**
 * Computes the loss fingerprint annotation score of the current candidate as the sum of the
 * natural logarithms of one probability per entry of the candidate's {@code "LossMatchList"}.
 *
 * <p>Probability types recorded per entry: 1 = fingerprint found, 2 = dummy fingerprint found
 * (fingerprint of size 1), 3 = alpha probability, 4 = beta probability. The score, the number of
 * entries with a non-zero matching probability and a textual summary are stored as candidate
 * properties; the property {@code "LossMatchList"} is removed afterwards.
 */
@Override
public void singlePostCalculate() {
    this.value = 0.0;
    MassToFingerprintGroupListCollection lossToFingerprintGroupListCollection = (MassToFingerprintGroupListCollection) this.settings.get(VariableNames.LOSS_TO_FINGERPRINT_GROUP_LIST_COLLECTION_NAME);
    // working copy of all losses found in the peak list; the shared list in the settings must
    // not be modified (this copy is not read again later in this method)
    java.util.LinkedList<?> lossMassesFoundInPeakList = (java.util.LinkedList<?>) ((java.util.LinkedList<?>) this.settings.get(VariableNames.LOSS_MASSES_FOUND_PEAKLIST_NAME)).clone();
    int matches = 0;
    Double mzppm = (Double) settings.get(VariableNames.RELATIVE_MASS_DEVIATION_NAME);
    Double mzabs = (Double) settings.get(VariableNames.ABSOLUTE_MASS_DEVIATION_NAME);
    // loss match list of the current candidate
    java.util.ArrayList<?> lossMatchlist = (java.util.ArrayList<?>) this.candidate.getProperty("LossMatchList");
    // parallel lists: one mass, probability and type per loss match
    java.util.ArrayList<Double> matchMasses = new java.util.ArrayList<Double>();
    java.util.ArrayList<Double> matchProb = new java.util.ArrayList<Double>();
    // found - 1; non-found - 2 (fp="0"); alpha - 3; beta - 4
    java.util.ArrayList<Integer> matchType = new java.util.ArrayList<Integer>();
    for (int i = 0; i < lossMatchlist.size(); i++) {
        MassFingerprintMatch currentMatch = (MassFingerprintMatch) lossMatchlist.get(i);
        // indexOf returns -1 when the exact mass is absent (or was already removed); calling
        // remove(-1) would throw IndexOutOfBoundsException, so only remove a real hit
        int foundAt = lossMassesFoundInPeakList.indexOf(currentMatch.getMass());
        if (foundAt >= 0) {
            lossMassesFoundInPeakList.remove(foundAt);
        }
        MassToFingerprintGroupList lossToFingerprintGroupList = lossToFingerprintGroupListCollection.getElementByPeak(currentMatch.getMass(), mzppm, mzabs);
        FastBitArray currentFingerprint = new FastBitArray(currentMatch.getFingerprint());
        // ToDo: at this stage try to check all fragments not only the best one
        double matching_prob = lossToFingerprintGroupList.getMatchingProbability(currentFingerprint);
        if (matching_prob != 0.0) {
            // a non-zero probability means the fingerprint was observed in the training data
            matches++;
            this.value += Math.log(matching_prob);
            matchProb.add(matching_prob);
            if (currentFingerprint.getSize() != 1) {
                // valid fingerprint
                matchType.add(1);
            } else {
                // a fingerprint of size 1 is the dummy fingerprint
                matchType.add(2);
            }
            matchMasses.add(currentMatch.getMass());
        } else {
            // neither type 1 nor type 2: fall back to the alpha or beta probability
            matchMasses.add(currentMatch.getMass());
            if (currentFingerprint.getSize() != 1) {
                this.value += Math.log(lossToFingerprintGroupList.getAlphaProb());
                matchProb.add(lossToFingerprintGroupList.getAlphaProb());
                matchType.add(3);
            } else {
                this.value += Math.log(lossToFingerprintGroupList.getBetaProb());
                matchProb.add(lossToFingerprintGroupList.getBetaProb());
                matchType.add(4);
            }
        }
    }
    if (lossToFingerprintGroupListCollection.getNumberElements() == 0) {
        this.value = 0.0;
    }
    this.candidate.setProperty("AutomatedLossFingerprintAnnotationScore_Matches", matches);
    this.candidate.setProperty("AutomatedLossFingerprintAnnotationScore", this.value);
    this.candidate.setProperty("AutomatedLossFingerprintAnnotationScore_Probtypes", this.getProbTypeString(matchProb, matchType, matchMasses));
    this.candidate.removeProperty("LossMatchList");
}
use of de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupListCollection in project MetFragRelaunched by ipb-halle.
the class AutomatedPeakFingerprintAnnotationScoreInitialiser method postProcessScoreParameters.
/**
 * Collects the peak fingerprint matches of every successfully processed candidate and turns the
 * annotation counts of the peak collection into probabilities.
 *
 * <p>For each candidate the matched peaks (with the fingerprint of the best matched fragment) and
 * the non-explained peaks (with the dummy fingerprint "0") that have an annotation entry are
 * stored as candidate property {@code "PeakMatchList"}. The denominator of the joint probability
 * is written to {@code VariableNames.PEAK_FINGERPRINT_DENOMINATOR_VALUE_NAME}.
 *
 * @param settings the settings providing the processes, the peak annotation collection, the
 *        peak list, the fingerprint type and the pseudo counts
 * @throws Exception declared by the signature; raised by callees
 */
public void postProcessScoreParameters(Settings settings) throws Exception {
    CombinedSingleCandidateMetFragProcess[] processes = (CombinedSingleCandidateMetFragProcess[]) settings.get(VariableNames.METFRAG_PROCESSES_NAME);
    // (peak mass, fingerprint) pairs produced by candidates but absent from the annotation data
    MassToFingerprintsHashMap peakMassToFingerprints = new MassToFingerprintsHashMap();
    MassToFingerprintGroupListCollection peakToFingerprintGroupListCollection = (MassToFingerprintGroupListCollection) settings.get(VariableNames.PEAK_TO_FINGERPRINT_GROUP_LIST_COLLECTION_NAME);
    Fingerprint fingerprint = new Fingerprint((String) settings.get(VariableNames.FINGERPRINT_TYPE_NAME));
    DefaultPeakList peakList = (DefaultPeakList) settings.get(VariableNames.PEAK_LIST_NAME);
    for (CombinedSingleCandidateMetFragProcess scmfp : processes) {
        // only successful single runs are considered
        if (!scmfp.wasSuccessful()) {
            continue;
        }
        ICandidate candidate = scmfp.getScoredPrecursorCandidates()[0];
        MatchList matchlist = candidate.getMatchList();
        java.util.LinkedList<Double> nonExplainedPeaks = this.getNonExplainedPeaks(peakList, matchlist);
        java.util.ArrayList<MassFingerprintMatch> peakMatchlist = new java.util.ArrayList<MassFingerprintMatch>();
        // a candidate without a match list has zero matched peaks; every later access goes
        // through this count so that a null match list is never dereferenced
        int numberMatchedPeaks = matchlist == null ? 0 : matchlist.getNumberElements();
        candidate.initialisePrecursorCandidate();
        for (int j = 0; j < numberMatchedPeaks + nonExplainedPeaks.size(); j++) {
            FastBitArray currentFingerprint = null;
            Double mass = null;
            if (j < numberMatchedPeaks) {
                // a valid match (peak-fragment assignment)
                IMatch match = matchlist.getElement(j);
                mass = match.getMatchedPeak().getMass();
                IFragment frag = match.getBestMatchedFragment();
                try {
                    IAtomContainer conPre = fingerprint.getNormalizedAtomContainer(candidate.getPrecursorMolecule(), frag);
                    currentFingerprint = fingerprint.getNormalizedFastBitArrayFingerprint(conPre);
                } catch (InvalidSmilesException e) {
                    // NOTE(review): the fingerprint stays null in this case and is still
                    // recorded below - confirm that downstream code copes with a null fingerprint
                    e.printStackTrace();
                } catch (CDKException e) {
                    e.printStackTrace();
                }
            } else {
                // otherwise it is a non-explained peak, represented by the dummy fingerprint
                mass = nonExplainedPeaks.get(j - numberMatchedPeaks);
                currentFingerprint = new FastBitArray("0");
            }
            MassToFingerprintGroupList peakToFingerprintGroupList = peakToFingerprintGroupListCollection.getElementByPeak(mass);
            // peaks without an annotation entry are skipped
            if (peakToFingerprintGroupList == null) {
                continue;
            }
            peakMatchlist.add(new MassFingerprintMatch(mass, currentFingerprint));
            // a fingerprint not observed for this peak mass in the training data goes to the
            // background; addFingerprint also checks whether it was already added
            if (!peakToFingerprintGroupList.containsFingerprint(currentFingerprint)) {
                peakMassToFingerprints.addFingerprint(mass, currentFingerprint);
            }
        }
        candidate.setProperty("PeakMatchList", peakMatchlist);
    }
    // tuple counts seen in the annotation data
    double f_seen_matched = (double) settings.get(VariableNames.PEAK_FINGERPRINT_MATCHED_TUPLE_COUNT_NAME);
    double f_seen_non_matched = (double) settings.get(VariableNames.PEAK_FINGERPRINT_NON_MATCHED_TUPLE_COUNT_NAME);
    // tuple counts only produced by the candidates
    double f_unseen_matched = peakMassToFingerprints.getOverallMatchedSize();
    double f_unseen_non_matched = peakMassToFingerprints.getOverallNonMatchedSize();
    // pseudo counts
    double alpha = (double) settings.get(VariableNames.PEAK_FINGERPRINT_ANNOTATION_ALPHA_VALUE_NAME);
    double beta = (double) settings.get(VariableNames.PEAK_FINGERPRINT_ANNOTATION_BETA_VALUE_NAME);
    // overall number of observations
    double sumFingerprintFrequencies = (double) settings.get(VariableNames.PEAK_FINGERPRINT_DENOMINATOR_COUNT_NAME);
    // denominator of P(f,m)
    double denominatorValue = sumFingerprintFrequencies + alpha * (f_seen_matched + f_unseen_matched) + beta * (f_seen_non_matched + f_unseen_non_matched);
    settings.set(VariableNames.PEAK_FINGERPRINT_DENOMINATOR_VALUE_NAME, denominatorValue);
    // P(f,m) of an unseen fingerprint
    double alphaProbability = alpha / denominatorValue;
    // P(f,m) of a non-annotated peak
    double betaProbability = beta / denominatorValue;
    for (int i = 0; i < peakToFingerprintGroupListCollection.getNumberElements(); i++) {
        MassToFingerprintGroupList groupList = peakToFingerprintGroupListCollection.getElement(i);
        // at this stage getProbability() returns the absolute counts from the annotation file
        double sum_f = 0.0;
        double sumFsProbabilities = 0.0;
        for (int ii = 0; ii < groupList.getNumberElements(); ii++) {
            // first calculate P(f,m); a fingerprint of size 1 is the dummy and gets beta
            if (groupList.getElement(ii).getFingerprint().getSize() != 1) {
                groupList.getElement(ii).setJointProbability((groupList.getElement(ii).getProbability() + alpha) / denominatorValue);
            } else {
                groupList.getElement(ii).setJointProbability((groupList.getElement(ii).getProbability() + beta) / denominatorValue);
            }
            // sum over the seen fingerprints
            sumFsProbabilities += groupList.getElement(ii).getJointProbability();
        }
        // probability mass of the un-observed fingerprints for the current mass
        double sumFuProbabilities = alphaProbability * peakMassToFingerprints.getSizeMatched(groupList.getPeakmz());
        sumFuProbabilities += betaProbability;
        sum_f += sumFsProbabilities;
        sum_f += sumFuProbabilities;
        for (int ii = 0; ii < groupList.getNumberElements(); ii++) {
            // second calculate P(f|m)
            groupList.getElement(ii).setConditionalProbability_sp(groupList.getElement(ii).getJointProbability() / sum_f);
        }
        groupList.setAlphaProb(alphaProbability / sum_f);
        groupList.setBetaProb(betaProbability / sum_f);
        groupList.setProbabilityToConditionalProbability_sp();
        groupList.calculateSumProbabilites();
    }
}
Aggregations