Search in sources :

Example 76 with CandidateList

use of de.ipbhalle.metfraglib.list.CandidateList in project MetFragRelaunched by ipb-halle.

the class CombineResultsForAnnotation method main.

/**
 * Collects one candidate per parameter file and writes the collected candidates to two PSV files.
 *
 * Reads the entries "metfragFolder", "parametersFolder", "outputFolder" and "outputPrefix" from
 * argsHash (returns immediately when getArgs(args) reports failure). For every file in the
 * parameters folder the id is the file name up to its first '.', and the candidate returned by
 * getMatchingCandidate for that id and InChIKey is copied into a new PrecursorCandidate together
 * with its "FragmentFingerprintOfExplPeaks", "SmilesOfExplPeaks" and "LossFingerprintOfExplPeaks"
 * properties. Ids ending in "01" go to the list written as outputPrefix + "_pos", all others to
 * the list written as outputPrefix + "_neg".
 *
 * @param args command line arguments, interpreted by getArgs
 */
public static void main(String[] args) {
    if (!getArgs(args)) {
        return;
    }
    String metfragFolder = argsHash.get("metfragFolder");
    String parametersFolder = argsHash.get("parametersFolder");
    String outputFolder = argsHash.get("outputFolder");
    String outputPrefix = argsHash.get("outputPrefix");
    File[] metfragFiles = new File(metfragFolder).listFiles();
    File[] parameterFiles = new File(parametersFolder).listFiles();
    // listFiles() yields null when the path is not a directory or cannot be read;
    // iterating over it would otherwise fail with a NullPointerException.
    if (parameterFiles == null) {
        System.err.println(parametersFolder + " is not a readable directory");
        return;
    }
    CandidateList posList = new CandidateList();
    CandidateList negList = new CandidateList();
    for (int i = 0; i < parameterFiles.length; i++) {
        String id = parameterFiles[i].getName().split("\\.")[0];
        try {
            String inchikey1 = getInChIKey1(parameterFiles[i]);
            ICandidate candidate = getMatchingCandidate(metfragFiles, id, inchikey1);
            if (candidate == null) {
                // reuse the key read above instead of reading the parameter file a second time
                System.out.println(id + " " + inchikey1 + " not found");
                continue;
            }
            ICandidate newCand = new PrecursorCandidate(candidate.getInChI(), candidate.getIdentifier());
            newCand.setProperty("FragmentFingerprintOfExplPeaks", candidate.getProperty("FragmentFingerprintOfExplPeaks"));
            newCand.setProperty("SmilesOfExplPeaks", candidate.getProperty("SmilesOfExplPeaks"));
            newCand.setProperty("LossFingerprintOfExplPeaks", candidate.getProperty("LossFingerprintOfExplPeaks"));
            if (id.endsWith("01")) {
                posList.addElement(newCand);
            } else {
                negList.addElement(newCand);
            }
        } catch (IOException e) {
            // a single unreadable file must not abort the whole run; report it and move on
            System.err.println("could not process " + parameterFiles[i].getName());
            e.printStackTrace();
        }
    }
    System.out.println(posList.getNumberElements() + " pos");
    System.out.println(negList.getNumberElements() + " neg");
    CandidateListWriterPSV writer = new CandidateListWriterPSV();
    try {
        writer.write(posList, outputPrefix + "_pos", outputFolder);
        writer.write(negList, outputPrefix + "_neg", outputFolder);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Also used : CandidateList(de.ipbhalle.metfraglib.list.CandidateList) IOException(java.io.IOException) File(java.io.File) PrecursorCandidate(de.ipbhalle.metfraglib.candidate.PrecursorCandidate) CandidateListWriterPSV(de.ipbhalle.metfraglib.writer.CandidateListWriterPSV) ICandidate(de.ipbhalle.metfraglib.interfaces.ICandidate) IOException(java.io.IOException) MultipleHeadersFoundInInputDatabaseException(de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException)

Example 77 with CandidateList

use of de.ipbhalle.metfraglib.list.CandidateList in project MetFragRelaunched by ipb-halle.

the class WriteFingerprintLossAnnotationFile method main.

/**
 * Writes a loss fingerprint annotation file.
 *
 * Parameters are parsed by readParameters(args); the following keys are read:
 *
 * filename - input file name (required)
 * probtype - probability type (required): 0 - counts; 1 - P ( s | p ); 2 - P ( p | s );
 *            3 - P ( p , s ) from s; 4 - P ( p , s ) from p;
 *            5 - P ( s | p ), P ( p | s ) and P ( p , s ) from s, each emitted separately
 *                (to output + "_1", output + "_2" and output + "_3" when output is given)
 * occurThresh - optional; when given, the collection is filtered with filterByOccurence
 * output - optional output file; without it the collection's print() is used
 * csv - optional; "1" selects the CSV database, "auto" selects PSV for file names ending in
 *       "psv" and CSV otherwise, any other value selects the PSV database
 * fingerprinttype - optional suffix appended to the property name "LossFingerprintOfExplPeaks"
 * includeNonExplained - optional; F, f, False, false or FALSE writes "NA" instead of the
 *                       non-explained masses in the first output line
 */
public static void main(String[] args) throws MultipleHeadersFoundInInputDatabaseException, Exception {
    java.util.Hashtable<String, String> readParameters = readParameters(args);
    if (!readParameters.containsKey("filename")) {
        System.err.println("filename missing");
        System.exit(1);
    }
    if (!readParameters.containsKey("probtype")) {
        System.err.println("probtype missing");
        System.exit(1);
    }
    String filename = readParameters.get("filename");
    int probabilityType = Integer.parseInt(readParameters.get("probtype"));
    String output = null;
    Integer occurThresh = null;
    String csv = "";
    String fingerprinttype = "";
    String includeNonExplainedString = "";
    if (readParameters.containsKey("output")) {
        output = readParameters.get("output");
    }
    if (readParameters.containsKey("occurThresh")) {
        occurThresh = Integer.parseInt(readParameters.get("occurThresh"));
    }
    if (readParameters.containsKey("csv")) {
        csv = readParameters.get("csv");
    }
    if (readParameters.containsKey("fingerprinttype")) {
        fingerprinttype = readParameters.get("fingerprinttype");
    }
    if (readParameters.containsKey("includeNonExplained")) {
        includeNonExplainedString = readParameters.get("includeNonExplained");
    }
    ArrayList<Double> peakMassesSorted = new ArrayList<Double>();
    ArrayList<String> fingerprintsSorted = new ArrayList<String>();
    StringBuilder nonExplainedPeaksString = new StringBuilder();
    ArrayList<Double> nonExplainedPeaks = new ArrayList<Double>();
    ArrayList<Integer> peakMassCounts = new ArrayList<Integer>();
    // only these exact spellings switch the non-explained output off
    boolean includeNonExplained = !(includeNonExplainedString.equals("F") || includeNonExplainedString.equals("f")
            || includeNonExplainedString.equals("False") || includeNonExplainedString.equals("false")
            || includeNonExplainedString.equals("FALSE"));
    Settings settings = new Settings();
    settings.set(VariableNames.LOCAL_DATABASE_PATH_NAME, filename);
    IDatabase db;
    if (csv.equals("1")) {
        db = new LocalCSVDatabase(settings);
    } else if (csv.equals("auto")) {
        if (filename.endsWith("psv")) {
            db = new LocalPSVDatabase(settings);
        } else {
            db = new LocalCSVDatabase(settings);
        }
    } else {
        db = new LocalPSVDatabase(settings);
    }
    java.util.ArrayList<String> ids = db.getCandidateIdentifiers();
    CandidateList candidateList = db.getCandidateByIdentifier(ids);
    // collect (mass, fingerprint) pairs and non-explained masses from every candidate
    for (int i = 0; i < candidateList.getNumberElements(); i++) {
        ICandidate candidate = candidateList.getElement(i);
        String fingerprintsOfExplPeaks = (String) candidate.getProperty("LossFingerprintOfExplPeaks" + fingerprinttype);
        // candidates without explained fingerprints are skipped entirely,
        // including their non-explained masses
        if (fingerprintsOfExplPeaks.equals("NA") || fingerprintsOfExplPeaks.length() == 0) {
            continue;
        }
        fingerprintsOfExplPeaks = fingerprintsOfExplPeaks.trim();
        String[] fingerprintPairs = fingerprintsOfExplPeaks.split(";");
        for (int k = 0; k < fingerprintPairs.length; k++) {
            String[] tmp1 = fingerprintPairs[k].split(":");
            Double peak1 = Double.parseDouble(tmp1[0]);
            try {
                addSortedFeature(peak1, tmp1[1], peakMassesSorted, fingerprintsSorted);
            } catch (Exception ignored) {
                // best effort: a pair without a fingerprint part (or one that cannot be added)
                // is dropped and the remaining pairs are still processed
            }
        }
        String nonExplMasses = (String) candidate.getProperty("NonExplainedLosses");
        if (!nonExplMasses.equals("NA")) {
            String[] tmp = nonExplMasses.split(";");
            for (int k = 0; k < tmp.length; k++) {
                double mass = Double.parseDouble(tmp[k]);
                if (mass > 2) {
                    addMassSorted(mass, peakMassCounts, nonExplainedPeaks);
                }
            }
        }
    }
    // first output line: "mass[:count];mass[:count];..." or "NA"; the count is only
    // written when it is larger than 1
    if (nonExplainedPeaks.size() == 0 || !includeNonExplained) {
        nonExplainedPeaksString.append("NA");
    } else {
        for (int i = 0; i < nonExplainedPeaks.size(); i++) {
            if (i > 0) {
                nonExplainedPeaksString.append(";");
            }
            nonExplainedPeaksString.append(nonExplainedPeaks.get(i));
            if (peakMassCounts.get(i) > 1) {
                nonExplainedPeaksString.append(":");
                nonExplainedPeaksString.append(peakMassCounts.get(i));
            }
        }
    }
    String nonExplainedPeaksLine = nonExplainedPeaksString.toString();
    MassToFingerprintGroupListCollection peakToFingerprintGroupListCollection = new MassToFingerprintGroupListCollection();
    System.out.println(peakMassesSorted.size() + " loss fingerprint pairs");
    Integer id = 0;
    Hashtable<Integer, ArrayList<Double>> grouplistid_to_masses = new Hashtable<Integer, ArrayList<Double>>();
    // group the pairs by mass; within a group count how often each fingerprint was observed
    for (int i = 0; i < peakMassesSorted.size(); i++) {
        Double currentPeak = peakMassesSorted.get(i);
        MassToFingerprintGroupList peakToFingerprintGroupList = peakToFingerprintGroupListCollection.getElementByPeak(currentPeak);
        if (peakToFingerprintGroupList == null) {
            // first occurrence of this mass: open a new group list with a fresh id
            peakToFingerprintGroupList = new MassToFingerprintGroupList(currentPeak);
            peakToFingerprintGroupList.setId(id);
            FingerprintGroup obj = new FingerprintGroup(0.0, null, null, null);
            obj.setFingerprint(fingerprintsSorted.get(i));
            obj.incrementNumberObserved();
            peakToFingerprintGroupList.addElement(obj);
            peakToFingerprintGroupListCollection.addElementSorted(peakToFingerprintGroupList);
            addMass(grouplistid_to_masses, id, currentPeak);
            id++;
        } else {
            Integer current_id = peakToFingerprintGroupList.getId();
            addMass(grouplistid_to_masses, current_id, currentPeak);
            FingerprintGroup fingerprintGroup = peakToFingerprintGroupList.getElementByFingerprint(new FastBitArray(fingerprintsSorted.get(i)));
            if (fingerprintGroup != null) {
                fingerprintGroup.incrementNumberObserved();
            } else {
                fingerprintGroup = new FingerprintGroup(0.0, null, null, null);
                fingerprintGroup.setFingerprint(fingerprintsSorted.get(i));
                fingerprintGroup.incrementNumberObserved();
                peakToFingerprintGroupList.addElement(fingerprintGroup);
            }
        }
    }
    System.out.println("before filtering " + peakToFingerprintGroupListCollection.getNumberElements());
    peakToFingerprintGroupListCollection.updatePeakMass(grouplistid_to_masses);
    if (occurThresh != null) {
        peakToFingerprintGroupListCollection.filterByOccurence(occurThresh);
    }
    peakToFingerprintGroupListCollection.annotateIds();
    // absolute numbers of single substructure occurrences, N^(s), and of peak occurrences
    int[] substrOccurences = peakToFingerprintGroupListCollection.calculateSubstructureAbsoluteProbabilities();
    int[] peakOccurences = peakToFingerprintGroupListCollection.calculatePeakAbsoluteProbabilities();
    switch (probabilityType) {
        case 0:
            // counts
            peakToFingerprintGroupListCollection.updateConditionalProbabilities();
            peakToFingerprintGroupListCollection.setProbabilityToNumberObserved();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            break;
        case 1:
            // P ( s | p )
            peakToFingerprintGroupListCollection.updateConditionalProbabilities();
            peakToFingerprintGroupListCollection.setProbabilityToConditionalProbability_sp();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            break;
        case 2:
            // P ( p | s )
            System.out.println("annotating IDs");
            peakToFingerprintGroupListCollection.updateProbabilities(substrOccurences);
            peakToFingerprintGroupListCollection.setProbabilityToConditionalProbability_ps();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            break;
        case 3:
            // P ( p , s )_s
            System.out.println("annotating IDs");
            peakToFingerprintGroupListCollection.updateJointProbabilitiesWithSubstructures(substrOccurences);
            peakToFingerprintGroupListCollection.setProbabilityToJointProbability();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            break;
        case 4:
            // P ( p , s )_p
            System.out.println("annotating IDs");
            peakToFingerprintGroupListCollection.updateJointProbabilitiesWithPeaks(peakOccurences);
            peakToFingerprintGroupListCollection.setProbabilityToJointProbability();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            break;
        case 5:
            // P ( s | p ), P ( p | s ) and P ( s , p )_s, emitted one after another
            System.out.println("annotating IDs");
            peakToFingerprintGroupListCollection.updateConditionalProbabilities();
            peakToFingerprintGroupListCollection.updateProbabilities(substrOccurences);
            peakToFingerprintGroupListCollection.updateJointProbabilitiesWithSubstructures(substrOccurences);
            peakToFingerprintGroupListCollection.setProbabilityToConditionalProbability_sp();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            writeAnnotationResult(peakToFingerprintGroupListCollection, nonExplainedPeaksLine, output == null ? null : output + "_1");
            peakToFingerprintGroupListCollection.setProbabilityToConditionalProbability_ps();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            writeAnnotationResult(peakToFingerprintGroupListCollection, nonExplainedPeaksLine, output == null ? null : output + "_2");
            peakToFingerprintGroupListCollection.setProbabilityToJointProbability();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            writeAnnotationResult(peakToFingerprintGroupListCollection, nonExplainedPeaksLine, output == null ? null : output + "_3");
            break;
        default:
            // unknown types leave the probabilities untouched; the collection is still emitted below
            break;
    }
    if (probabilityType != 5) {
        if (output != null) {
            System.out.println("writing to output");
        }
        writeAnnotationResult(peakToFingerprintGroupListCollection, nonExplainedPeaksLine, output);
    }
}

/*
 * Emits one annotation result: calls print() on the collection when outputPath is null,
 * otherwise writes the non-explained line, the collection and a SUMMARY line to outputPath.
 * The writer is closed even when building the content throws.
 * Declared with "throws Exception" like main, because the checked exceptions of the
 * getNumber* helpers are not visible here.
 */
private static void writeAnnotationResult(MassToFingerprintGroupListCollection collection, String nonExplainedPeaksLine, String outputPath) throws Exception {
    if (outputPath == null) {
        collection.print();
        return;
    }
    try (BufferedWriter bwriter = new BufferedWriter(new FileWriter(new File(outputPath)))) {
        bwriter.write(nonExplainedPeaksLine);
        bwriter.newLine();
        bwriter.write(collection.toString());
        bwriter.write("SUMMARY " + getNumberMatchedElements(collection) + " " + getNumberOccurences(collection) + " " + getNumberNonMatchedElements(collection) + " " + getNumberElements(collection));
        bwriter.newLine();
    }
}
Also used : MassToFingerprintGroupListCollection(de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupListCollection) FileWriter(java.io.FileWriter) ArrayList(java.util.ArrayList) ICandidate(de.ipbhalle.metfraglib.interfaces.ICandidate) BufferedWriter(java.io.BufferedWriter) MassToFingerprintGroupList(de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupList) CandidateList(de.ipbhalle.metfraglib.list.CandidateList) FastBitArray(de.ipbhalle.metfraglib.FastBitArray) FingerprintGroup(de.ipbhalle.metfraglib.substructure.FingerprintGroup) Settings(de.ipbhalle.metfraglib.settings.Settings) IDatabase(de.ipbhalle.metfraglib.interfaces.IDatabase) Hashtable(java.util.Hashtable) LocalCSVDatabase(de.ipbhalle.metfraglib.database.LocalCSVDatabase) MultipleHeadersFoundInInputDatabaseException(de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException) LocalPSVDatabase(de.ipbhalle.metfraglib.database.LocalPSVDatabase) File(java.io.File)

Example 78 with CandidateList

use of de.ipbhalle.metfraglib.list.CandidateList in project MetFragRelaunched by ipb-halle.

the class WriteFingerprintPeakAnnotationFile method main.

/**
 * Writes a peak fingerprint annotation file.
 *
 * Parameters are parsed by readParameters(args); the following keys are read:
 *
 * filename - input file name (required)
 * probtype - probability type (required): 0 - counts; 1 - P ( s | p ); 2 - P ( p | s );
 *            3 - P ( p , s ) from s; 4 - P ( p , s ) from p;
 *            5 - P ( s | p ), P ( p | s ) and P ( p , s ) from s, each emitted separately
 *                (to output + "_1", output + "_2" and output + "_3" when output is given)
 * occurThresh - optional; when given, the collection is filtered with filterByOccurence
 * output - optional output file; without it the collection's print() is used
 * csv - optional; "1" selects the CSV database, "auto" selects PSV for file names ending in
 *       "psv" and CSV otherwise, any other value selects the PSV database
 * fingerprinttype - optional suffix appended to the property name "FragmentFingerprintOfExplPeaks"
 * includeNonExplained - optional; F, f, False, false or FALSE writes "NA" instead of the
 *                       non-explained masses in the first output line
 *
 * Every written file ends with a line
 * SUMMARY "number of different pairs (f,m) matched" "sum of all occurrences of all (f,m)"
 *         "number of different pairs (_,m)" "number of all different pairs (f,m)"
 */
public static void main(String[] args) throws MultipleHeadersFoundInInputDatabaseException, Exception {
    java.util.Hashtable<String, String> readParameters = readParameters(args);
    if (!readParameters.containsKey("filename")) {
        System.err.println("filename missing");
        System.exit(1);
    }
    if (!readParameters.containsKey("probtype")) {
        System.err.println("probtype missing");
        System.exit(1);
    }
    String filename = readParameters.get("filename");
    int probabilityType = Integer.parseInt(readParameters.get("probtype"));
    String output = null;
    Integer occurThresh = null;
    String csv = "";
    String fingerprinttype = "";
    String includeNonExplainedString = "";
    if (readParameters.containsKey("output")) {
        output = readParameters.get("output");
    }
    if (readParameters.containsKey("occurThresh")) {
        occurThresh = Integer.parseInt(readParameters.get("occurThresh"));
    }
    if (readParameters.containsKey("csv")) {
        csv = readParameters.get("csv");
    }
    if (readParameters.containsKey("fingerprinttype")) {
        fingerprinttype = readParameters.get("fingerprinttype");
    }
    if (readParameters.containsKey("includeNonExplained")) {
        includeNonExplainedString = readParameters.get("includeNonExplained");
    }
    ArrayList<Double> peakMassesSorted = new ArrayList<Double>();
    ArrayList<String> fingerprintsSorted = new ArrayList<String>();
    StringBuilder nonExplainedPeaksString = new StringBuilder();
    ArrayList<Double> nonExplainedPeaks = new ArrayList<Double>();
    ArrayList<Integer> peakMassCounts = new ArrayList<Integer>();
    // only these exact spellings switch the non-explained output off
    boolean includeNonExplained = !(includeNonExplainedString.equals("F") || includeNonExplainedString.equals("f")
            || includeNonExplainedString.equals("False") || includeNonExplainedString.equals("false")
            || includeNonExplainedString.equals("FALSE"));
    Settings settings = new Settings();
    settings.set(VariableNames.LOCAL_DATABASE_PATH_NAME, filename);
    IDatabase db;
    // compare string contents: a reference comparison (==) against the literal does not
    // match a value parsed at runtime, so csv=1 would never select the CSV database
    if (csv.equals("1")) {
        db = new LocalCSVDatabase(settings);
    } else if (csv.equals("auto")) {
        if (filename.endsWith("psv")) {
            db = new LocalPSVDatabase(settings);
        } else {
            db = new LocalCSVDatabase(settings);
        }
    } else {
        db = new LocalPSVDatabase(settings);
    }
    java.util.ArrayList<String> ids = db.getCandidateIdentifiers();
    CandidateList candidateList = db.getCandidateByIdentifier(ids);
    // collect (mass, fingerprint) pairs and non-explained masses from every candidate
    for (int i = 0; i < candidateList.getNumberElements(); i++) {
        ICandidate candidate = candidateList.getElement(i);
        String fingerprintsOfExplPeaks = (String) candidate.getProperty("FragmentFingerprintOfExplPeaks" + fingerprinttype);
        // candidates without explained fingerprints are skipped entirely,
        // including their non-explained masses
        if (fingerprintsOfExplPeaks.equals("NA") || fingerprintsOfExplPeaks.length() == 0) {
            continue;
        }
        fingerprintsOfExplPeaks = fingerprintsOfExplPeaks.trim();
        String[] fingerprintPairs = fingerprintsOfExplPeaks.split(";");
        for (int k = 0; k < fingerprintPairs.length; k++) {
            String[] tmp1 = fingerprintPairs[k].split(":");
            Double peak1 = Double.parseDouble(tmp1[0]);
            try {
                addSortedFeature(peak1, tmp1[1], peakMassesSorted, fingerprintsSorted);
            } catch (Exception ignored) {
                // best effort: a pair without a fingerprint part (or one that cannot be added)
                // is dropped and the remaining pairs are still processed
            }
        }
        String nonExplMasses = (String) candidate.getProperty("NonExplainedPeaks");
        if (!nonExplMasses.equals("NA")) {
            String[] tmp = nonExplMasses.split(";");
            for (int k = 0; k < tmp.length; k++) {
                double mass = Double.parseDouble(tmp[k]);
                if (mass > 2) {
                    addMassSorted(mass, peakMassCounts, nonExplainedPeaks);
                }
            }
        }
    }
    // first output line: "mass[:count];mass[:count];..." or "NA"; the count is only
    // written when it is larger than 1
    if (nonExplainedPeaks.size() == 0 || !includeNonExplained) {
        nonExplainedPeaksString.append("NA");
    } else {
        for (int i = 0; i < nonExplainedPeaks.size(); i++) {
            if (i > 0) {
                nonExplainedPeaksString.append(";");
            }
            nonExplainedPeaksString.append(nonExplainedPeaks.get(i));
            if (peakMassCounts.get(i) > 1) {
                nonExplainedPeaksString.append(":");
                nonExplainedPeaksString.append(peakMassCounts.get(i));
            }
        }
    }
    String nonExplainedPeaksLine = nonExplainedPeaksString.toString();
    MassToFingerprintGroupListCollection peakToFingerprintGroupListCollection = new MassToFingerprintGroupListCollection();
    System.out.println(peakMassesSorted.size() + " peak fingerprint pairs");
    Integer id = 0;
    Hashtable<Integer, ArrayList<Double>> grouplistid_to_masses = new Hashtable<Integer, ArrayList<Double>>();
    // group the pairs by mass; within a group count how often each fingerprint was observed
    for (int i = 0; i < peakMassesSorted.size(); i++) {
        Double currentPeak = peakMassesSorted.get(i);
        MassToFingerprintGroupList peakToFingerprintGroupList = peakToFingerprintGroupListCollection.getElementByPeak(currentPeak);
        if (peakToFingerprintGroupList == null) {
            // first occurrence of this mass: open a new group list with a fresh id
            peakToFingerprintGroupList = new MassToFingerprintGroupList(currentPeak);
            peakToFingerprintGroupList.setId(id);
            FingerprintGroup obj = new FingerprintGroup(0.0, null, null, null);
            obj.setFingerprint(fingerprintsSorted.get(i));
            obj.incrementNumberObserved();
            peakToFingerprintGroupList.addElement(obj);
            peakToFingerprintGroupListCollection.addElementSorted(peakToFingerprintGroupList);
            addMass(grouplistid_to_masses, id, currentPeak);
            id++;
        } else {
            Integer current_id = peakToFingerprintGroupList.getId();
            addMass(grouplistid_to_masses, current_id, currentPeak);
            FingerprintGroup fingerprintGroup = peakToFingerprintGroupList.getElementByFingerprint(new FastBitArray(fingerprintsSorted.get(i)));
            if (fingerprintGroup != null) {
                fingerprintGroup.incrementNumberObserved();
            } else {
                fingerprintGroup = new FingerprintGroup(0.0, null, null, null);
                fingerprintGroup.setFingerprint(fingerprintsSorted.get(i));
                fingerprintGroup.incrementNumberObserved();
                peakToFingerprintGroupList.addElement(fingerprintGroup);
            }
        }
    }
    System.out.println("before filtering " + peakToFingerprintGroupListCollection.getNumberElements());
    peakToFingerprintGroupListCollection.updatePeakMass(grouplistid_to_masses);
    if (occurThresh != null) {
        peakToFingerprintGroupListCollection.filterByOccurence(occurThresh);
    }
    peakToFingerprintGroupListCollection.annotateIds();
    // absolute numbers of single substructure occurrences, N^(s), and of peak occurrences
    int[] substrOccurences = peakToFingerprintGroupListCollection.calculateSubstructureAbsoluteProbabilities();
    int[] peakOccurences = peakToFingerprintGroupListCollection.calculatePeakAbsoluteProbabilities();
    switch (probabilityType) {
        case 0:
            // counts
            peakToFingerprintGroupListCollection.updateConditionalProbabilities();
            peakToFingerprintGroupListCollection.setProbabilityToNumberObserved();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            break;
        case 1:
            // P ( s | p )
            peakToFingerprintGroupListCollection.updateConditionalProbabilities();
            peakToFingerprintGroupListCollection.setProbabilityToConditionalProbability_sp();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            break;
        case 2:
            // P ( p | s )
            System.out.println("annotating IDs");
            peakToFingerprintGroupListCollection.updateProbabilities(substrOccurences);
            peakToFingerprintGroupListCollection.setProbabilityToConditionalProbability_ps();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            break;
        case 3:
            // P ( p , s )_s
            System.out.println("annotating IDs");
            peakToFingerprintGroupListCollection.updateJointProbabilitiesWithSubstructures(substrOccurences);
            peakToFingerprintGroupListCollection.setProbabilityToJointProbability();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            break;
        case 4:
            // P ( p , s )_p
            System.out.println("annotating IDs");
            peakToFingerprintGroupListCollection.updateJointProbabilitiesWithPeaks(peakOccurences);
            peakToFingerprintGroupListCollection.setProbabilityToJointProbability();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            break;
        case 5:
            // P ( s | p ), P ( p | s ) and P ( s , p )_s, emitted one after another
            System.out.println("annotating IDs");
            peakToFingerprintGroupListCollection.updateConditionalProbabilities();
            peakToFingerprintGroupListCollection.updateProbabilities(substrOccurences);
            peakToFingerprintGroupListCollection.updateJointProbabilitiesWithSubstructures(substrOccurences);
            peakToFingerprintGroupListCollection.setProbabilityToConditionalProbability_sp();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            writeAnnotationResult(peakToFingerprintGroupListCollection, nonExplainedPeaksLine, output == null ? null : output + "_1");
            peakToFingerprintGroupListCollection.setProbabilityToConditionalProbability_ps();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            writeAnnotationResult(peakToFingerprintGroupListCollection, nonExplainedPeaksLine, output == null ? null : output + "_2");
            peakToFingerprintGroupListCollection.setProbabilityToJointProbability();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            writeAnnotationResult(peakToFingerprintGroupListCollection, nonExplainedPeaksLine, output == null ? null : output + "_3");
            break;
        default:
            // unknown types leave the probabilities untouched; the collection is still emitted below
            break;
    }
    if (probabilityType != 5) {
        if (output != null) {
            System.out.println("writing to output");
        }
        writeAnnotationResult(peakToFingerprintGroupListCollection, nonExplainedPeaksLine, output);
    }
}

/*
 * Emits one annotation result: calls print() on the collection when outputPath is null,
 * otherwise writes the non-explained line, the collection and a SUMMARY line to outputPath.
 * The writer is closed even when building the content throws.
 * Declared with "throws Exception" like main, because the checked exceptions of the
 * getNumber* helpers are not visible here.
 */
private static void writeAnnotationResult(MassToFingerprintGroupListCollection collection, String nonExplainedPeaksLine, String outputPath) throws Exception {
    if (outputPath == null) {
        collection.print();
        return;
    }
    try (BufferedWriter bwriter = new BufferedWriter(new FileWriter(new File(outputPath)))) {
        bwriter.write(nonExplainedPeaksLine);
        bwriter.newLine();
        bwriter.write(collection.toString());
        bwriter.write("SUMMARY " + getNumberMatchedElements(collection) + " " + getNumberOccurences(collection) + " " + getNumberNonMatchedElements(collection) + " " + getNumberElements(collection));
        bwriter.newLine();
    }
}
Also used : MassToFingerprintGroupListCollection(de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupListCollection) FileWriter(java.io.FileWriter) ArrayList(java.util.ArrayList) ICandidate(de.ipbhalle.metfraglib.interfaces.ICandidate) BufferedWriter(java.io.BufferedWriter) MassToFingerprintGroupList(de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupList) CandidateList(de.ipbhalle.metfraglib.list.CandidateList) FastBitArray(de.ipbhalle.metfraglib.FastBitArray) FingerprintGroup(de.ipbhalle.metfraglib.substructure.FingerprintGroup) Settings(de.ipbhalle.metfraglib.settings.Settings) IDatabase(de.ipbhalle.metfraglib.interfaces.IDatabase) Hashtable(java.util.Hashtable) LocalCSVDatabase(de.ipbhalle.metfraglib.database.LocalCSVDatabase) MultipleHeadersFoundInInputDatabaseException(de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException) LocalPSVDatabase(de.ipbhalle.metfraglib.database.LocalPSVDatabase) File(java.io.File)

Example 79 with CandidateList

use of de.ipbhalle.metfraglib.list.CandidateList in project MetFragRelaunched by ipb-halle.

the class WriteSubstructureAnnotationFile method main.

/**
 * Writes a peak-to-substructure annotation file.
 *
 * <p>Parameters are obtained from {@code args} via {@code readParameters}:
 * <ul>
 * <li>{@code filename} - input file name, loaded through {@code LocalPSVDatabase} (required)</li>
 * <li>{@code mzppm} - passed with {@code mzabs} to the peak lookup; presumably the relative
 * (ppm) mass deviation - confirm against {@code getElementByPeak} (required)</li>
 * <li>{@code mzabs} - presumably the absolute mass deviation, see {@code mzppm} (required)</li>
 * <li>{@code probtype} - probability type (required):
 * 1 - P ( s | p ); 2 - P ( p | s ); 3 - P ( p , s ) from s; 4 - P ( p , s ) from p;
 * 5 - writes three results in a row, P ( s | p ), P ( p | s ) and P ( p , s ) from s,
 * to targets suffixed {@code _1}, {@code _2} and {@code _3}</li>
 * <li>{@code output} - output smarts file; when absent the result is printed instead</li>
 * <li>{@code outputSMILES} - output smiles file (optional)</li>
 * <li>{@code occurThresh} - when present, handed to {@code filterByOccurence} (optional)</li>
 * </ul>
 *
 * <p>If a required parameter is missing, its name is reported on standard error and the JVM
 * exits with status 1. Any other {@code probtype} value skips the probability calculation and
 * still emits the collection.
 *
 * @param args command line arguments as understood by {@code readParameters}
 * @throws MultipleHeadersFoundInInputDatabaseException declared by the original signature;
 *         presumably raised while reading the input database - confirm
 * @throws Exception on any other failure, including I/O errors while writing the output files
 *         and number format errors in the parameters or in the peak values
 */
public static void main(String[] args) throws MultipleHeadersFoundInInputDatabaseException, Exception {
    java.util.Hashtable<String, String> readParameters = readParameters(args);
    // all of these must be present; abort on the first one that is missing
    String[] requiredParameters = { "filename", "mzppm", "mzabs", "probtype" };
    for (String required : requiredParameters) {
        if (!readParameters.containsKey(required)) {
            System.err.println(required + " missing");
            System.exit(1);
        }
    }
    String filename = readParameters.get("filename");
    Double mzppm = Double.parseDouble(readParameters.get("mzppm"));
    Double mzabs = Double.parseDouble(readParameters.get("mzabs"));
    int probabilityType = Integer.parseInt(readParameters.get("probtype"));
    // Hashtable never stores null values, so get() yields null exactly when the key is absent
    String output = readParameters.get("output");
    String outputSmiles = readParameters.get("outputSMILES");
    String occurThreshValue = readParameters.get("occurThresh");
    Integer occurThresh = occurThreshValue == null ? null : Integer.valueOf(occurThreshValue);

    Settings settings = new Settings();
    settings.set(VariableNames.LOCAL_DATABASE_PATH_NAME, filename);
    LocalPSVDatabase db = new LocalPSVDatabase(settings);
    java.util.ArrayList<String> ids = db.getCandidateIdentifiers();
    CandidateList candidateList = db.getCandidateByIdentifier(ids);

    // SmilesOfExplPeaks
    PeakToSmartsGroupListCollection peakToSmartGroupListCollection = new PeakToSmartsGroupListCollection();
    for (int i = 0; i < candidateList.getNumberElements(); i++) {
        // progress output: index of the candidate being processed
        System.out.println(i);
        ICandidate candidate = candidateList.getElement(i);
        String smilesOfExplPeaks = ((String) candidate.getProperty("SmilesOfExplPeaks")).trim();
        String aromaticSmilesOfExplPeaks = ((String) candidate.getProperty("AromaticSmilesOfExplPeaks")).trim();
        if (smilesOfExplPeaks.equals("NA") || aromaticSmilesOfExplPeaks.equals("NA"))
            continue;
        // both properties hold ';'-separated "peak:structure" pairs that are matched by position
        String[] pairs = smilesOfExplPeaks.split(";");
        String[] aromaticPairs = aromaticSmilesOfExplPeaks.split(";");
        if (pairs.length != aromaticPairs.length) {
            // the two lists cannot be aligned: report the candidate and skip it
            System.out.println(candidate.getIdentifier() + " " + candidate.getProperty(VariableNames.INCHI_KEY_1_NAME));
            continue;
        }
        for (int k = 0; k < pairs.length; k++) {
            String[] tmp = pairs[k].split(":");
            String[] aromaticTmp = aromaticPairs[k].split(":");
            // parsed before the length check on purpose: a non-numeric peak still fails loudly
            Double peak = Double.parseDouble(tmp[0]);
            // entries without a structure part are skipped
            if (tmp.length < 2 || aromaticTmp.length < 2)
                continue;
            String smiles = tmp[1];
            String smarts = aromaticTmp[1];
            PeakToSmartsGroupList peakToSmartGroupList = peakToSmartGroupListCollection.getElementByPeak(peak, mzppm, mzabs);
            if (peakToSmartGroupList == null) {
                // first structure seen for this peak: open a new list with a single group
                peakToSmartGroupList = new PeakToSmartsGroupList(peak);
                SmartsGroup obj = new SmartsGroup(0.0, null, null, null);
                obj.addElement(smarts);
                obj.addSmiles(smiles);
                peakToSmartGroupList.addElement(obj);
                peakToSmartGroupListCollection.addElementSorted(peakToSmartGroupList);
            } else {
                // known peak: move the stored m/z to the mean of the stored and the new value
                peakToSmartGroupList.setPeakmz((peakToSmartGroupList.getPeakmz() + peak) / 2.0);
                SmartsGroup smartsGroup = peakToSmartGroupList.getElementBySmiles(smiles, 1.0);
                boolean isNewGroup = smartsGroup == null;
                if (isNewGroup)
                    smartsGroup = new SmartsGroup(0.0, null, null, null);
                smartsGroup.addElement(smarts);
                smartsGroup.addSmiles(smiles);
                if (isNewGroup)
                    peakToSmartGroupList.addElement(smartsGroup);
            }
        }
    }
    // test filtering
    if (occurThresh != null)
        peakToSmartGroupListCollection.filterByOccurence(occurThresh);
    peakToSmartGroupListCollection.annotateIds();
    // get absolute numbers of single substructure occurences
    // N^(s)
    int[] substrOccurences = peakToSmartGroupListCollection.calculateSubstructureAbsoluteProbabilities();
    int[] peakOccurences = peakToSmartGroupListCollection.calculatePeakAbsoluteProbabilities();

    switch (probabilityType) {
        case 1:
            // P ( s | p )
            peakToSmartGroupListCollection.updateConditionalProbabilities();
            peakToSmartGroupListCollection.removeDuplicates();
            peakToSmartGroupListCollection.setProbabilityToConditionalProbability_sp();
            peakToSmartGroupListCollection.sortElementsByProbability();
            break;
        case 2:
            // P ( p | s )
            System.out.println("annotating IDs");
            peakToSmartGroupListCollection.updateProbabilities(substrOccurences);
            peakToSmartGroupListCollection.removeDuplicates();
            peakToSmartGroupListCollection.setProbabilityToConditionalProbability_ps();
            peakToSmartGroupListCollection.sortElementsByProbability();
            break;
        case 3:
            // P ( p , s )_s
            System.out.println("annotating IDs");
            peakToSmartGroupListCollection.updateJointProbabilitiesWithSubstructures(substrOccurences);
            peakToSmartGroupListCollection.removeDuplicates();
            peakToSmartGroupListCollection.setProbabilityToJointProbability();
            peakToSmartGroupListCollection.sortElementsByProbability();
            break;
        case 4:
            // P ( p , s )_p
            System.out.println("annotating IDs");
            peakToSmartGroupListCollection.updateJointProbabilitiesWithPeaks(peakOccurences);
            peakToSmartGroupListCollection.removeDuplicates();
            peakToSmartGroupListCollection.setProbabilityToJointProbability();
            peakToSmartGroupListCollection.sortElementsByProbability();
            break;
        case 5:
            // P ( s | p ) P ( p | s ) P( s, p )_s : all three are calculated once, then each is
            // selected, sorted and emitted in turn
            System.out.println("annotating IDs");
            peakToSmartGroupListCollection.updateConditionalProbabilities();
            peakToSmartGroupListCollection.updateProbabilities(substrOccurences);
            peakToSmartGroupListCollection.updateJointProbabilitiesWithSubstructures(substrOccurences);
            peakToSmartGroupListCollection.removeDuplicates();

            peakToSmartGroupListCollection.setProbabilityToConditionalProbability_sp();
            peakToSmartGroupListCollection.sortElementsByProbability();
            emitAnnotationResults(peakToSmartGroupListCollection, output, outputSmiles, "_1");

            peakToSmartGroupListCollection.setProbabilityToConditionalProbability_ps();
            peakToSmartGroupListCollection.sortElementsByProbability();
            emitAnnotationResults(peakToSmartGroupListCollection, output, outputSmiles, "_2");

            peakToSmartGroupListCollection.setProbabilityToJointProbability();
            peakToSmartGroupListCollection.sortElementsByProbability();
            emitAnnotationResults(peakToSmartGroupListCollection, output, outputSmiles, "_3");
            break;
        default:
            // unknown type: no probability calculation, the collection is emitted as it is
            break;
    }
    if (probabilityType != 5)
        emitAnnotationResults(peakToSmartGroupListCollection, output, outputSmiles, "");
}

/**
 * Emits the current state of the collection: printed when {@code output} is {@code null},
 * otherwise written to {@code output + suffix}; the SMILES representation is additionally
 * written to {@code outputSmiles + suffix} when {@code outputSmiles} is given.
 *
 * @param collection collection to emit
 * @param output target file for {@code toString()}, or {@code null} to print
 * @param outputSmiles target file for {@code toStringSmiles()}, or {@code null} to skip it
 * @param suffix appended to both file names (may be empty)
 * @throws Exception mirrors the throws clause of {@code main}, as the collection methods are
 *         declared outside this block
 */
private static void emitAnnotationResults(PeakToSmartsGroupListCollection collection, String output, String outputSmiles, String suffix) throws Exception {
    if (output == null)
        collection.print();
    else
        writeAnnotationText(output + suffix, collection.toString());
    if (outputSmiles != null)
        writeAnnotationText(outputSmiles + suffix, collection.toStringSmiles());
}

/**
 * Writes {@code content} to the file at {@code path}, replacing existing content. The writer is
 * closed even when writing fails.
 *
 * @param path file to write
 * @param content text to write
 * @throws java.io.IOException if the file cannot be opened or written
 */
private static void writeAnnotationText(String path, String content) throws java.io.IOException {
    try (BufferedWriter bwriter = new BufferedWriter(new FileWriter(new File(path)))) {
        bwriter.write(content);
    }
}
Also used : PeakToSmartsGroupListCollection(de.ipbhalle.metfraglib.substructure.PeakToSmartsGroupListCollection) FileWriter(java.io.FileWriter) ICandidate(de.ipbhalle.metfraglib.interfaces.ICandidate) MultipleHeadersFoundInInputDatabaseException(de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException) BufferedWriter(java.io.BufferedWriter) LocalPSVDatabase(de.ipbhalle.metfraglib.database.LocalPSVDatabase) SmartsGroup(de.ipbhalle.metfraglib.substructure.SmartsGroup) CandidateList(de.ipbhalle.metfraglib.list.CandidateList) File(java.io.File) Settings(de.ipbhalle.metfraglib.settings.Settings) PeakToSmartsGroupList(de.ipbhalle.metfraglib.substructure.PeakToSmartsGroupList)

Example 80 with CandidateList

use of de.ipbhalle.metfraglib.list.CandidateList in project MetFragRelaunched by ipb-halle.

the class ClusterCompoundsThreadRunner method run.

/**
 * Clusters the candidates of the filtered MetFrag results and rebuilds the organigram tree.
 *
 * The cluster hierarchy returned by TanimotoSimilarity.generateCluster is walked with two
 * stacks kept in step (cluster and its tree node). Leaf clusters become "compound" nodes and
 * are collected in this.leaves, all others become "compoundGroup" nodes. On success
 * this.treeRoot is replaced and leaf counts and scores are recalculated; any exception is
 * printed and leaves this.treeRoot untouched.
 */
@Override
public void run() {
    System.out.println("clustering compounds");
    // gather the candidates and remember which result belongs to which identifier
    CandidateList clusterInput = new CandidateList();
    java.util.HashMap<String, MetFragResult> resultById = new java.util.HashMap<String, MetFragResult>();
    java.util.List<MetFragResult> results = this.filteredMetFragResultsContainer.getMetFragResults();
    for (int idx = 0; idx < results.size(); idx++) {
        MetFragResult result = results.get(idx);
        ICandidate candidate = result.getRoot().getCandidate();
        clusterInput.addElement(candidate);
        resultById.put(candidate.getIdentifier(), result);
    }
    try {
        ClusterWrapper rootCluster = TanimotoSimilarity.generateCluster(clusterInput);
        java.util.Stack<ClusterWrapper> pendingClusters = new java.util.Stack<ClusterWrapper>();
        java.util.Stack<OrganigramNode> pendingNodes = new java.util.Stack<OrganigramNode>();
        // the root is either a single compound or a group of compounds
        OrganigramNode rootNode = rootCluster.isLeaf()
            ? new DefaultOrganigramNode("compound", new ClusterLeaf(resultById.get(rootCluster.getName()), 0.0), null)
            : new DefaultOrganigramNode("compoundGroup", new ClusterNode(0.0), null);
        this.setNodeAttributes(rootNode, true);
        pendingClusters.push(rootCluster);
        pendingNodes.push(rootNode);
        this.leaves = new java.util.Vector<OrganigramNode>();
        // only the first six child nodes created receive 'true' in setNodeAttributes
        int flaggedNodes = 0;
        while (!pendingClusters.isEmpty()) {
            ClusterWrapper parentCluster = pendingClusters.pop();
            OrganigramNode parentNode = pendingNodes.pop();
            for (ClusterWrapper childCluster : parentCluster.getChildren()) {
                OrganigramNode childNode;
                if (!childCluster.isLeaf()) {
                    childNode = new DefaultOrganigramNode("compoundGroup", new ClusterNode(0.0), parentNode);
                } else {
                    MetFragResult leafResult = resultById.get(childCluster.getName());
                    childNode = new DefaultOrganigramNode("compound", new ClusterLeaf(leafResult, leafResult.getScore()), parentNode);
                    this.leaves.add(childNode);
                }
                boolean withinLimit = flaggedNodes < 6;
                if (withinLimit) {
                    flaggedNodes++;
                }
                this.setNodeAttributes(childNode, withinLimit);
                pendingClusters.push(childCluster);
                pendingNodes.push(childNode);
            }
        }
        this.treeRoot = rootNode;
        this.calculateLeafsUnderneath();
        this.updateScores();
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.out.println("finished clustering");
}
Also used : DefaultOrganigramNode(org.primefaces.model.DefaultOrganigramNode) ClusterWrapper(de.ipbhalle.metfraglib.fingerprint.ClusterWrapper) DefaultOrganigramNode(org.primefaces.model.DefaultOrganigramNode) OrganigramNode(org.primefaces.model.OrganigramNode) ICandidate(de.ipbhalle.metfraglib.interfaces.ICandidate) CandidateList(de.ipbhalle.metfraglib.list.CandidateList) MetFragResult(de.ipbhalle.metfragweb.datatype.MetFragResult)

Aggregations

CandidateList (de.ipbhalle.metfraglib.list.CandidateList)80 ICandidate (de.ipbhalle.metfraglib.interfaces.ICandidate)43 MetFragGlobalSettings (de.ipbhalle.metfraglib.settings.MetFragGlobalSettings)22 SortedScoredCandidateList (de.ipbhalle.metfraglib.list.SortedScoredCandidateList)20 MultipleHeadersFoundInInputDatabaseException (de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException)18 File (java.io.File)18 IOException (java.io.IOException)18 ArrayList (java.util.ArrayList)18 MatchList (de.ipbhalle.metfraglib.list.MatchList)17 ScoredCandidateList (de.ipbhalle.metfraglib.list.ScoredCandidateList)17 RelativeIntensityNotDefinedException (de.ipbhalle.metfraglib.exceptions.RelativeIntensityNotDefinedException)15 FileWriter (java.io.FileWriter)14 TopDownPrecursorCandidate (de.ipbhalle.metfraglib.candidate.TopDownPrecursorCandidate)12 LocalPSVDatabase (de.ipbhalle.metfraglib.database.LocalPSVDatabase)12 AtomTypeNotKnownFromInputListException (de.ipbhalle.metfraglib.exceptions.AtomTypeNotKnownFromInputListException)9 LocalCSVDatabase (de.ipbhalle.metfraglib.database.LocalCSVDatabase)7 Settings (de.ipbhalle.metfraglib.settings.Settings)7 SQLException (java.sql.SQLException)7 IDatabase (de.ipbhalle.metfraglib.interfaces.IDatabase)6 CombinedMetFragProcess (de.ipbhalle.metfraglib.process.CombinedMetFragProcess)6