Search in sources :

Example 1 with IDatabase

use of de.ipbhalle.metfraglib.interfaces.IDatabase in project MetFragRelaunched by ipb-halle.

the class AddMissingNonExplainedPeaks method main.

/**
 * Adds the property "NonExplainedMasses" to every candidate of a MetFrag result file and
 * writes the extended candidate list to a new file.
 *
 * <p>Expected arguments: {@code args[0]} parameter file, {@code args[1]} result file (read
 * as PSV when its name ends with "psv", otherwise as CSV) and {@code args[2]} output file
 * (written as PSV when its name ends with "psv", otherwise as CSV).
 *
 * <p>For each candidate the masses of all peaks of the peak list that are not contained in
 * the candidate's "ExplPeaks" property are joined with ";". The value is "NA" when no such
 * peak exists.
 *
 * @param args command line arguments as described above
 * @throws IllegalArgumentException if fewer than three arguments are given
 * @throws IllegalStateException if a candidate has no "ExplPeaks" property
 * @throws Exception if the peak list reader cannot be instantiated or if reading the
 *         candidates or writing the result fails
 */
public static void main(String[] args) throws Exception {
    if (args == null || args.length < 3) {
        throw new IllegalArgumentException("Expected arguments: paramfile resultfile outputfile");
    }
    String paramfile = args[0];
    String resultfile = args[1];
    String outputfile = args[2];
    Settings settings = getSettings(paramfile);
    settings.set(VariableNames.LOCAL_DATABASE_PATH_NAME, resultfile);
    IPeakListReader peakListReader = (IPeakListReader) Class.forName((String) settings.get(VariableNames.METFRAG_PEAK_LIST_READER_NAME)).getConstructor(Settings.class).newInstance(settings);
    SettingsChecker settingsChecker = new SettingsChecker();
    if (!settingsChecker.check(settings)) {
        System.err.println("Problems reading settings");
        return;
    }
    settings.set(VariableNames.PEAK_LIST_NAME, peakListReader.read());
    String dbFilename = (String) settings.get(VariableNames.LOCAL_DATABASE_PATH_NAME);
    IDatabase db;
    if (dbFilename.endsWith("psv")) {
        db = new LocalPSVDatabase(settings);
    } else {
        db = new LocalCSVDatabase(settings);
    }
    // Read failures propagate to the caller: continuing without candidates could only
    // end in a NullPointerException further down.
    ArrayList<String> ids = db.getCandidateIdentifiers();
    CandidateList candidates = db.getCandidateByIdentifier(ids);
    if (candidates == null || candidates.getNumberElements() == 0) {
        System.out.println("No candidates found in " + (String) settings.get(VariableNames.LOCAL_DATABASE_PATH_NAME));
        return;
    }
    DefaultPeakList peaklist = (DefaultPeakList) settings.get(VariableNames.PEAK_LIST_NAME);
    for (int i = 0; i < candidates.getNumberElements(); i++) {
        String explPeaks = (String) candidates.getElement(i).getProperty("ExplPeaks");
        if (explPeaks == null) {
            throw new IllegalStateException("Candidate " + candidates.getElement(i).getIdentifier() + " has no ExplPeaks property");
        }
        // "NA" marks a candidate without explained peaks, so every peak is non-explained
        boolean noPeaksExplained = explPeaks.equals("NA");
        Double[] explPeaksMasses = null;
        if (!noPeaksExplained) {
            explPeaksMasses = getDoubleArrayFromPeakList(explPeaks.split(";"));
        }
        StringBuilder nonExplainedMasses = new StringBuilder();
        for (int k = 0; k < peaklist.getNumberElements(); k++) {
            if (noPeaksExplained || !isContained(((IPeak) peaklist.getElement(k)).getMass(), explPeaksMasses)) {
                if (nonExplainedMasses.length() > 0) {
                    nonExplainedMasses.append(';');
                }
                nonExplainedMasses.append(((IPeak) peaklist.getElement(k)).getMass());
            }
        }
        String nonExplPeaksString = nonExplainedMasses.length() == 0 ? "NA" : nonExplainedMasses.toString();
        candidates.getElement(i).setProperty("NonExplainedMasses", nonExplPeaksString);
    }
    IWriter writer;
    if (outputfile.endsWith("psv")) {
        writer = new CandidateListWriterPSV();
    } else {
        writer = new CandidateListWriterCSV();
    }
    writer.write(candidates, outputfile);
}
Also used : IPeak(de.ipbhalle.metfraglib.interfaces.IPeak) IDatabase(de.ipbhalle.metfraglib.interfaces.IDatabase) IWriter(de.ipbhalle.metfraglib.interfaces.IWriter) DefaultPeakList(de.ipbhalle.metfraglib.list.DefaultPeakList) LocalCSVDatabase(de.ipbhalle.metfraglib.database.LocalCSVDatabase) MultipleHeadersFoundInInputDatabaseException(de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException) IPeakListReader(de.ipbhalle.metfraglib.interfaces.IPeakListReader) CandidateListWriterCSV(de.ipbhalle.metfraglib.writer.CandidateListWriterCSV) LocalPSVDatabase(de.ipbhalle.metfraglib.database.LocalPSVDatabase) SettingsChecker(de.ipbhalle.metfraglib.parameter.SettingsChecker) CandidateList(de.ipbhalle.metfraglib.list.CandidateList) MultipleHeadersFoundInInputDatabaseException(de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException) CandidateListWriterPSV(de.ipbhalle.metfraglib.writer.CandidateListWriterPSV) MetFragGlobalSettings(de.ipbhalle.metfraglib.settings.MetFragGlobalSettings) Settings(de.ipbhalle.metfraglib.settings.Settings)

Example 2 with IDatabase

use of de.ipbhalle.metfraglib.interfaces.IDatabase in project MetFragRelaunched by ipb-halle.

the class CombineResultsForAnnotation method getMatchingCandidate.

/**
 * Searches MetFrag result files for a candidate with a given value of the property
 * {@code VariableNames.INCHI_KEY_1_NAME}.
 *
 * <p>Only files whose name starts with {@code id} are read. Files ending with "csv" are
 * read as CSV, all others as PSV. A file whose candidates cannot be read is reported on
 * stderr and skipped, and the search continues with the next file.
 *
 * @param metfragFiles result files to search; {@code null} yields {@code null}
 * @param id prefix a file name must start with to be considered
 * @param inchikey1 value compared with each candidate's
 *        {@code VariableNames.INCHI_KEY_1_NAME} property
 * @return the first matching candidate, or {@code null} if no candidate matches
 */
public static ICandidate getMatchingCandidate(File[] metfragFiles, String id, String inchikey1) {
    if (metfragFiles == null) {
        return null;
    }
    for (int i = 0; i < metfragFiles.length; i++) {
        File resultFile = metfragFiles[i];
        if (!resultFile.getName().startsWith(id)) {
            continue;
        }
        MetFragGlobalSettings settings = new MetFragGlobalSettings();
        settings.set(VariableNames.LOCAL_DATABASE_PATH_NAME, resultFile.getAbsolutePath());
        IDatabase db;
        if (resultFile.getName().endsWith("csv")) {
            db = new LocalCSVDatabase(settings);
        } else {
            db = new LocalPSVDatabase(settings);
        }
        CandidateList candidates = null;
        try {
            ArrayList<String> identifiers = db.getCandidateIdentifiers();
            candidates = db.getCandidateByIdentifier(identifiers);
        } catch (Exception e) {
            System.err.println("Could not read candidates from " + resultFile.getAbsolutePath());
            e.printStackTrace();
        }
        if (candidates == null) {
            // nothing could be loaded from this file; try the next one
            continue;
        }
        for (int ii = 0; ii < candidates.getNumberElements(); ii++) {
            String candidateKey = (String) candidates.getElement(ii).getProperty(VariableNames.INCHI_KEY_1_NAME);
            // candidates without the property can never match
            if (candidateKey != null && candidateKey.equals(inchikey1)) {
                return candidates.getElement(ii);
            }
        }
    }
    return null;
}
Also used : IDatabase(de.ipbhalle.metfraglib.interfaces.IDatabase) MetFragGlobalSettings(de.ipbhalle.metfraglib.settings.MetFragGlobalSettings) LocalPSVDatabase(de.ipbhalle.metfraglib.database.LocalPSVDatabase) CandidateList(de.ipbhalle.metfraglib.list.CandidateList) MultipleHeadersFoundInInputDatabaseException(de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException) LocalCSVDatabase(de.ipbhalle.metfraglib.database.LocalCSVDatabase) IOException(java.io.IOException) MultipleHeadersFoundInInputDatabaseException(de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException)

Example 3 with IDatabase

use of de.ipbhalle.metfraglib.interfaces.IDatabase in project MetFragRelaunched by ipb-halle.

the class CalculateHydrogenDeuteriumScore method main.

/**
 * Combines the MetFrag results of a hydrogen and a deuterium measurement and writes the
 * deuterium candidates, extended by pair-based scores, to a PSV file.
 *
 * <p>Expected arguments: hydrogenResultFileName deuteriumResultFileName maxNumDeuteriums
 * outputFileName mzppm mzabs isPositive. Both result files are read as PSV.
 *
 * <p>Candidates of both files are matched by identifier. A deuterium identifier containing
 * "-" is matched to the hydrogen candidate whose identifier is the part before the first
 * "-", as long as the suffixes form the consecutive sequence -1, -2, ... Deuterium
 * candidates without a hydrogen counterpart are reported on stderr and skipped. Candidates
 * where either side has "NA" as "FormulasOfExplPeaks" are not written.
 *
 * <p>Each written candidate receives the properties "FragmenterDeuteriumScore",
 * "HD-PairScore", "NumberEqualPairs", "NumberDeuteriumPairs" and "OSN-DeuteriumsScore", and
 * loses the three fragment bit array properties.
 *
 * <p>The process exits with status 1 on wrong arguments, unreadable input, duplicate
 * identifiers or a failed write.
 *
 * @param args command line arguments as described above
 */
public static void main(String[] args) {
    if (args == null || args.length != 7) {
        System.err.println("hydrogenResultFileName deuteriumResultFileName maxNumDeuteriums outputFileName mzppm mzabs isPositive");
        System.exit(1);
    }
    String hydrogenResultFileName = args[0];
    String deuteriumResultFileName = args[1];
    double maximumNumberDeuteriums = Double.parseDouble(args[2]);
    String outputFileName = args[3];
    ppm = Double.parseDouble(args[4]);
    abs = Double.parseDouble(args[5]);
    boolean isPositive = "true".equalsIgnoreCase(args[6].trim());
    MetFragGlobalSettings hydrogenSettings = new MetFragGlobalSettings();
    MetFragGlobalSettings deuteriumSettings = new MetFragGlobalSettings();
    hydrogenSettings.set(VariableNames.LOCAL_DATABASE_PATH_NAME, hydrogenResultFileName);
    deuteriumSettings.set(VariableNames.LOCAL_DATABASE_PATH_NAME, deuteriumResultFileName);
    IDatabase hydrogenDatabase = new LocalPSVDatabase(hydrogenSettings);
    IDatabase deuteriumDatabase = new LocalPSVDatabase(deuteriumSettings);
    CandidateList hydrogenCandidateList = null;
    CandidateList deuteriumCandidateList = null;
    try {
        ArrayList<String> hydrogenIdentifiers = hydrogenDatabase.getCandidateIdentifiers();
        ArrayList<String> deuteriumIdentifiers = deuteriumDatabase.getCandidateIdentifiers();
        hydrogenCandidateList = hydrogenDatabase.getCandidateByIdentifier(hydrogenIdentifiers);
        deuteriumCandidateList = deuteriumDatabase.getCandidateByIdentifier(deuteriumIdentifiers);
    } catch (Exception e) {
        e.printStackTrace();
    }
    if (hydrogenCandidateList == null || deuteriumCandidateList == null) {
        // without both lists nothing can be scored; stop instead of failing on a null list below
        System.err.println("Error: Could not read candidates from " + hydrogenResultFileName + " and " + deuteriumResultFileName);
        System.exit(1);
    }
    // index both candidate lists by identifier; duplicates are treated as a broken input file
    Hashtable<String, ICandidate> hashTableHydrogenResults = new Hashtable<String, ICandidate>();
    Hashtable<String, ICandidate> hashTableDeuteriumResults = new Hashtable<String, ICandidate>();
    for (int i = 0; i < hydrogenCandidateList.getNumberElements(); i++) {
        ICandidate candidate = hydrogenCandidateList.getElement(i);
        if (hashTableHydrogenResults.containsKey(candidate.getIdentifier())) {
            System.err.println("Error: Identifier " + candidate.getIdentifier() + " already found! Something wrong with the candidate list?");
            System.exit(1);
        }
        hashTableHydrogenResults.put(candidate.getIdentifier(), candidate);
    }
    for (int i = 0; i < deuteriumCandidateList.getNumberElements(); i++) {
        ICandidate candidate = deuteriumCandidateList.getElement(i);
        if (hashTableDeuteriumResults.containsKey(candidate.getIdentifier())) {
            System.err.println("Error: Identifier " + candidate.getIdentifier() + " already found! Something wrong with the candidate list?");
            System.exit(1);
        }
        hashTableDeuteriumResults.put(candidate.getIdentifier(), candidate);
    }
    System.out.println("#####");
    CandidateList mergedCandidateList = new CandidateList();
    // register the hydrogen candidate under every suffixed deuterium identifier (id-1, id-2, ...)
    for (String deuteriumIdentifier : hashTableDeuteriumResults.keySet()) {
        if (!deuteriumIdentifier.matches(".*-.*")) {
            continue;
        }
        String baseIdentifier = deuteriumIdentifier.replaceAll("-.*", "");
        ICandidate hydrogenCandidate = hashTableHydrogenResults.get(baseIdentifier);
        if (hydrogenCandidate == null) {
            // Hashtable rejects null values; the missing counterpart is reported in the scoring loop
            continue;
        }
        int add = 1;
        String nextIdentifier = baseIdentifier + "-" + add;
        while (hashTableDeuteriumResults.containsKey(nextIdentifier)) {
            hashTableHydrogenResults.put(nextIdentifier, hydrogenCandidate);
            add++;
            nextIdentifier = baseIdentifier + "-" + add;
        }
    }
    for (String identifier : hashTableDeuteriumResults.keySet()) {
        ICandidate hydrogenCandidate = hashTableHydrogenResults.get(identifier);
        ICandidate deuteriumCandidate = hashTableDeuteriumResults.get(identifier);
        if (hydrogenCandidate == null) {
            System.err.println("Warning: No hydrogen candidate found for identifier " + identifier + ". Candidate skipped.");
            continue;
        }
        String formulasOfExplPeaksHydrogenString = (String) hydrogenCandidate.getProperty("FormulasOfExplPeaks");
        String formulasOfExplPeaksDeuteriumString = (String) deuteriumCandidate.getProperty("FormulasOfExplPeaks");
        int numberPeaksExplained = Math.min(Integer.parseInt((String) deuteriumCandidate.getProperty("NoExplPeaks")), Integer.parseInt((String) hydrogenCandidate.getProperty("NoExplPeaks")));
        if (formulasOfExplPeaksHydrogenString.equals("NA") || formulasOfExplPeaksDeuteriumString.equals("NA")) {
            // no explained peaks on one side: nothing to pair, candidate is not written
            continue;
        }
        // entries have the form mass:formula and are separated by ";"
        String[] tmpHydrogen = formulasOfExplPeaksHydrogenString.split(";");
        String[] tmpDeuterium = formulasOfExplPeaksDeuteriumString.split(";");
        double[] massesHydrogen = new double[tmpHydrogen.length];
        double[] massesDeuterium = new double[tmpDeuterium.length];
        String[] formulasHydrogen = new String[tmpHydrogen.length];
        String[] formulasDeuterium = new String[tmpDeuterium.length];
        for (int i = 0; i < massesHydrogen.length; i++) {
            String[] tmp = tmpHydrogen[i].split(":");
            massesHydrogen[i] = Double.parseDouble(tmp[0]);
            formulasHydrogen[i] = tmp[1];
        }
        for (int i = 0; i < massesDeuterium.length; i++) {
            String[] tmp = tmpDeuterium[i].split(":");
            massesDeuterium[i] = Double.parseDouble(tmp[0]);
            formulasDeuterium[i] = tmp[1];
        }
        int[][] equalPeakPairs = getEqualPeakPairs2(massesHydrogen, massesDeuterium, ppm, abs);
        int[][] deuteriumEqualPeakPairs = getDeuteriumEqualPeakPairs2(massesHydrogen, massesDeuterium, ppm, abs, maximumNumberDeuteriums);
        String fragmentAtomFastBitArraysHydrogenString = (String) hydrogenCandidate.getProperty("FragmentAtomFastBitArrays");
        String fragmentAtomFastBitArraysDeuteriumString = (String) deuteriumCandidate.getProperty("FragmentAtomFastBitArrays");
        // one entry per explained peak; alternatives for the same peak are separated by "/"
        String[] fragmentAtomFastBitArraysHydrogen = fragmentAtomFastBitArraysHydrogenString.split(";");
        String[] fragmentAtomFastBitArraysDeuterium = fragmentAtomFastBitArraysDeuteriumString.split(";");
        int countEqualPeakPairs = 0;
        int countDeuteriumEqualPeakPairs = 0;
        // pairs with the same fragment on both sides and no deuterium in the fragment formula
        ArrayList<Integer> hydrogenMassesUsedForEqualPairs = new ArrayList<Integer>();
        ArrayList<Integer> deuteriumMassesUsedForEqualPairs = new ArrayList<Integer>();
        for (int i = 0; i < equalPeakPairs.length; i++) {
            String[] singleFragmentAtomFastBitArraysHydrogen = fragmentAtomFastBitArraysHydrogen[equalPeakPairs[i][0]].split("/");
            String[] singleFragmentAtomFastBitArraysDeuterium = fragmentAtomFastBitArraysDeuterium[equalPeakPairs[i][1]].split("/");
            String[] singleFormulasOfExplPeaksDeuteriumString = formulasDeuterium[equalPeakPairs[i][1]].split("/");
            for (int k = 0; k < singleFragmentAtomFastBitArraysDeuterium.length; k++) {
                int numDeuteriums = containsDeuterium(singleFormulasOfExplPeaksDeuteriumString[k]);
                if (!isPositive)
                    numDeuteriums = containsDeuterium2(singleFormulasOfExplPeaksDeuteriumString[k]);
                // must start as false: otherwise only the first alternative k is ever examined
                boolean found = false;
                for (int l = 0; l < singleFragmentAtomFastBitArraysHydrogen.length; l++) {
                    if (singleFragmentAtomFastBitArraysDeuterium[k].equals(singleFragmentAtomFastBitArraysHydrogen[l]) && numDeuteriums == 0 && !hydrogenMassesUsedForEqualPairs.contains(equalPeakPairs[i][0]) && !deuteriumMassesUsedForEqualPairs.contains(equalPeakPairs[i][1])) {
                        countEqualPeakPairs++;
                        found = true;
                        hydrogenMassesUsedForEqualPairs.add(equalPeakPairs[i][0]);
                        deuteriumMassesUsedForEqualPairs.add(equalPeakPairs[i][1]);
                        System.out.println("equal " + massesHydrogen[equalPeakPairs[i][0]] + " " + massesDeuterium[equalPeakPairs[i][1]]);
                        break;
                    }
                }
                if (found)
                    break;
            }
        }
        // pairs with the same fragment whose deuterium count equals the count stored with the pair;
        // peaks already consumed by an equal pair are excluded
        ArrayList<Integer> hydrogenMassesUsedForDeuteriumEqualPairs = new ArrayList<Integer>();
        ArrayList<Integer> deuteriumMassesUsedForDeuteriumEqualPairs = new ArrayList<Integer>();
        for (int i = 0; i < deuteriumEqualPeakPairs.length; i++) {
            String[] singleFragmentAtomFastBitArraysHydrogen = fragmentAtomFastBitArraysHydrogen[deuteriumEqualPeakPairs[i][0]].split("/");
            String[] singleFragmentAtomFastBitArraysDeuterium = fragmentAtomFastBitArraysDeuterium[deuteriumEqualPeakPairs[i][1]].split("/");
            String[] singleFormulasOfExplPeaksDeuteriumString = formulasDeuterium[deuteriumEqualPeakPairs[i][1]].split("/");
            for (int k = 0; k < singleFragmentAtomFastBitArraysDeuterium.length; k++) {
                int numDeuteriums = containsDeuterium2(singleFormulasOfExplPeaksDeuteriumString[k]);
                boolean found = false;
                for (int l = 0; l < singleFragmentAtomFastBitArraysHydrogen.length; l++) {
                    if (singleFragmentAtomFastBitArraysDeuterium[k].equals(singleFragmentAtomFastBitArraysHydrogen[l]) && numDeuteriums > 0 && numDeuteriums == deuteriumEqualPeakPairs[i][2] && !hydrogenMassesUsedForEqualPairs.contains(deuteriumEqualPeakPairs[i][0]) && !deuteriumMassesUsedForEqualPairs.contains(deuteriumEqualPeakPairs[i][1]) && !hydrogenMassesUsedForDeuteriumEqualPairs.contains(deuteriumEqualPeakPairs[i][0]) && !deuteriumMassesUsedForDeuteriumEqualPairs.contains(deuteriumEqualPeakPairs[i][1])) {
                        countDeuteriumEqualPeakPairs++;
                        hydrogenMassesUsedForDeuteriumEqualPairs.add(deuteriumEqualPeakPairs[i][0]);
                        deuteriumMassesUsedForDeuteriumEqualPairs.add(deuteriumEqualPeakPairs[i][1]);
                        System.out.println("deuterium " + massesHydrogen[deuteriumEqualPeakPairs[i][0]] + " " + massesDeuterium[deuteriumEqualPeakPairs[i][1]]);
                        found = true;
                        break;
                    }
                }
                if (found)
                    break;
            }
        }
        deuteriumCandidate.setProperty(VariableNames.METFRAG_FRAGMENTER_SCORE_NAME, hydrogenCandidate.getProperty(VariableNames.METFRAG_FRAGMENTER_SCORE_NAME));
        deuteriumCandidate.setProperty("FragmenterDeuteriumScore", deuteriumCandidate.getProperty("Score"));
        // same zero-denominator convention as for OSN-DeuteriumsScore below
        double pairScore = 0.0;
        if (numberPeaksExplained != 0)
            pairScore = (double) (countEqualPeakPairs + countDeuteriumEqualPeakPairs) / (double) numberPeaksExplained;
        deuteriumCandidate.setProperty("HD-PairScore", pairScore);
        deuteriumCandidate.setProperty("NumberEqualPairs", countEqualPeakPairs);
        deuteriumCandidate.setProperty("NumberDeuteriumPairs", countDeuteriumEqualPeakPairs);
        double osnDeuteriums = Double.parseDouble((String) deuteriumCandidate.getProperty("OSN-Deuteriums"));
        double missedDeuteriums = Double.parseDouble((String) deuteriumCandidate.getProperty("MissedDeuteriums"));
        // the denominator is the maximum deuterium count minus one in positive mode, plus one otherwise
        double osnDenominator = isPositive ? maximumNumberDeuteriums - 1 : maximumNumberDeuteriums + 1;
        if (osnDenominator == 0)
            deuteriumCandidate.setProperty("OSN-DeuteriumsScore", 0.0);
        else
            deuteriumCandidate.setProperty("OSN-DeuteriumsScore", (osnDeuteriums - missedDeuteriums) / osnDenominator);
        deuteriumCandidate.getProperties().remove("FragmentBrokenBondFastBitArrays");
        deuteriumCandidate.getProperties().remove("FragmentBondFastBitArrays");
        deuteriumCandidate.getProperties().remove("FragmentAtomFastBitArrays");
        mergedCandidateList.addElement(deuteriumCandidate);
    }
    java.io.File outputFile = new java.io.File(outputFileName);
    CandidateListWriterPSV candidateWriter = new CandidateListWriterPSV();
    try {
        candidateWriter.write(mergedCandidateList, outputFile.getName(), outputFile.getParent());
    } catch (Exception e) {
        e.printStackTrace();
        System.err.println("Error: Could not write " + outputFileName);
        System.exit(1);
    }
}
Also used : MetFragGlobalSettings(de.ipbhalle.metfraglib.settings.MetFragGlobalSettings) ArrayList(java.util.ArrayList) ICandidate(de.ipbhalle.metfraglib.interfaces.ICandidate) CandidateList(de.ipbhalle.metfraglib.list.CandidateList) MultipleHeadersFoundInInputDatabaseException(de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException) CandidateListWriterPSV(de.ipbhalle.metfraglib.writer.CandidateListWriterPSV) IDatabase(de.ipbhalle.metfraglib.interfaces.IDatabase) Hashtable(java.util.Hashtable) MultipleHeadersFoundInInputDatabaseException(de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException) LocalPSVDatabase(de.ipbhalle.metfraglib.database.LocalPSVDatabase)

Example 4 with IDatabase

use of de.ipbhalle.metfraglib.interfaces.IDatabase in project MetFragRelaunched by ipb-halle.

the class WriteMetFragToMZtab method main.

/**
 * Converts the MetFrag result files of a folder into a single mzTab file.
 *
 * <p>The arguments are parsed by {@code getArgs} into {@code argsHash}; the keys read here
 * are "metfragFolder", "numberCandidates" and "output". Every regular file of the folder
 * is read as CSV when its name ends with "csv", otherwise as PSV. Retention time and m/z
 * are taken from the first two "_"-separated parts of the file name and default to 0 when
 * they cannot be parsed. At most "numberCandidates" candidates per file are considered,
 * and candidates for which structure, formula or InChIKey cannot be derived are left out.
 *
 * <p>Files whose candidates cannot be read are reported on stderr and skipped. The process
 * exits with status 1 on invalid arguments and with status 2 when the result folder does
 * not exist or cannot be listed.
 *
 * @param args command line arguments handed to {@code getArgs}
 */
public static void main(String[] args) {
    if (!getArgs(args)) {
        System.err.println("Error reading parameters.");
        System.exit(1);
    }
    String metfragFolder = argsHash.get("metfragFolder");
    int numberCandidates = Integer.parseInt(argsHash.get("numberCandidates"));
    String output = argsHash.get("output");
    // check metfrag result folder
    File resultFolder = new File(metfragFolder);
    if (!resultFolder.exists()) {
        System.err.println(resultFolder.getAbsolutePath() + " not found.");
        System.exit(2);
    }
    // get files to convert to mztab; listFiles() yields null for a non-directory or on an I/O error
    File[] files = resultFolder.listFiles();
    if (files == null) {
        System.err.println(resultFolder.getAbsolutePath() + " is not a readable directory.");
        System.exit(2);
    }
    // mztab feature table
    FeatureTable candidateTable = MSDKObjectBuilder.getFeatureTable("candidateTable", DataPointStoreFactory.getMemoryDataStore());
    // define columns
    FeatureTableColumn<Integer> idColumn = MSDKObjectBuilder.getIdFeatureTableColumn();
    FeatureTableColumn<Double> mzColumn = MSDKObjectBuilder.getMzFeatureTableColumn();
    FeatureTableColumn<ChromatographyInfo> chromatographyInfoColumn = MSDKObjectBuilder.getChromatographyInfoFeatureTableColumn();
    FeatureTableColumn<List<IonAnnotation>> ionAnnotationColumn = MSDKObjectBuilder.getIonAnnotationFeatureTableColumn();
    FeatureTableColumn<Integer> chargeColumn = MSDKObjectBuilder.getChargeFeatureTableColumn();
    // add columns
    candidateTable.addColumn(idColumn);
    candidateTable.addColumn(mzColumn);
    candidateTable.addColumn(chromatographyInfoColumn);
    candidateTable.addColumn(ionAnnotationColumn);
    candidateTable.addColumn(chargeColumn);
    // current row number of feature table
    int rownumber = 1;
    for (int i = 0; i < files.length; i++) {
        File resultFile = files[i];
        if (!resultFile.isFile()) {
            // sub folders cannot be read as candidate files
            continue;
        }
        MetFragGlobalSettings settings = new MetFragGlobalSettings();
        settings.set(VariableNames.LOCAL_DATABASE_PATH_NAME, resultFile.getAbsolutePath());
        IDatabase db;
        if (resultFile.getName().endsWith("csv")) {
            db = new LocalCSVDatabase(settings);
        } else {
            db = new LocalPSVDatabase(settings);
        }
        // file names are expected to look like <rt>_<mz>_...; both values stay 0 otherwise
        Float rt = 0.0f;
        Double mz = 0.0;
        String[] nameParts = resultFile.getName().split("_");
        boolean hasRtAndMz = false;
        if (nameParts.length >= 2) {
            try {
                rt = Float.parseFloat(nameParts[0]);
                mz = Double.parseDouble(nameParts[1]);
                hasRtAndMz = true;
            } catch (NumberFormatException e) {
                // reported below together with file names that have too few parts
            }
        }
        if (!hasRtAndMz) {
            System.out.println(resultFile.getName() + " has no rt and mz information. Check file name.");
        }
        CandidateList candidates = null;
        try {
            ArrayList<String> identifiers = db.getCandidateIdentifiers();
            candidates = db.getCandidateByIdentifier(identifiers);
        } catch (Exception e1) {
            e1.printStackTrace();
        }
        if (candidates == null) {
            System.err.println("Could not read candidates from " + resultFile.getAbsolutePath() + ". File skipped.");
            continue;
        }
        // get candidates and store them in the FeatureTable
        for (int candidateIndex = 0; candidateIndex < numberCandidates && candidateIndex < candidates.getNumberElements(); candidateIndex++) {
            ICandidate candidate = candidates.getElement(candidateIndex);
            FeatureTableRow currentRow = MSDKObjectBuilder.getFeatureTableRow(candidateTable, rownumber);
            FeatureTableColumn<Object> column;
            // Add common data to columns
            // Common column: Id
            column = candidateTable.getColumn(ColumnName.ID, null);
            currentRow.setData(column, Integer.valueOf(rownumber));
            // Common column: m/z
            column = candidateTable.getColumn(ColumnName.MZ, null);
            currentRow.setData(column, mz);
            // Annotation
            column = candidateTable.getColumn(ColumnName.IONANNOTATION, null);
            List<IonAnnotation> ionAnnotations = new ArrayList<IonAnnotation>();
            IonAnnotation ionAnnotation = MSDKObjectBuilder.getIonAnnotation();
            ionAnnotation.setAnnotationId(candidate.getIdentifier());
            try {
                ionAnnotation.setChemicalStructure(candidate.getAtomContainer());
                ionAnnotation.setFormula(MolecularFormulaManipulator.getMolecularFormula(candidate.getAtomContainer()));
                ionAnnotation.setInchiKey(InChIGeneratorFactory.getInstance().getInChIGenerator(candidate.getAtomContainer()).getInchiKey());
            } catch (Exception e) {
                // candidate without usable structure: the prepared row is dropped and the row number reused
                continue;
            }
            ionAnnotation.setDescription((String) candidate.getProperty(VariableNames.COMPOUND_NAME_NAME));
            ionAnnotations.add(ionAnnotation);
            currentRow.setData(column, ionAnnotations);
            // RT
            ChromatographyInfo cgInfo = MSDKObjectBuilder.getChromatographyInfo1D(SeparationType.LC, rt);
            FeatureTableColumn<ChromatographyInfo> rtcolumn = candidateTable.getColumn("Chromatography Info", null, ChromatographyInfo.class);
            currentRow.setData(rtcolumn, cgInfo);
            // Add row to feature table
            candidateTable.addRow(currentRow);
            rownumber++;
        }
    }
    // write out mzTab file
    File outputFile = new File(output);
    MzTabFileExportMethod method = new MzTabFileExportMethod(candidateTable, outputFile, true);
    try {
        method.execute();
    } catch (MSDKException e) {
        e.printStackTrace();
        System.err.println("Could not write mzTab file.");
    }
}
Also used : MetFragGlobalSettings(de.ipbhalle.metfraglib.settings.MetFragGlobalSettings) ArrayList(java.util.ArrayList) ICandidate(de.ipbhalle.metfraglib.interfaces.ICandidate) ChromatographyInfo(io.github.msdk.datamodel.rawdata.ChromatographyInfo) CandidateList(de.ipbhalle.metfraglib.list.CandidateList) CandidateList(de.ipbhalle.metfraglib.list.CandidateList) ArrayList(java.util.ArrayList) List(java.util.List) MultipleHeadersFoundInInputDatabaseException(de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException) MzTabFileExportMethod(io.github.msdk.io.mztab.MzTabFileExportMethod) FeatureTable(io.github.msdk.datamodel.featuretables.FeatureTable) IDatabase(de.ipbhalle.metfraglib.interfaces.IDatabase) MSDKException(io.github.msdk.MSDKException) LocalCSVDatabase(de.ipbhalle.metfraglib.database.LocalCSVDatabase) MSDKException(io.github.msdk.MSDKException) MultipleHeadersFoundInInputDatabaseException(de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException) LocalPSVDatabase(de.ipbhalle.metfraglib.database.LocalPSVDatabase) FeatureTableRow(io.github.msdk.datamodel.featuretables.FeatureTableRow) File(java.io.File) IonAnnotation(io.github.msdk.datamodel.ionannotations.IonAnnotation)

Example 5 with IDatabase

use of de.ipbhalle.metfraglib.interfaces.IDatabase in project MetFragRelaunched by ipb-halle.

the class WriteFingerprintLossAnnotationFile method main.

/**
 * Writes a loss-fingerprint annotation file from a candidate result file.
 *
 * Parameters are obtained from {@code readParameters(args)}; the keys read here are:
 *
 * filename - input file name (required)
 * probtype - probability type (required): 0 - counts; 1 - P ( s | p ); 2 - P ( p | s );
 *            3 - P ( p , s ) from s; 4 - P ( p , s ) from p;
 *            5 - P ( s | p ), P ( p | s ) and P ( p , s )_s, each emitted separately
 *                (to output + "_1", "_2" and "_3" when output is given)
 * occurThresh - optional threshold handed to filterByOccurence
 * output - optional output path; without it the result is printed
 * csv - input format: "1" reads CSV, "auto" picks PSV when filename ends with "psv" and CSV
 *       otherwise, anything else (including absent) reads PSV
 * fingerprinttype - optional suffix appended to the property name "LossFingerprintOfExplPeaks"
 * includeNonExplained - "F", "f", "False", "false" or "FALSE" replaces the non-explained
 *                       loss list in the first output line by "NA"
 *
 * @param args command line arguments as understood by readParameters
 * @throws MultipleHeadersFoundInInputDatabaseException propagated from the database reader
 * @throws Exception on any other read, parse or write failure
 */
public static void main(String[] args) throws MultipleHeadersFoundInInputDatabaseException, Exception {
    java.util.Hashtable<String, String> readParameters = readParameters(args);
    if (!readParameters.containsKey("filename")) {
        System.err.println("filename missing");
        System.exit(1);
    }
    if (!readParameters.containsKey("probtype")) {
        System.err.println("probtype missing");
        System.exit(1);
    }
    String filename = readParameters.get("filename");
    int probabilityType = Integer.parseInt(readParameters.get("probtype"));

    // optional parameters keep their defaults when the key is absent
    String output = null;
    Integer occurThresh = null;
    String csv = "";
    String fingerprinttype = "";
    String includeNonExplainedString = "";
    if (readParameters.containsKey("output"))
        output = readParameters.get("output");
    if (readParameters.containsKey("occurThresh"))
        occurThresh = Integer.parseInt(readParameters.get("occurThresh"));
    if (readParameters.containsKey("csv"))
        csv = readParameters.get("csv");
    if (readParameters.containsKey("fingerprinttype"))
        fingerprinttype = readParameters.get("fingerprinttype");
    if (readParameters.containsKey("includeNonExplained"))
        includeNonExplainedString = readParameters.get("includeNonExplained");

    // only these exact spellings switch the non-explained list off
    boolean includeNonExplained = !(includeNonExplainedString.equals("F") || includeNonExplainedString.equals("f") || includeNonExplainedString.equals("False") || includeNonExplainedString.equals("false") || includeNonExplainedString.equals("FALSE"));

    ArrayList<Double> peakMassesSorted = new ArrayList<Double>();
    ArrayList<String> fingerprintsSorted = new ArrayList<String>();
    ArrayList<Double> nonExplainedPeaks = new ArrayList<Double>();
    ArrayList<Integer> peakMassCounts = new ArrayList<Integer>();

    Settings settings = new Settings();
    settings.set(VariableNames.LOCAL_DATABASE_PATH_NAME, filename);
    IDatabase db = null;
    if (csv.equals("1")) {
        db = new LocalCSVDatabase(settings);
    } else if (csv.equals("auto")) {
        // decide by file extension
        if (filename.endsWith("psv")) {
            db = new LocalPSVDatabase(settings);
        } else {
            db = new LocalCSVDatabase(settings);
        }
    } else {
        db = new LocalPSVDatabase(settings);
    }
    java.util.ArrayList<String> ids = db.getCandidateIdentifiers();
    CandidateList candidateList = db.getCandidateByIdentifier(ids);

    // collect (loss mass, fingerprint) pairs and non-explained losses of all candidates
    for (int i = 0; i < candidateList.getNumberElements(); i++) {
        ICandidate candidate = candidateList.getElement(i);
        String fingerprintsOfExplPeaks = (String) candidate.getProperty("LossFingerprintOfExplPeaks" + fingerprinttype);
        // a candidate without explained losses is skipped entirely, including its non-explained losses
        if (fingerprintsOfExplPeaks.equals("NA") || fingerprintsOfExplPeaks.length() == 0)
            continue;
        fingerprintsOfExplPeaks = fingerprintsOfExplPeaks.trim();
        String[] fingerprintPairs = fingerprintsOfExplPeaks.split(";");
        for (int k = 0; k < fingerprintPairs.length; k++) {
            // each entry has the form mass:fingerprint
            String[] tmp1 = fingerprintPairs[k].split(":");
            Double peak1 = Double.parseDouble(tmp1[0]);
            try {
                String fingerprint = tmp1[1];
                addSortedFeature(peak1, fingerprint, peakMassesSorted, fingerprintsSorted);
            } catch (Exception ignored) {
                // entries lacking the fingerprint part, or failing inside addSortedFeature, are dropped
                continue;
            }
        }
        String nonExplMasses = (String) candidate.getProperty("NonExplainedLosses");
        if (!nonExplMasses.equals("NA")) {
            String[] tmp = nonExplMasses.split(";");
            for (int k = 0; k < tmp.length; k++) {
                double mass = Double.parseDouble(tmp[k]);
                // losses of mass 2 or below are not recorded
                if (mass > 2) {
                    addMassSorted(mass, peakMassCounts, nonExplainedPeaks);
                }
            }
        }
    }

    // first output line: mass[:count] entries joined by ';', the count only when above 1
    StringBuilder nonExplainedPeaksString = new StringBuilder();
    if (nonExplainedPeaks.size() == 0 || !includeNonExplained) {
        nonExplainedPeaksString.append("NA");
    } else {
        for (int i = 0; i < nonExplainedPeaks.size(); i++) {
            if (i > 0)
                nonExplainedPeaksString.append(";");
            nonExplainedPeaksString.append(nonExplainedPeaks.get(i));
            if (peakMassCounts.get(i) > 1) {
                nonExplainedPeaksString.append(":");
                nonExplainedPeaksString.append(peakMassCounts.get(i));
            }
        }
    }
    String nonExplainedLine = nonExplainedPeaksString.toString();

    MassToFingerprintGroupListCollection peakToFingerprintGroupListCollection = new MassToFingerprintGroupListCollection();
    System.out.println(peakMassesSorted.size() + " loss fingerprint pairs");
    Integer id = 0;
    // every mass assigned to a group list, keyed by the group list id; passed to updatePeakMass below
    Hashtable<Integer, ArrayList<Double>> grouplistid_to_masses = new Hashtable<Integer, ArrayList<Double>>();
    for (int i = 0; i < peakMassesSorted.size(); i++) {
        Double currentPeak = peakMassesSorted.get(i);
        MassToFingerprintGroupList peakToFingerprintGroupList = peakToFingerprintGroupListCollection.getElementByPeak(currentPeak);
        if (peakToFingerprintGroupList == null) {
            // first occurrence of this mass: open a new group list with a fresh id
            peakToFingerprintGroupList = new MassToFingerprintGroupList(currentPeak);
            peakToFingerprintGroupList.setId(id);
            FingerprintGroup obj = new FingerprintGroup(0.0, null, null, null);
            obj.setFingerprint(fingerprintsSorted.get(i));
            obj.incrementNumberObserved();
            peakToFingerprintGroupList.addElement(obj);
            peakToFingerprintGroupListCollection.addElementSorted(peakToFingerprintGroupList);
            addMass(grouplistid_to_masses, id, currentPeak);
            id++;
        } else {
            Integer current_id = peakToFingerprintGroupList.getId();
            addMass(grouplistid_to_masses, current_id, currentPeak);
            FingerprintGroup fingerprintGroup = peakToFingerprintGroupList.getElementByFingerprint(new FastBitArray(fingerprintsSorted.get(i)));
            if (fingerprintGroup != null) {
                // fingerprint already known for this mass: count one more observation
                fingerprintGroup.incrementNumberObserved();
            } else {
                fingerprintGroup = new FingerprintGroup(0.0, null, null, null);
                fingerprintGroup.setFingerprint(fingerprintsSorted.get(i));
                fingerprintGroup.incrementNumberObserved();
                peakToFingerprintGroupList.addElement(fingerprintGroup);
            }
        }
    }
    System.out.println("before filtering " + peakToFingerprintGroupListCollection.getNumberElements());
    peakToFingerprintGroupListCollection.updatePeakMass(grouplistid_to_masses);
    if (occurThresh != null)
        peakToFingerprintGroupListCollection.filterByOccurence(occurThresh);
    peakToFingerprintGroupListCollection.annotateIds();
    // absolute occurrence counts; both are always computed, as before
    // N^(s)
    int[] substrOccurences = peakToFingerprintGroupListCollection.calculateSubstructureAbsoluteProbabilities();
    int[] peakOccurences = peakToFingerprintGroupListCollection.calculatePeakAbsoluteProbabilities();

    switch (probabilityType) {
        case 0:
            // counts
            peakToFingerprintGroupListCollection.updateConditionalProbabilities();
            peakToFingerprintGroupListCollection.setProbabilityToNumberObserved();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            break;
        case 1:
            // P ( s | p )
            peakToFingerprintGroupListCollection.updateConditionalProbabilities();
            peakToFingerprintGroupListCollection.setProbabilityToConditionalProbability_sp();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            break;
        case 2:
            // P ( p | s )
            System.out.println("annotating IDs");
            peakToFingerprintGroupListCollection.updateProbabilities(substrOccurences);
            peakToFingerprintGroupListCollection.setProbabilityToConditionalProbability_ps();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            break;
        case 3:
            // P ( p , s )_s
            System.out.println("annotating IDs");
            peakToFingerprintGroupListCollection.updateJointProbabilitiesWithSubstructures(substrOccurences);
            peakToFingerprintGroupListCollection.setProbabilityToJointProbability();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            break;
        case 4:
            // P ( p , s )_p
            System.out.println("annotating IDs");
            peakToFingerprintGroupListCollection.updateJointProbabilitiesWithPeaks(peakOccurences);
            peakToFingerprintGroupListCollection.setProbabilityToJointProbability();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            break;
        case 5:
            // P ( s | p ), P ( p | s ), P ( p , s )_s: compute all three once, then emit each ranking
            System.out.println("annotating IDs");
            peakToFingerprintGroupListCollection.updateConditionalProbabilities();
            peakToFingerprintGroupListCollection.updateProbabilities(substrOccurences);
            peakToFingerprintGroupListCollection.updateJointProbabilitiesWithSubstructures(substrOccurences);

            peakToFingerprintGroupListCollection.setProbabilityToConditionalProbability_sp();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            writeAnnotationOutput(peakToFingerprintGroupListCollection, nonExplainedLine, output == null ? null : output + "_1");

            peakToFingerprintGroupListCollection.setProbabilityToConditionalProbability_ps();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            writeAnnotationOutput(peakToFingerprintGroupListCollection, nonExplainedLine, output == null ? null : output + "_2");

            peakToFingerprintGroupListCollection.setProbabilityToJointProbability();
            peakToFingerprintGroupListCollection.sortElementsByProbability();
            writeAnnotationOutput(peakToFingerprintGroupListCollection, nonExplainedLine, output == null ? null : output + "_3");
            break;
        default:
            // any other value leaves the probabilities untouched; the collection is still emitted below
            break;
    }
    if (probabilityType != 5) {
        if (output != null)
            System.out.println("writing to output");
        writeAnnotationOutput(peakToFingerprintGroupListCollection, nonExplainedLine, output);
    }
}

/**
 * Emits one annotation result: prints the collection when no path is given, otherwise writes
 * the non-explained line, the collection and a SUMMARY line to the file at that path.
 *
 * The writer is opened in a try-with-resources statement so the file is closed even when
 * writing or building the summary fails.
 *
 * @param collection the group list collection in its current probability/sort state
 * @param nonExplainedLine content of the first line of the file
 * @param outputPath target file path, or null to print instead of writing
 * @throws Exception on write failure (declared broadly to match main, which is the only caller)
 */
private static void writeAnnotationOutput(MassToFingerprintGroupListCollection collection, String nonExplainedLine, String outputPath) throws Exception {
    if (outputPath == null) {
        collection.print();
        return;
    }
    try (BufferedWriter bwriter = new BufferedWriter(new FileWriter(new File(outputPath)))) {
        bwriter.write(nonExplainedLine);
        bwriter.newLine();
        bwriter.write(collection.toString());
        bwriter.write("SUMMARY " + getNumberMatchedElements(collection) + " " + getNumberOccurences(collection) + " " + getNumberNonMatchedElements(collection) + " " + getNumberElements(collection));
        bwriter.newLine();
    }
}
Also used : MassToFingerprintGroupListCollection(de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupListCollection) FileWriter(java.io.FileWriter) ArrayList(java.util.ArrayList) ICandidate(de.ipbhalle.metfraglib.interfaces.ICandidate) BufferedWriter(java.io.BufferedWriter) MassToFingerprintGroupList(de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupList) CandidateList(de.ipbhalle.metfraglib.list.CandidateList) FastBitArray(de.ipbhalle.metfraglib.FastBitArray) FingerprintGroup(de.ipbhalle.metfraglib.substructure.FingerprintGroup) Settings(de.ipbhalle.metfraglib.settings.Settings) IDatabase(de.ipbhalle.metfraglib.interfaces.IDatabase) Hashtable(java.util.Hashtable) LocalCSVDatabase(de.ipbhalle.metfraglib.database.LocalCSVDatabase) MultipleHeadersFoundInInputDatabaseException(de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException) LocalPSVDatabase(de.ipbhalle.metfraglib.database.LocalPSVDatabase) File(java.io.File)

Aggregations

LocalPSVDatabase (de.ipbhalle.metfraglib.database.LocalPSVDatabase)6 MultipleHeadersFoundInInputDatabaseException (de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException)6 IDatabase (de.ipbhalle.metfraglib.interfaces.IDatabase)6 CandidateList (de.ipbhalle.metfraglib.list.CandidateList)6 LocalCSVDatabase (de.ipbhalle.metfraglib.database.LocalCSVDatabase)5 ICandidate (de.ipbhalle.metfraglib.interfaces.ICandidate)4 MetFragGlobalSettings (de.ipbhalle.metfraglib.settings.MetFragGlobalSettings)4 ArrayList (java.util.ArrayList)4 Settings (de.ipbhalle.metfraglib.settings.Settings)3 File (java.io.File)3 Hashtable (java.util.Hashtable)3 FastBitArray (de.ipbhalle.metfraglib.FastBitArray)2 FingerprintGroup (de.ipbhalle.metfraglib.substructure.FingerprintGroup)2 MassToFingerprintGroupList (de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupList)2 MassToFingerprintGroupListCollection (de.ipbhalle.metfraglib.substructure.MassToFingerprintGroupListCollection)2 CandidateListWriterPSV (de.ipbhalle.metfraglib.writer.CandidateListWriterPSV)2 BufferedWriter (java.io.BufferedWriter)2 FileWriter (java.io.FileWriter)2 IPeak (de.ipbhalle.metfraglib.interfaces.IPeak)1 IPeakListReader (de.ipbhalle.metfraglib.interfaces.IPeakListReader)1