Search in sources :

Example 1 with MultipleHeadersFoundInInputDatabaseException

use of de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException in project MetFragRelaunched by ipb-halle.

the class DownloadEntriesFromPubChem method downloadFromCandidateFile.

/**
 * Reads the candidates of a local candidate file, looks the same identifiers up in an
 * {@code OnlineExtendedPubChemDatabase}, copies the PubMed-reference and patent counts onto
 * the local candidates and writes the candidate list with a {@code CandidateListWriterPSV}.
 *
 * @param filenameIn  path of the candidate file read through {@code LocalPSVDatabase}
 * @param filenameOut path of the output file; the part after the last {@code '/'} up to the
 *                    first {@code '.'} is passed to the writer as file name, everything up to
 *                    and including the last {@code '/'} as directory
 * @throws IllegalStateException if the input file cannot be read or the PubChem candidates
 *                               cannot be retrieved (previously these failures were printed
 *                               and then surfaced as a {@code NullPointerException})
 */
public static void downloadFromCandidateFile(String filenameIn, String filenameOut) {
    MetFragGlobalSettings settingsIn = new MetFragGlobalSettings();
    settingsIn.set(VariableNames.LOCAL_DATABASE_PATH_NAME, filenameIn);
    LocalPSVDatabase dbIn = new LocalPSVDatabase(settingsIn);
    ArrayList<String> identifiers;
    try {
        identifiers = dbIn.getCandidateIdentifiers();
    } catch (MultipleHeadersFoundInInputDatabaseException e1) {
        throw new IllegalStateException("Duplicate column name in header of " + filenameIn, e1);
    } catch (Exception e1) {
        throw new IllegalStateException("Could not read candidate identifiers from " + filenameIn, e1);
    }
    CandidateList candidates = dbIn.getCandidateByIdentifier(identifiers);
    String[] ids = new String[candidates.getNumberElements()];
    for (int i = 0; i < ids.length; i++) {
        ids[i] = candidates.getElement(i).getIdentifier();
    }

    MetFragGlobalSettings settingsPubChem = new MetFragGlobalSettings();
    settingsPubChem.set(VariableNames.PRECURSOR_DATABASE_IDS_NAME, ids);
    OnlineExtendedPubChemDatabase pubchemDB = new OnlineExtendedPubChemDatabase(settingsPubChem);

    // If the online identifier lookup fails, the identifiers read from the input file are used
    // for the candidate query, exactly as the original code did by reusing the same variable.
    ArrayList<String> pubChemIdentifiers = identifiers;
    try {
        pubChemIdentifiers = pubchemDB.getCandidateIdentifiers();
    } catch (Exception e1) {
        e1.printStackTrace();
    }

    CandidateList candidatesPubChem;
    try {
        candidatesPubChem = pubchemDB.getCandidateByIdentifier(pubChemIdentifiers);
    } catch (Exception e1) {
        // Without the online candidates there is nothing to annotate; fail with the cause
        // attached instead of dereferencing null further down.
        throw new IllegalStateException("Could not retrieve candidates from PubChem", e1);
    }

    for (int i = 0; i < candidatesPubChem.getNumberElements(); i++) {
        ICandidate pubChemCandidate = candidatesPubChem.getElement(i);
        String identifier = pubChemCandidate.getIdentifier();
        try {
            ICandidate currentCandidate = dbIn.getCandidateByIdentifier(identifier);
            currentCandidate.setProperty(VariableNames.PUBCHEM_NUMBER_PUBMED_REFERENCES_NAME, pubChemCandidate.getProperty(VariableNames.PUBCHEM_NUMBER_PUBMED_REFERENCES_NAME));
            currentCandidate.setProperty(VariableNames.PUBCHEM_NUMBER_PATENTS_NAME, pubChemCandidate.getProperty(VariableNames.PUBCHEM_NUMBER_PATENTS_NAME));
        } catch (DatabaseIdentifierNotFoundException e) {
            // A candidate unknown to the local file is skipped; the remaining ones are still annotated.
            e.printStackTrace();
        }
    }

    CandidateListWriterPSV writer = new CandidateListWriterPSV();
    // Base name: strip the directory part, then everything from the first '.' onwards.
    String filename = filenameOut.replaceAll(".*\\/", "").replaceAll("\\..*$", "");
    // Directory part including the trailing '/'. Taken positionally instead of via a regex built
    // from the file name, which broke on regex metacharacters and on names without extension.
    String path = filenameOut.substring(0, filenameOut.lastIndexOf('/') + 1);
    try {
        writer.write(candidates, filename, path);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Also used : MetFragGlobalSettings(de.ipbhalle.metfraglib.settings.MetFragGlobalSettings) OnlineExtendedPubChemDatabase(de.ipbhalle.metfraglib.database.OnlineExtendedPubChemDatabase) DatabaseIdentifierNotFoundException(de.ipbhalle.metfraglib.exceptions.DatabaseIdentifierNotFoundException) AtomTypeNotKnownFromInputListException(de.ipbhalle.metfraglib.exceptions.AtomTypeNotKnownFromInputListException) MultipleHeadersFoundInInputDatabaseException(de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException) ICandidate(de.ipbhalle.metfraglib.interfaces.ICandidate) LocalPSVDatabase(de.ipbhalle.metfraglib.database.LocalPSVDatabase) CandidateList(de.ipbhalle.metfraglib.list.CandidateList) DatabaseIdentifierNotFoundException(de.ipbhalle.metfraglib.exceptions.DatabaseIdentifierNotFoundException) MultipleHeadersFoundInInputDatabaseException(de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException) CandidateListWriterPSV(de.ipbhalle.metfraglib.writer.CandidateListWriterPSV)

Example 2 with MultipleHeadersFoundInInputDatabaseException

use of de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException in project MetFragRelaunched by ipb-halle.

the class LocalCSVDatabase method readCandidatesFromFile.

/**
 * Reads all candidates from the CSV file configured under
 * {@code VariableNames.LOCAL_DATABASE_PATH_NAME} into {@code this.candidates} and
 * {@code this.identifiers}.
 * <p>
 * The first record is used as header. Known alias column names are mapped to their canonical
 * property names; every other column is stored under its own name. Rows whose identifier is
 * {@code "-"} or {@code "NO_MATCH"}, rows that fail structure preparation and rows rejected by
 * {@code checkFilter} are skipped. The 1-based record number is appended to each identifier
 * as {@code "|" + index}.
 *
 * @throws MultipleHeadersFoundInInputDatabaseException kept from the original documentation;
 *         this method does not throw it itself
 * @throws IOException if the file cannot be read
 * @throws Exception if the configured path is not a regular file or if no identifier or
 *         InChI column is present in the header
 */
protected void readCandidatesFromFile() throws Exception {
    this.candidates = new java.util.ArrayList<ICandidate>();
    java.io.File f = new java.io.File((String) this.settings.get(VariableNames.LOCAL_DATABASE_PATH_NAME));
    if (!f.isFile()) {
        throw new Exception("Candidate file does not exist or is not a regular file: " + f);
    }

    // alias column name -> canonical property name
    java.util.HashMap<String, String> nameToInputName = new java.util.HashMap<String, String>();
    nameToInputName.put(VariableNames.IDENTIFIER_NAME_3, VariableNames.IDENTIFIER_NAME);
    nameToInputName.put(VariableNames.IDENTIFIER_NAME_2, VariableNames.IDENTIFIER_NAME);
    nameToInputName.put(VariableNames.IDENTIFIER_NAME, VariableNames.IDENTIFIER_NAME);
    nameToInputName.put(VariableNames.MONOISOTOPIC_MASS_NAME_2, VariableNames.MONOISOTOPIC_MASS_NAME);
    nameToInputName.put(VariableNames.MONOISOTOPIC_MASS_NAME, VariableNames.MONOISOTOPIC_MASS_NAME);
    nameToInputName.put(VariableNames.INCHI_NAME_2, VariableNames.INCHI_NAME);
    nameToInputName.put(VariableNames.INCHI_NAME, VariableNames.INCHI_NAME);
    nameToInputName.put(VariableNames.MOLECULAR_FORMULA_NAME_2, VariableNames.MOLECULAR_FORMULA_NAME);
    nameToInputName.put(VariableNames.MOLECULAR_FORMULA_NAME, VariableNames.MOLECULAR_FORMULA_NAME);
    nameToInputName.put(VariableNames.SMILES_NAME_2, VariableNames.SMILES_NAME);
    nameToInputName.put(VariableNames.SMILES_NAME, VariableNames.SMILES_NAME);
    nameToInputName.put(VariableNames.INCHI_KEY_NAME_2, VariableNames.INCHI_KEY_NAME);
    nameToInputName.put(VariableNames.INCHI_KEY_NAME, VariableNames.INCHI_KEY_NAME);
    nameToInputName.put(VariableNames.COMPOUND_NAME_NAME_2, VariableNames.COMPOUND_NAME_NAME);
    nameToInputName.put(VariableNames.COMPOUND_NAME_NAME, VariableNames.COMPOUND_NAME_NAME);
    // Candidates for the identifier / InChI column, in order of preference.
    String[] possibleIdentifierNames = { VariableNames.IDENTIFIER_NAME_3, VariableNames.IDENTIFIER_NAME_2, VariableNames.IDENTIFIER_NAME };
    String[] possibleInChINames = { VariableNames.INCHI_NAME_2, VariableNames.INCHI_NAME };

    java.util.HashMap<String, Boolean> nameToWasFound = new java.util.HashMap<String, Boolean>();
    for (String knownName : nameToInputName.keySet()) {
        nameToWasFound.put(knownName, Boolean.FALSE);
    }

    BufferedReader reader = new BufferedReader(new FileReader(f));
    CSVParser parser = null;
    // try/finally so reader and parser are released on every exit path, including
    // exceptions raised while processing a record.
    try {
        parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader());

        java.util.List<String> propertyNames = new java.util.ArrayList<String>();
        for (String colname : parser.getHeaderMap().keySet()) {
            propertyNames.add(colname);
            if (nameToInputName.containsKey(colname)) {
                nameToWasFound.put(colname, Boolean.TRUE);
            }
        }

        String properIdentifierName = "";
        for (String name : possibleIdentifierNames) {
            if (nameToWasFound.get(name)) {
                properIdentifierName = name;
                break;
            }
        }
        String properInChIName = "";
        for (String name : possibleInChINames) {
            if (nameToWasFound.get(name)) {
                properInChIName = name;
                break;
            }
        }
        if (properIdentifierName.equals("")) {
            logger.error("Error: No Identifier column defined.");
            throw new Exception("Error: No Identifier column defined.");
        }
        // The original compared the String[] possibleInChINames with "", which is never true,
        // so a missing InChI column went undetected until record.get("") failed.
        if (properInChIName.equals("")) {
            logger.error("Error: No InChI column defined.");
            throw new Exception("Error: No InChI column defined.");
        }

        int index = 0;
        for (CSVRecord record : parser) {
            index++;
            String identifier = record.get(properIdentifierName);
            if (identifier == null) {
                continue;
            }
            identifier = identifier.trim();
            if (identifier.equals("-") || identifier.equals("NO_MATCH")) {
                continue;
            }
            ICandidate precursorCandidate = new TopDownPrecursorCandidate(record.get(properInChIName), identifier + "|" + index);
            // Aliases first, in the order given by preparedPropertyNames: the first alias
            // present in the file wins for each canonical property.
            for (String curKey : this.preparedPropertyNames) {
                if (Boolean.TRUE.equals(nameToWasFound.get(curKey))) {
                    String inputName = nameToInputName.get(curKey);
                    if (!precursorCandidate.hasDefinedProperty(inputName)) {
                        precursorCandidate.setProperty(inputName, record.get(curKey));
                    }
                }
            }
            // Then every remaining column under its own header name.
            for (String colname : propertyNames) {
                if (!precursorCandidate.hasDefinedProperty(colname)) {
                    precursorCandidate.setProperty(colname, record.get(colname));
                }
            }
            if (!precursorCandidate.hasDefinedProperty(VariableNames.MONOISOTOPIC_MASS_NAME)) {
                try {
                    precursorCandidate.setProperty(VariableNames.MONOISOTOPIC_MASS_NAME, precursorCandidate.getMolecularFormula().getMonoisotopicMass());
                } catch (AtomTypeNotKnownFromInputListException e) {
                    // The mass cannot be derived for this row, so the candidate is skipped.
                    continue;
                }
            } else {
                // The mass was read as text; store it as a Double.
                precursorCandidate.setProperty(VariableNames.MONOISOTOPIC_MASS_NAME, Double.parseDouble((String) precursorCandidate.getProperty(VariableNames.MONOISOTOPIC_MASS_NAME)));
            }
            if (!this.addInChIFromSmiles(precursorCandidate))
                continue;
            if (!this.addSMILESFromInChI(precursorCandidate))
                continue;
            if (!this.addInChIKeyFromSmiles(precursorCandidate))
                continue;
            if (!this.setInChIValues(precursorCandidate))
                continue;
            if (this.checkFilter(precursorCandidate)) {
                this.identifiers.add(precursorCandidate.getIdentifier());
                this.candidates.add(precursorCandidate);
            }
        }
    } finally {
        try {
            if (parser != null) {
                parser.close();
            }
        } finally {
            reader.close();
        }
    }
}
Also used : ICandidate(de.ipbhalle.metfraglib.interfaces.ICandidate) FileReader(java.io.FileReader) TopDownPrecursorCandidate(de.ipbhalle.metfraglib.candidate.TopDownPrecursorCandidate) AtomTypeNotKnownFromInputListException(de.ipbhalle.metfraglib.exceptions.AtomTypeNotKnownFromInputListException) AtomTypeNotKnownFromInputListException(de.ipbhalle.metfraglib.exceptions.AtomTypeNotKnownFromInputListException) IOException(java.io.IOException) MultipleHeadersFoundInInputDatabaseException(de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException) CSVParser(org.apache.commons.csv.CSVParser) BufferedReader(java.io.BufferedReader) CSVRecord(org.apache.commons.csv.CSVRecord)

Example 3 with MultipleHeadersFoundInInputDatabaseException

use of de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException in project MetFragRelaunched by ipb-halle.

the class LocalPSVDatabase method readCandidatesFromFile.

/**
 * Reads all candidates from the pipe-separated file configured under
 * {@code VariableNames.LOCAL_DATABASE_PATH_NAME} into {@code this.candidates} and
 * {@code this.identifiers}. If the path is not a regular file, nothing is read and no
 * exception is thrown.
 * <p>
 * The first line is the header. Known alias column names are mapped to their canonical
 * property names; every other column is stored under its own name. Rows that fail structure
 * preparation or are rejected by {@code checkFilter} are skipped. The 1-based data-row number
 * is appended to each identifier as {@code "|" + index}, matching the CSV reader.
 *
 * @throws MultipleHeadersFoundInInputDatabaseException if a column name occurs more than once
 *         in the header
 * @throws IOException if the file cannot be read
 * @throws Exception if the file is empty or the header lacks an identifier or InChI column
 */
protected void readCandidatesFromFile() throws Exception {
    this.candidates = new java.util.ArrayList<ICandidate>();
    java.io.File f = new java.io.File((String) this.settings.get(VariableNames.LOCAL_DATABASE_PATH_NAME));
    if (!f.isFile()) {
        return;
    }

    // alias column name -> canonical property name
    java.util.HashMap<String, String> nameToInputName = new java.util.HashMap<String, String>();
    nameToInputName.put(VariableNames.IDENTIFIER_NAME_3, VariableNames.IDENTIFIER_NAME);
    nameToInputName.put(VariableNames.IDENTIFIER_NAME_2, VariableNames.IDENTIFIER_NAME);
    nameToInputName.put(VariableNames.IDENTIFIER_NAME, VariableNames.IDENTIFIER_NAME);
    nameToInputName.put(VariableNames.MONOISOTOPIC_MASS_NAME_2, VariableNames.MONOISOTOPIC_MASS_NAME);
    nameToInputName.put(VariableNames.MONOISOTOPIC_MASS_NAME, VariableNames.MONOISOTOPIC_MASS_NAME);
    nameToInputName.put(VariableNames.INCHI_NAME_2, VariableNames.INCHI_NAME);
    nameToInputName.put(VariableNames.INCHI_NAME, VariableNames.INCHI_NAME);
    nameToInputName.put(VariableNames.MOLECULAR_FORMULA_NAME_2, VariableNames.MOLECULAR_FORMULA_NAME);
    nameToInputName.put(VariableNames.MOLECULAR_FORMULA_NAME, VariableNames.MOLECULAR_FORMULA_NAME);
    nameToInputName.put(VariableNames.SMILES_NAME_2, VariableNames.SMILES_NAME);
    nameToInputName.put(VariableNames.SMILES_NAME, VariableNames.SMILES_NAME);
    nameToInputName.put(VariableNames.INCHI_KEY_NAME_2, VariableNames.INCHI_KEY_NAME);
    nameToInputName.put(VariableNames.INCHI_KEY_NAME, VariableNames.INCHI_KEY_NAME);
    nameToInputName.put(VariableNames.COMPOUND_NAME_NAME_2, VariableNames.COMPOUND_NAME_NAME);
    nameToInputName.put(VariableNames.COMPOUND_NAME_NAME, VariableNames.COMPOUND_NAME_NAME);
    // Candidates for the identifier / InChI column, in order of preference.
    String[] possibleIdentifierNames = { VariableNames.IDENTIFIER_NAME_3, VariableNames.IDENTIFIER_NAME_2, VariableNames.IDENTIFIER_NAME };
    String[] possibleInChINames = { VariableNames.INCHI_NAME_2, VariableNames.INCHI_NAME };

    java.util.HashMap<String, Boolean> nameToWasFound = new java.util.HashMap<String, Boolean>();
    for (String knownName : nameToInputName.keySet()) {
        nameToWasFound.put(knownName, Boolean.FALSE);
    }

    BufferedReader reader = new BufferedReader(new FileReader(f));
    // try/finally so the reader is released on every exit path, including exceptions raised
    // while processing a row.
    try {
        String header = reader.readLine();
        if (header == null) {
            throw new Exception("Candidate file " + f.getName() + " is empty.");
        }
        String[] colNames = header.split("\\|");
        java.util.HashMap<String, Integer> propNameToIndex = new java.util.HashMap<String, Integer>();
        for (int i = 0; i < colNames.length; i++) {
            if (propNameToIndex.get(colNames[i]) != null) {
                throw new MultipleHeadersFoundInInputDatabaseException("Found " + colNames[i] + " several times in header!");
            }
            propNameToIndex.put(colNames[i], i);
            if (nameToInputName.containsKey(colNames[i])) {
                nameToWasFound.put(colNames[i], Boolean.TRUE);
            }
        }

        String properIdentifierName = "";
        for (String name : possibleIdentifierNames) {
            if (nameToWasFound.get(name)) {
                properIdentifierName = name;
                break;
            }
        }
        String properInChIName = "";
        for (String name : possibleInChINames) {
            if (nameToWasFound.get(name)) {
                properInChIName = name;
                break;
            }
        }
        // Fail with a clear message instead of a NullPointerException on the first data row.
        if (properIdentifierName.equals("")) {
            throw new Exception("Error: No Identifier column defined.");
        }
        if (properInChIName.equals("")) {
            throw new Exception("Error: No InChI column defined.");
        }
        int identifierColumn = propNameToIndex.get(properIdentifierName);
        int inchiColumn = propNameToIndex.get(properInChIName);

        String line;
        int index = 0;
        while ((line = reader.readLine()) != null) {
            // The counter was declared but never advanced, so every identifier ended in "|0".
            index++;
            String[] tmp = line.split("\\|");
            String identifier = tmp[identifierColumn].trim();
            ICandidate precursorCandidate = new TopDownPrecursorCandidate(tmp[inchiColumn].trim(), identifier + "|" + index);
            // Aliases first, in the order given by preparedPropertyNames: the first alias
            // present in the file wins. The original overwrote the loop variable with the next
            // key of a HashMap iterator, making the order arbitrary.
            for (String curKey : this.preparedPropertyNames) {
                if (Boolean.TRUE.equals(nameToWasFound.get(curKey))) {
                    String inputName = nameToInputName.get(curKey);
                    if (!precursorCandidate.hasDefinedProperty(inputName)) {
                        precursorCandidate.setProperty(inputName, tmp[propNameToIndex.get(curKey)].trim());
                    }
                }
            }
            // Then every remaining column under its own header name (header names are unique,
            // so column k of the header is field k of the row).
            for (int k = 0; k < colNames.length; k++) {
                String colname = colNames[k];
                if (!precursorCandidate.hasDefinedProperty(colname)) {
                    precursorCandidate.setProperty(colname, tmp[k]);
                }
            }
            if (!precursorCandidate.hasDefinedProperty(VariableNames.MONOISOTOPIC_MASS_NAME)) {
                try {
                    precursorCandidate.setProperty(VariableNames.MONOISOTOPIC_MASS_NAME, precursorCandidate.getMolecularFormula().getMonoisotopicMass());
                } catch (AtomTypeNotKnownFromInputListException e) {
                    // The mass cannot be derived for this row, so the candidate is skipped.
                    continue;
                }
            } else {
                // The mass was read as text; store it as a Double.
                precursorCandidate.setProperty(VariableNames.MONOISOTOPIC_MASS_NAME, Double.parseDouble((String) precursorCandidate.getProperty(VariableNames.MONOISOTOPIC_MASS_NAME)));
            }
            if (!this.addInChIFromSmiles(precursorCandidate))
                continue;
            if (!this.addSMILESFromInChI(precursorCandidate))
                continue;
            if (!this.addInChIKeyFromSmiles(precursorCandidate))
                continue;
            if (!this.setInChIValues(precursorCandidate))
                continue;
            if (this.checkFilter(precursorCandidate)) {
                this.identifiers.add(precursorCandidate.getIdentifier());
                this.candidates.add(precursorCandidate);
            }
        }
    } finally {
        reader.close();
    }
}
Also used : TopDownPrecursorCandidate(de.ipbhalle.metfraglib.candidate.TopDownPrecursorCandidate) AtomTypeNotKnownFromInputListException(de.ipbhalle.metfraglib.exceptions.AtomTypeNotKnownFromInputListException) ICandidate(de.ipbhalle.metfraglib.interfaces.ICandidate) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) MultipleHeadersFoundInInputDatabaseException(de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException)

Example 4 with MultipleHeadersFoundInInputDatabaseException

use of de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException in project MetFragRelaunched by ipb-halle.

the class LocalZippedPSVDatabase method readCandidatesFromFile.

/**
 * Reads all candidates from the first entry of the zip archive configured under
 * {@code VariableNames.LOCAL_DATABASE_PATH_NAME} into {@code this.candidates}. If the path is
 * not a regular file, nothing is read and no exception is thrown.
 * <p>
 * The entry is decoded as UTF-8 and parsed as pipe-separated text whose first line is the
 * header. The columns {@code VariableNames.IDENTIFIER_NAME} and
 * {@code VariableNames.INCHI_NAME} are used to construct each candidate; all other columns are
 * stored as properties, with the {@code VariableNames.MONOISOTOPIC_MASS_NAME} column parsed as
 * a double. Problems with an individual property are reported on the console and the row is
 * still added.
 *
 * @throws MultipleHeadersFoundInInputDatabaseException if a column name occurs more than once
 *         in the header
 * @throws IOException if the archive cannot be read
 * @throws Exception if the archive has no entries, the entry is empty, a required column is
 *         missing, or a candidate cannot be constructed from a row (the original called
 *         {@code System.exit(1)} in the last case)
 */
private void readCandidatesFromFile() throws MultipleHeadersFoundInInputDatabaseException, Exception {
    this.candidates = new java.util.ArrayList<ICandidate>();
    java.io.File f = new java.io.File((String) this.settings.get(VariableNames.LOCAL_DATABASE_PATH_NAME));
    if (!f.isFile()) {
        return;
    }

    ZipFile zipFile = new ZipFile(f.getAbsolutePath());
    BufferedReader reader = null;
    // try/finally so reader and archive are released on every exit path; closing the ZipFile
    // also closes the entry stream obtained from it.
    try {
        java.util.Enumeration<? extends ZipEntry> entries = zipFile.entries();
        if (!entries.hasMoreElements()) {
            throw new Exception("Zip archive " + f.getName() + " contains no entries.");
        }
        ZipEntry entry = entries.nextElement();
        InputStream stream = zipFile.getInputStream(entry);
        reader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));

        String header = reader.readLine();
        if (header == null) {
            throw new Exception("First entry of " + f.getName() + " is empty.");
        }
        String[] colNames = header.split("\\|");
        java.util.HashMap<String, Integer> propNameToIndex = new java.util.HashMap<String, Integer>();
        for (int i = 0; i < colNames.length; i++) {
            if (propNameToIndex.get(colNames[i]) != null) {
                throw new MultipleHeadersFoundInInputDatabaseException("Found " + colNames[i] + " several times in header!");
            }
            propNameToIndex.put(colNames[i], i);
        }

        // Resolve the special columns once; fail clearly instead of with a NullPointerException
        // on the first data row.
        Integer identifierColumn = propNameToIndex.get(VariableNames.IDENTIFIER_NAME);
        Integer inchiColumn = propNameToIndex.get(VariableNames.INCHI_NAME);
        if (identifierColumn == null) {
            throw new Exception("Column " + VariableNames.IDENTIFIER_NAME + " missing in header of " + f.getName());
        }
        if (inchiColumn == null) {
            throw new Exception("Column " + VariableNames.INCHI_NAME + " missing in header of " + f.getName());
        }
        Integer massColumn = propNameToIndex.get(VariableNames.MONOISOTOPIC_MASS_NAME);

        String line;
        while ((line = reader.readLine()) != null) {
            String[] tmp = line.split("\\|");
            String identifier = tmp[identifierColumn.intValue()].trim();
            ICandidate precursorCandidate;
            try {
                precursorCandidate = new TopDownPrecursorCandidate(tmp[inchiColumn.intValue()].trim(), identifier);
            } catch (Exception e) {
                // Library code must not terminate the JVM; report the offending line to the caller.
                throw new Exception("Could not create candidate from line: " + line, e);
            }
            // Store every remaining column as a property (header names are unique, so column k
            // of the header is field k of the row).
            for (int k = 0; k < colNames.length; k++) {
                if (k == inchiColumn.intValue() || k == identifierColumn.intValue()) {
                    continue;
                }
                if (massColumn != null && k == massColumn.intValue()) {
                    try {
                        precursorCandidate.setProperty(colNames[k], Double.parseDouble(tmp[k]));
                    } catch (Exception e) {
                        // Guard the diagnostic itself: the row may simply be too short.
                        String rawValue = k < tmp.length ? tmp[k] : "<missing>";
                        System.err.println("error in file " + f.getName() + " for " + identifier);
                        System.err.println("error in column: " + colNames[k] + " " + rawValue);
                        e.printStackTrace();
                    }
                } else {
                    try {
                        precursorCandidate.setProperty(colNames[k], tmp[k]);
                    } catch (Exception e) {
                        System.out.println(line);
                        System.out.println(colNames[k]);
                        System.out.println(propNameToIndex.get(colNames[k]));
                        System.out.println(tmp.length);
                        System.out.println("cols");
                        for (int i = 0; i < tmp.length; i++) {
                            System.out.println(tmp[i]);
                        }
                        e.printStackTrace();
                    }
                }
            }
            this.candidates.add(precursorCandidate);
        }
    } finally {
        try {
            if (reader != null) {
                reader.close();
            }
        } finally {
            zipFile.close();
        }
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) ZipEntry(java.util.zip.ZipEntry) ArrayList(java.util.ArrayList) TopDownPrecursorCandidate(de.ipbhalle.metfraglib.candidate.TopDownPrecursorCandidate) ICandidate(de.ipbhalle.metfraglib.interfaces.ICandidate) DatabaseIdentifierNotFoundException(de.ipbhalle.metfraglib.exceptions.DatabaseIdentifierNotFoundException) IOException(java.io.IOException) MultipleHeadersFoundInInputDatabaseException(de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException) ZipFile(java.util.zip.ZipFile) BufferedReader(java.io.BufferedReader) MultipleHeadersFoundInInputDatabaseException(de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException) ZipFile(java.util.zip.ZipFile)

Example 5 with MultipleHeadersFoundInInputDatabaseException

use of de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException in project MetFragRelaunched by ipb-halle.

the class WriteLossAnnotationFile method main.

/*
 * Builds a peak-to-substructure annotation from the loss annotations stored in a candidate
 * file and prints it or writes it to files.
 *
 * Parameters, as returned by readParameters(args):
 *   filename     - (required) input candidate file, read through LocalPSVDatabase
 *   mzppm        - (required) relative tolerance passed to getElementByPeak
 *   mzabs        - (required) absolute tolerance passed to getElementByPeak
 *   probtype     - (required) probability type:
 *                    1 - P ( s | p )
 *                    2 - P ( p | s )
 *                    3 - P ( p , s ) from s
 *                    4 - P ( p , s ) from p
 *                    5 - P ( s | p ), P ( p | s ) and P ( p , s )_s, written one after another
 *                        with the suffixes "_1", "_2" and "_3"
 *   output       - (optional) output file for the SMARTS annotation; printed if absent
 *   outputSMILES - (optional) output file for the SMILES annotation
 *   occurThresh  - (optional) threshold passed to filterByOccurence
 *
 * The process exits with status 1 if a required parameter is missing.
 */
public static void main(String[] args) throws MultipleHeadersFoundInInputDatabaseException, Exception {
    java.util.Hashtable<String, String> readParameters = readParameters(args);
    // Required parameters: abort immediately when one is missing.
    if (!readParameters.containsKey("filename")) {
        System.err.println("filename missing");
        System.exit(1);
    }
    if (!readParameters.containsKey("mzppm")) {
        System.err.println("mzppm missing");
        System.exit(1);
    }
    if (!readParameters.containsKey("mzabs")) {
        System.err.println("mzabs missing");
        System.exit(1);
    }
    if (!readParameters.containsKey("probtype")) {
        System.err.println("probtype missing");
        System.exit(1);
    }
    String filename = readParameters.get("filename");
    Double mzppm = Double.parseDouble(readParameters.get("mzppm"));
    Double mzabs = Double.parseDouble(readParameters.get("mzabs"));
    Integer probabilityType = Integer.parseInt(readParameters.get("probtype"));
    // Optional parameters stay null when not given.
    String output = null;
    String outputSmiles = null;
    Integer occurThresh = null;
    if (readParameters.containsKey("output"))
        output = readParameters.get("output");
    if (readParameters.containsKey("outputSMILES"))
        outputSmiles = readParameters.get("outputSMILES");
    if (readParameters.containsKey("occurThresh"))
        occurThresh = Integer.parseInt(readParameters.get("occurThresh"));
    // Load every candidate of the input file.
    Settings settings = new Settings();
    settings.set(VariableNames.LOCAL_DATABASE_PATH_NAME, filename);
    LocalPSVDatabase db = new LocalPSVDatabase(settings);
    java.util.ArrayList<String> ids = db.getCandidateIdentifiers();
    CandidateList candidateList = db.getCandidateByIdentifier(ids);
    // Collect, per peak, the groups of substructures annotated to it.
    PeakToSmartsGroupListCollection peakToSmartGroupListCollection = new PeakToSmartsGroupListCollection();
    for (int i = 0; i < candidateList.getNumberElements(); i++) {
        ICandidate candidate = candidateList.getElement(i);
        // NOTE(review): both properties are dereferenced without a null check; a candidate
        // lacking either one would raise a NullPointerException at trim() - confirm they are
        // always present in the input.
        String smilesOfExplPeaks = (String) candidate.getProperty("LossSmilesOfExplPeaks");
        String aromaticSmilesOfExplPeaks = (String) candidate.getProperty("LossAromaticSmilesOfExplPeaks");
        smilesOfExplPeaks = smilesOfExplPeaks.trim();
        aromaticSmilesOfExplPeaks = aromaticSmilesOfExplPeaks.trim();
        // "NA" in either property: nothing to annotate for this candidate.
        if (smilesOfExplPeaks.equals("NA") || aromaticSmilesOfExplPeaks.equals("NA"))
            continue;
        // Each property is a ';'-separated list of "peak:structure" entries; the two lists
        // are processed pairwise by position.
        String[] pairs = smilesOfExplPeaks.split(";");
        String[] aromaticPairs = aromaticSmilesOfExplPeaks.split(";");
        if (pairs.length != aromaticPairs.length) {
            // Lists of different length cannot be paired up: report the candidate and skip it.
            System.out.println(candidate.getIdentifier() + " " + candidate.getProperty(VariableNames.INCHI_KEY_1_NAME));
            continue;
        }
        for (int k = 0; k < pairs.length; k++) {
            String[] tmp = pairs[k].split(":");
            String[] aromaticTmp = aromaticPairs[k].split(":");
            Double peak = Double.parseDouble(tmp[0]);
            String smiles = null;
            String smarts = null;
            try {
                smiles = tmp[1];
                smarts = aromaticTmp[1];
            } catch (Exception e) {
                // Entry without a structure part after ':' - skip it.
                continue;
            }
            // Look for an already known peak within the given tolerances.
            PeakToSmartsGroupList peakToSmartGroupList = peakToSmartGroupListCollection.getElementByPeak(peak, mzppm, mzabs);
            if (peakToSmartGroupList == null) {
                // New peak: create its list with a first group holding this substructure.
                peakToSmartGroupList = new PeakToSmartsGroupList(peak);
                SmartsGroup obj = new SmartsGroup(0.0, null, null, null);
                obj.addElement(smarts);
                obj.addSmiles(smiles);
                peakToSmartGroupList.addElement(obj);
                peakToSmartGroupListCollection.addElementSorted(peakToSmartGroupList);
            } else {
                // Known peak: move its m/z to the mean of the stored and the new value, then
                // add the substructure to the group found for this SMILES or to a new group.
                peakToSmartGroupList.setPeakmz((peakToSmartGroupList.getPeakmz() + peak) / 2.0);
                SmartsGroup smartsGroup = peakToSmartGroupList.getElementBySmiles(smiles, 1.0);
                if (smartsGroup != null) {
                    smartsGroup.addElement(smarts);
                    smartsGroup.addSmiles(smiles);
                } else {
                    smartsGroup = new SmartsGroup(0.0, null, null, null);
                    smartsGroup.addElement(smarts);
                    smartsGroup.addSmiles(smiles);
                    peakToSmartGroupList.addElement(smartsGroup);
                }
            }
        }
    }
    // Optional filtering by occurrence threshold.
    if (occurThresh != null)
        peakToSmartGroupListCollection.filterByOccurence(occurThresh);
    peakToSmartGroupListCollection.annotateIds();
    // get absolute numbers of single substructure occurrences
    // N^(s)
    int[] substrOccurences = peakToSmartGroupListCollection.calculateSubstructureAbsoluteProbabilities();
    int[] peakOccurences = peakToSmartGroupListCollection.calculatePeakAbsoluteProbabilities();
    // P ( s | p )
    if (probabilityType == 1) {
        // calculate P ( s | p )
        peakToSmartGroupListCollection.updateConditionalProbabilities();
        peakToSmartGroupListCollection.removeDuplicates();
        peakToSmartGroupListCollection.setProbabilityToConditionalProbability_sp();
        peakToSmartGroupListCollection.sortElementsByProbability();
    }
    // P ( p | s )
    if (probabilityType == 2) {
        System.out.println("annotating IDs");
        // calculate P ( p | s )
        peakToSmartGroupListCollection.updateProbabilities(substrOccurences);
        peakToSmartGroupListCollection.removeDuplicates();
        peakToSmartGroupListCollection.setProbabilityToConditionalProbability_ps();
        peakToSmartGroupListCollection.sortElementsByProbability();
    }
    // P ( p , s )_s
    if (probabilityType == 3) {
        System.out.println("annotating IDs");
        // calculate P ( p , s )
        peakToSmartGroupListCollection.updateJointProbabilitiesWithSubstructures(substrOccurences);
        peakToSmartGroupListCollection.removeDuplicates();
        peakToSmartGroupListCollection.setProbabilityToJointProbability();
        peakToSmartGroupListCollection.sortElementsByProbability();
    }
    // P ( p , s )_p
    if (probabilityType == 4) {
        System.out.println("annotating IDs");
        // calculate P ( p , s )
        peakToSmartGroupListCollection.updateJointProbabilitiesWithPeaks(peakOccurences);
        peakToSmartGroupListCollection.removeDuplicates();
        peakToSmartGroupListCollection.setProbabilityToJointProbability();
        peakToSmartGroupListCollection.sortElementsByProbability();
    }
    // P ( s | p ) P ( p | s ) P( s, p )_s
    if (probabilityType == 5) {
        System.out.println("annotating IDs");
        // Compute all three probabilities once, then emit them one after another.
        peakToSmartGroupListCollection.updateConditionalProbabilities();
        peakToSmartGroupListCollection.updateProbabilities(substrOccurences);
        peakToSmartGroupListCollection.updateJointProbabilitiesWithSubstructures(substrOccurences);
        peakToSmartGroupListCollection.removeDuplicates();
        // Output 1: P ( s | p ), suffix "_1".
        peakToSmartGroupListCollection.setProbabilityToConditionalProbability_sp();
        peakToSmartGroupListCollection.sortElementsByProbability();
        if (output == null)
            peakToSmartGroupListCollection.print();
        else {
            BufferedWriter bwriter = new BufferedWriter(new FileWriter(new File(output + "_1")));
            bwriter.write(peakToSmartGroupListCollection.toString());
            bwriter.close();
        }
        if (outputSmiles != null) {
            BufferedWriter bwriter = new BufferedWriter(new FileWriter(new File(outputSmiles + "_1")));
            bwriter.write(peakToSmartGroupListCollection.toStringSmiles());
            bwriter.close();
        }
        // Output 2: P ( p | s ), suffix "_2".
        peakToSmartGroupListCollection.setProbabilityToConditionalProbability_ps();
        peakToSmartGroupListCollection.sortElementsByProbability();
        if (output == null)
            peakToSmartGroupListCollection.print();
        else {
            BufferedWriter bwriter = new BufferedWriter(new FileWriter(new File(output + "_2")));
            bwriter.write(peakToSmartGroupListCollection.toString());
            bwriter.close();
        }
        if (outputSmiles != null) {
            BufferedWriter bwriter = new BufferedWriter(new FileWriter(new File(outputSmiles + "_2")));
            bwriter.write(peakToSmartGroupListCollection.toStringSmiles());
            bwriter.close();
        }
        // Output 3: joint probability, suffix "_3".
        // NOTE(review): unlike outputs 1 and 2, the re-sort happens only inside the else
        // branch below, so the printed variant (output == null) is not re-sorted by the joint
        // probability - confirm whether that is intended.
        peakToSmartGroupListCollection.setProbabilityToJointProbability();
        if (output == null)
            peakToSmartGroupListCollection.print();
        else {
            BufferedWriter bwriter = new BufferedWriter(new FileWriter(new File(output + "_3")));
            peakToSmartGroupListCollection.sortElementsByProbability();
            bwriter.write(peakToSmartGroupListCollection.toString());
            bwriter.close();
        }
        if (outputSmiles != null) {
            BufferedWriter bwriter = new BufferedWriter(new FileWriter(new File(outputSmiles + "_3")));
            bwriter.write(peakToSmartGroupListCollection.toStringSmiles());
            bwriter.close();
        }
    }
    // Every other probability type produces a single output without suffix.
    if (probabilityType != 5) {
        if (output == null)
            peakToSmartGroupListCollection.print();
        else {
            BufferedWriter bwriter = new BufferedWriter(new FileWriter(new File(output)));
            bwriter.write(peakToSmartGroupListCollection.toString());
            bwriter.close();
        }
        if (outputSmiles != null) {
            BufferedWriter bwriter = new BufferedWriter(new FileWriter(new File(outputSmiles)));
            bwriter.write(peakToSmartGroupListCollection.toStringSmiles());
            bwriter.close();
        }
    }
}
Also used : PeakToSmartsGroupListCollection(de.ipbhalle.metfraglib.substructure.PeakToSmartsGroupListCollection) FileWriter(java.io.FileWriter) ICandidate(de.ipbhalle.metfraglib.interfaces.ICandidate) MultipleHeadersFoundInInputDatabaseException(de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException) BufferedWriter(java.io.BufferedWriter) LocalPSVDatabase(de.ipbhalle.metfraglib.database.LocalPSVDatabase) SmartsGroup(de.ipbhalle.metfraglib.substructure.SmartsGroup) CandidateList(de.ipbhalle.metfraglib.list.CandidateList) File(java.io.File) Settings(de.ipbhalle.metfraglib.settings.Settings) PeakToSmartsGroupList(de.ipbhalle.metfraglib.substructure.PeakToSmartsGroupList)

Aggregations

MultipleHeadersFoundInInputDatabaseException (de.ipbhalle.metfraglib.exceptions.MultipleHeadersFoundInInputDatabaseException): 20; CandidateList (de.ipbhalle.metfraglib.list.CandidateList): 16; LocalPSVDatabase (de.ipbhalle.metfraglib.database.LocalPSVDatabase): 12; ICandidate (de.ipbhalle.metfraglib.interfaces.ICandidate): 12; MetFragGlobalSettings (de.ipbhalle.metfraglib.settings.MetFragGlobalSettings): 12; File (java.io.File): 8; IOException (java.io.IOException): 8; ArrayList (java.util.ArrayList): 8; LocalCSVDatabase (de.ipbhalle.metfraglib.database.LocalCSVDatabase): 7; IDatabase (de.ipbhalle.metfraglib.interfaces.IDatabase): 6; Settings (de.ipbhalle.metfraglib.settings.Settings): 5; AtomTypeNotKnownFromInputListException (de.ipbhalle.metfraglib.exceptions.AtomTypeNotKnownFromInputListException): 4; CandidateListWriterPSV (de.ipbhalle.metfraglib.writer.CandidateListWriterPSV): 4; BufferedWriter (java.io.BufferedWriter): 4; FileWriter (java.io.FileWriter): 4; TopDownPrecursorCandidate (de.ipbhalle.metfraglib.candidate.TopDownPrecursorCandidate): 3; DatabaseIdentifierNotFoundException (de.ipbhalle.metfraglib.exceptions.DatabaseIdentifierNotFoundException): 3; BufferedReader (java.io.BufferedReader): 3; Hashtable (java.util.Hashtable): 3; CDKException (org.openscience.cdk.exception.CDKException): 3