use of de.ipbhalle.metfraglib.database.LocalPSVDatabase in project MetFragRelaunched by ipb-halle.
the class DownloadEntriesFromPubChem method downloadFromCandidateFile.
/**
 * Reads the candidates of a local PSV file, queries PubChem for the same identifiers,
 * copies the PubMed-reference and patent counts onto the local candidates and writes the
 * local candidate list as a PSV file.
 * <p>
 * If the local file or PubChem cannot be read, the error is reported on stderr and the
 * method returns without writing anything (previously this ended in a
 * NullPointerException).
 * <p>
 * NOTE(review): the properties are set on the object returned by
 * {@code dbIn.getCandidateByIdentifier(String)}, while the list that is written is the one
 * returned by {@code dbIn.getCandidateByIdentifier(ArrayList)} - confirm that both calls
 * hand out the same candidate instances, otherwise the written file lacks the new columns.
 *
 * @param filenameIn  path of the input PSV candidate file
 * @param filenameOut path of the output file; the directory part (up to the last '/') and
 *                    the base name (up to its first '.') are passed separately to the
 *                    writer
 */
public static void downloadFromCandidateFile(String filenameIn, String filenameOut) {
    MetFragGlobalSettings settingsIn = new MetFragGlobalSettings();
    settingsIn.set(VariableNames.LOCAL_DATABASE_PATH_NAME, filenameIn);
    LocalPSVDatabase dbIn = new LocalPSVDatabase(settingsIn);

    ArrayList<String> identifiers = null;
    try {
        identifiers = dbIn.getCandidateIdentifiers();
    } catch (Exception e1) {
        e1.printStackTrace();
    }
    if (identifiers == null) {
        System.err.println("Could not read candidate identifiers from " + filenameIn);
        return;
    }

    CandidateList candidates = dbIn.getCandidateByIdentifier(identifiers);
    String[] ids = new String[candidates.getNumberElements()];
    for (int i = 0; i < ids.length; i++) {
        ids[i] = candidates.getElement(i).getIdentifier();
    }

    MetFragGlobalSettings settingsPubChem = new MetFragGlobalSettings();
    settingsPubChem.set(VariableNames.PRECURSOR_DATABASE_IDS_NAME, ids);
    OnlineExtendedPubChemDatabase pubchemDB = new OnlineExtendedPubChemDatabase(settingsPubChem);

    CandidateList candidatesPubChem = null;
    try {
        ArrayList<String> pubchemIdentifiers = pubchemDB.getCandidateIdentifiers();
        candidatesPubChem = pubchemDB.getCandidateByIdentifier(pubchemIdentifiers);
    } catch (Exception e1) {
        e1.printStackTrace();
    }
    if (candidatesPubChem == null) {
        System.err.println("Could not retrieve candidates from PubChem");
        return;
    }

    for (int i = 0; i < candidatesPubChem.getNumberElements(); i++) {
        ICandidate pubchemCandidate = candidatesPubChem.getElement(i);
        try {
            ICandidate currentCandidate = dbIn.getCandidateByIdentifier(pubchemCandidate.getIdentifier());
            currentCandidate.setProperty(VariableNames.PUBCHEM_NUMBER_PUBMED_REFERENCES_NAME,
                    pubchemCandidate.getProperty(VariableNames.PUBCHEM_NUMBER_PUBMED_REFERENCES_NAME));
            currentCandidate.setProperty(VariableNames.PUBCHEM_NUMBER_PATENTS_NAME,
                    pubchemCandidate.getProperty(VariableNames.PUBCHEM_NUMBER_PATENTS_NAME));
        } catch (DatabaseIdentifierNotFoundException e) {
            // a PubChem hit without a local counterpart is reported and skipped
            e.printStackTrace();
        }
    }

    // Split the output path with plain string operations. The former regex-based split
    // used the file name as an unquoted pattern and cut at its first occurrence, which
    // produced a wrong directory for paths such as "/out.d/out.psv".
    int lastSeparator = filenameOut.lastIndexOf('/');
    String path = filenameOut.substring(0, lastSeparator + 1);
    String filename = filenameOut.substring(lastSeparator + 1);
    int firstDot = filename.indexOf('.');
    if (firstDot >= 0) {
        filename = filename.substring(0, firstDot);
    }

    CandidateListWriterPSV writer = new CandidateListWriterPSV();
    try {
        writer.write(candidates, filename, path);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
use of de.ipbhalle.metfraglib.database.LocalPSVDatabase in project MetFragRelaunched by ipb-halle.
the class WriteLossAnnotationFile method main.
/**
 * Writes a loss annotation file from a result file.
 * <p>
 * Parameters are read through {@code readParameters(args)}:
 * <ul>
 * <li>filename - input file name (required)</li>
 * <li>mzppm - required; passed to {@code getElementByPeak} when looking up an existing
 * peak group (presumably the relative m/z tolerance - confirm)</li>
 * <li>mzabs - required; passed to {@code getElementByPeak} (presumably the absolute m/z
 * tolerance - confirm)</li>
 * <li>probtype - required probability type: 1 - P ( s | p ); 2 - P ( p | s );
 * 3 - P ( p , s ) from s; 4 - P ( p , s ) from p;
 * 5 - P ( s | p ) P ( p | s ) P ( p , s )_s P ( p , s )_p</li>
 * <li>output - output smarts file; when absent the collection is printed instead</li>
 * <li>outputSMILES - output smiles file (optional)</li>
 * <li>occurThresh - optional; passed to {@code filterByOccurence}</li>
 * </ul>
 * For probtype 5 three result sets are emitted, with "_1", "_2" and "_3" appended to the
 * output names. All three are sorted by probability before being emitted; previously the
 * third one was only sorted when {@code output} was given.
 *
 * @param args command line arguments understood by {@code readParameters}
 */
public static void main(String[] args) throws MultipleHeadersFoundInInputDatabaseException, Exception {
    java.util.Hashtable<String, String> readParameters = readParameters(args);
    String[] requiredParameters = { "filename", "mzppm", "mzabs", "probtype" };
    for (int i = 0; i < requiredParameters.length; i++) {
        if (!readParameters.containsKey(requiredParameters[i])) {
            System.err.println(requiredParameters[i] + " missing");
            System.exit(1);
        }
    }

    String filename = readParameters.get("filename");
    Double mzppm = Double.parseDouble(readParameters.get("mzppm"));
    Double mzabs = Double.parseDouble(readParameters.get("mzabs"));
    Integer probabilityType = Integer.parseInt(readParameters.get("probtype"));
    // Hashtable.get yields null for absent keys, which is the "not given" marker below
    String output = readParameters.get("output");
    String outputSmiles = readParameters.get("outputSMILES");
    Integer occurThresh = null;
    if (readParameters.containsKey("occurThresh")) {
        occurThresh = Integer.parseInt(readParameters.get("occurThresh"));
    }

    Settings settings = new Settings();
    settings.set(VariableNames.LOCAL_DATABASE_PATH_NAME, filename);
    LocalPSVDatabase db = new LocalPSVDatabase(settings);
    java.util.ArrayList<String> ids = db.getCandidateIdentifiers();
    CandidateList candidateList = db.getCandidateByIdentifier(ids);

    // SmilesOfExplPeaks
    PeakToSmartsGroupListCollection peakToSmartGroupListCollection = new PeakToSmartsGroupListCollection();
    for (int i = 0; i < candidateList.getNumberElements(); i++) {
        ICandidate candidate = candidateList.getElement(i);
        String smilesOfExplPeaks = (String) candidate.getProperty("LossSmilesOfExplPeaks");
        String aromaticSmilesOfExplPeaks = (String) candidate.getProperty("LossAromaticSmilesOfExplPeaks");
        // a candidate without the annotation columns is skipped like one marked "NA"
        if (smilesOfExplPeaks == null || aromaticSmilesOfExplPeaks == null) {
            continue;
        }
        smilesOfExplPeaks = smilesOfExplPeaks.trim();
        aromaticSmilesOfExplPeaks = aromaticSmilesOfExplPeaks.trim();
        if (smilesOfExplPeaks.equals("NA") || aromaticSmilesOfExplPeaks.equals("NA")) {
            continue;
        }
        String[] pairs = smilesOfExplPeaks.split(";");
        String[] aromaticPairs = aromaticSmilesOfExplPeaks.split(";");
        if (pairs.length != aromaticPairs.length) {
            // both columns must describe the same peaks; report the candidate and skip it
            System.out.println(candidate.getIdentifier() + " " + candidate.getProperty(VariableNames.INCHI_KEY_1_NAME));
            continue;
        }
        for (int k = 0; k < pairs.length; k++) {
            String[] tmp = pairs[k].split(":");
            String[] aromaticTmp = aromaticPairs[k].split(":");
            // entries are "<peak>:<value>"; skip entries lacking the value part
            // (explicit length check instead of catching the index exception)
            if (tmp.length < 2 || aromaticTmp.length < 2) {
                continue;
            }
            Double peak = Double.parseDouble(tmp[0]);
            String smiles = tmp[1];
            String smarts = aromaticTmp[1];

            PeakToSmartsGroupList peakToSmartGroupList = peakToSmartGroupListCollection.getElementByPeak(peak, mzppm, mzabs);
            if (peakToSmartGroupList == null) {
                // first occurrence of this peak: new peak list holding one new group
                peakToSmartGroupList = new PeakToSmartsGroupList(peak);
                SmartsGroup obj = new SmartsGroup(0.0, null, null, null);
                obj.addElement(smarts);
                obj.addSmiles(smiles);
                peakToSmartGroupList.addElement(obj);
                peakToSmartGroupListCollection.addElementSorted(peakToSmartGroupList);
            } else {
                // known peak: move its m/z to the mean of the stored and the new value
                peakToSmartGroupList.setPeakmz((peakToSmartGroupList.getPeakmz() + peak) / 2.0);
                SmartsGroup smartsGroup = peakToSmartGroupList.getElementBySmiles(smiles, 1.0);
                boolean isNewGroup = smartsGroup == null;
                if (isNewGroup) {
                    smartsGroup = new SmartsGroup(0.0, null, null, null);
                }
                smartsGroup.addElement(smarts);
                smartsGroup.addSmiles(smiles);
                if (isNewGroup) {
                    peakToSmartGroupList.addElement(smartsGroup);
                }
            }
        }
    }

    // test filtering
    if (occurThresh != null) {
        peakToSmartGroupListCollection.filterByOccurence(occurThresh);
    }
    peakToSmartGroupListCollection.annotateIds();
    // get absolute numbers of single substructure occurences
    // N^(s)
    int[] substrOccurences = peakToSmartGroupListCollection.calculateSubstructureAbsoluteProbabilities();
    int[] peakOccurences = peakToSmartGroupListCollection.calculatePeakAbsoluteProbabilities();

    int type = probabilityType.intValue();
    if (type >= 2 && type <= 5) {
        System.out.println("annotating IDs");
    }
    switch (type) {
    case 1:
        // P ( s | p )
        peakToSmartGroupListCollection.updateConditionalProbabilities();
        peakToSmartGroupListCollection.removeDuplicates();
        peakToSmartGroupListCollection.setProbabilityToConditionalProbability_sp();
        peakToSmartGroupListCollection.sortElementsByProbability();
        break;
    case 2:
        // P ( p | s )
        peakToSmartGroupListCollection.updateProbabilities(substrOccurences);
        peakToSmartGroupListCollection.removeDuplicates();
        peakToSmartGroupListCollection.setProbabilityToConditionalProbability_ps();
        peakToSmartGroupListCollection.sortElementsByProbability();
        break;
    case 3:
        // P ( p , s )_s
        peakToSmartGroupListCollection.updateJointProbabilitiesWithSubstructures(substrOccurences);
        peakToSmartGroupListCollection.removeDuplicates();
        peakToSmartGroupListCollection.setProbabilityToJointProbability();
        peakToSmartGroupListCollection.sortElementsByProbability();
        break;
    case 4:
        // P ( p , s )_p
        peakToSmartGroupListCollection.updateJointProbabilitiesWithPeaks(peakOccurences);
        peakToSmartGroupListCollection.removeDuplicates();
        peakToSmartGroupListCollection.setProbabilityToJointProbability();
        peakToSmartGroupListCollection.sortElementsByProbability();
        break;
    case 5:
        // P ( s | p ) P ( p | s ) P( s, p )_s - emitted one after another
        peakToSmartGroupListCollection.updateConditionalProbabilities();
        peakToSmartGroupListCollection.updateProbabilities(substrOccurences);
        peakToSmartGroupListCollection.updateJointProbabilitiesWithSubstructures(substrOccurences);
        peakToSmartGroupListCollection.removeDuplicates();

        peakToSmartGroupListCollection.setProbabilityToConditionalProbability_sp();
        peakToSmartGroupListCollection.sortElementsByProbability();
        emitAnnotations(peakToSmartGroupListCollection, output, outputSmiles, "_1");

        peakToSmartGroupListCollection.setProbabilityToConditionalProbability_ps();
        peakToSmartGroupListCollection.sortElementsByProbability();
        emitAnnotations(peakToSmartGroupListCollection, output, outputSmiles, "_2");

        peakToSmartGroupListCollection.setProbabilityToJointProbability();
        // sort unconditionally so printed, SMARTS and SMILES output share one order
        peakToSmartGroupListCollection.sortElementsByProbability();
        emitAnnotations(peakToSmartGroupListCollection, output, outputSmiles, "_3");
        break;
    default:
        // unknown types are emitted as they are, without probability update
        break;
    }
    if (type != 5) {
        emitAnnotations(peakToSmartGroupListCollection, output, outputSmiles, "");
    }
}

/**
 * Emits the current state of the collection: printed when {@code output} is null,
 * otherwise written to {@code output + suffix}; additionally written as SMILES to
 * {@code outputSmiles + suffix} when {@code outputSmiles} is not null.
 */
private static void emitAnnotations(PeakToSmartsGroupListCollection collection, String output, String outputSmiles,
        String suffix) throws java.io.IOException {
    if (output == null) {
        collection.print();
    } else {
        writeAnnotationText(output + suffix, collection.toString());
    }
    if (outputSmiles != null) {
        writeAnnotationText(outputSmiles + suffix, collection.toStringSmiles());
    }
}

/** Writes {@code content} to the file at {@code path}; the writer is closed even if writing fails. */
private static void writeAnnotationText(String path, String content) throws java.io.IOException {
    BufferedWriter bwriter = new BufferedWriter(new FileWriter(new File(path)));
    try {
        bwriter.write(content);
    } finally {
        bwriter.close();
    }
}
use of de.ipbhalle.metfraglib.database.LocalPSVDatabase in project MetFragRelaunched by ipb-halle.
the class AddMissingNonExplainedPeaks method main.
/**
 * Adds a "NonExplainedMasses" property to every candidate of a result file and writes the
 * candidates to a new file.
 * <p>
 * For each candidate the peak list read via the configured peak list reader is compared
 * with the candidate's "ExplPeaks" property; masses for which {@code isContained} reports
 * no match are joined with ';'. The value is "NA" when no mass is left over. A candidate
 * whose "ExplPeaks" is "NA" or missing gets every peak mass.
 * <p>
 * Failures while loading the candidates now propagate to the caller (the method already
 * declares {@code throws Exception}) instead of being printed and followed by a
 * NullPointerException.
 *
 * @param args {@code args[0]} parameter file, {@code args[1]} result file (read as PSV if
 *             its name ends with "psv", otherwise as CSV), {@code args[2]} output file
 *             (written as PSV if its name ends with "psv", otherwise as CSV)
 * @throws Exception if settings, peak list or candidates cannot be read, or writing fails
 */
public static void main(String[] args) throws Exception {
    if (args == null || args.length < 3) {
        System.err.println("usage: <paramfile> <resultfile> <outputfile>");
        return;
    }
    String paramfile = args[0];
    String resultfile = args[1];
    String outputfile = args[2];

    Settings settings = getSettings(paramfile);
    settings.set(VariableNames.LOCAL_DATABASE_PATH_NAME, resultfile);
    // the peak list reader class is named in the settings and instantiated reflectively
    IPeakListReader peakListReader = (IPeakListReader) Class
            .forName((String) settings.get(VariableNames.METFRAG_PEAK_LIST_READER_NAME))
            .getConstructor(Settings.class).newInstance(settings);
    SettingsChecker settingsChecker = new SettingsChecker();
    if (!settingsChecker.check(settings)) {
        System.err.println("Problems reading settings");
        return;
    }
    settings.set(VariableNames.PEAK_LIST_NAME, peakListReader.read());

    IDatabase db = null;
    String dbFilename = (String) settings.get(VariableNames.LOCAL_DATABASE_PATH_NAME);
    if (dbFilename.endsWith("psv")) {
        db = new LocalPSVDatabase(settings);
    } else {
        db = new LocalCSVDatabase(settings);
    }

    ArrayList<String> ids = db.getCandidateIdentifiers();
    CandidateList candidates = db.getCandidateByIdentifier(ids);
    if (candidates == null || candidates.getNumberElements() == 0) {
        System.out.println("No candidates found in " + (String) settings.get(VariableNames.LOCAL_DATABASE_PATH_NAME));
        return;
    }

    DefaultPeakList peaklist = (DefaultPeakList) settings.get(VariableNames.PEAK_LIST_NAME);
    for (int i = 0; i < candidates.getNumberElements(); i++) {
        String explPeaks = (String) candidates.getElement(i).getProperty("ExplPeaks");
        // a missing property is treated like "NA": nothing was explained
        boolean nothingExplained = explPeaks == null || explPeaks.equals("NA");
        Double[] explPeaksMasses = null;
        if (!nothingExplained) {
            explPeaksMasses = getDoubleArrayFromPeakList(explPeaks.split(";"));
        }

        StringBuilder nonExplPeaks = new StringBuilder();
        for (int k = 0; k < peaklist.getNumberElements(); k++) {
            IPeak peak = (IPeak) peaklist.getElement(k);
            if (nothingExplained || !isContained(peak.getMass(), explPeaksMasses)) {
                if (nonExplPeaks.length() > 0) {
                    nonExplPeaks.append(';');
                }
                nonExplPeaks.append(peak.getMass());
            }
        }
        String nonExplPeaksString = nonExplPeaks.length() == 0 ? "NA" : nonExplPeaks.toString();
        candidates.getElement(i).setProperty("NonExplainedMasses", nonExplPeaksString);
    }

    IWriter writer = null;
    if (outputfile.endsWith("psv")) {
        writer = new CandidateListWriterPSV();
    } else {
        writer = new CandidateListWriterCSV();
    }
    writer.write(candidates, outputfile);
}
use of de.ipbhalle.metfraglib.database.LocalPSVDatabase in project MetFragRelaunched by ipb-halle.
the class CombineResultsForAnnotation method getMatchingCandidate.
/**
 * Searches result files for a candidate with a given value of the
 * {@code VariableNames.INCHI_KEY_1_NAME} property.
 * <p>
 * Only files whose name starts with {@code id} are opened; a file whose name ends with
 * "csv" is read as CSV, any other file as PSV. Files are scanned in array order and the
 * first matching candidate is returned. A file that cannot be read is reported via its
 * stack trace and skipped (previously this caused a NullPointerException).
 *
 * @param metfragFiles result files to search
 * @param id           file name prefix selecting the files to open
 * @param inchikey1    value to compare with each candidate's INCHI_KEY_1_NAME property
 *                     (presumably the first InChIKey block - confirm)
 * @return the first matching candidate, or {@code null} if none of the files contains one
 */
public static ICandidate getMatchingCandidate(File[] metfragFiles, String id, String inchikey1) {
    for (int i = 0; i < metfragFiles.length; i++) {
        File resultFile = metfragFiles[i];
        if (!resultFile.getName().startsWith(id)) {
            continue;
        }
        MetFragGlobalSettings settings = new MetFragGlobalSettings();
        settings.set(VariableNames.LOCAL_DATABASE_PATH_NAME, resultFile.getAbsolutePath());
        IDatabase db = null;
        if (resultFile.getName().endsWith("csv")) {
            db = new LocalCSVDatabase(settings);
        } else {
            db = new LocalPSVDatabase(settings);
        }

        CandidateList candidates = null;
        try {
            ArrayList<String> identifiers = db.getCandidateIdentifiers();
            candidates = db.getCandidateByIdentifier(identifiers);
        } catch (Exception e1) {
            e1.printStackTrace();
        }
        if (candidates == null) {
            // unreadable file: try the next one instead of failing
            continue;
        }

        for (int ii = 0; ii < candidates.getNumberElements(); ii++) {
            String candidateKey = (String) candidates.getElement(ii).getProperty(VariableNames.INCHI_KEY_1_NAME);
            // candidates without the property cannot match
            if (candidateKey != null && candidateKey.equals(inchikey1)) {
                return candidates.getElement(ii);
            }
        }
    }
    return null;
}
use of de.ipbhalle.metfraglib.database.LocalPSVDatabase in project MetFragRelaunched by ipb-halle.
the class CollectCommuniesAndFragmentPeaks method main.
/**
 * Collects the explained peaks of all candidates of a PSV file, groups them by peak mass
 * and writes the groups together with the community fragments of the candidates.
 * <p>
 * Per candidate the properties "FormulasOfExplPeaks", "SmilesOfExplPeaks" and "ExplPeaks"
 * are split at ';' and read position by position. An entry is used only if all three
 * parts split into exactly two fields and the intensity is not below
 * {@code intensityThreshold}. A mass that {@code containsDouble} finds in the mass list is
 * merged into that slot (stored mass becomes the mean of old and new value), otherwise a
 * new slot is inserted at the index returned by {@code addMassSorted}.
 * <p>
 * Robustness changes compared to the former version: missing arguments are reported,
 * an unreadable input file ends the run with a message, candidates that fail to
 * initialise or lack one of the three properties are skipped, only positions present in
 * all three property arrays are read, and all writers are closed even when writing fails.
 *
 * @param args {@code args[0]} input PSV file, {@code args[1]} output file for masses with
 *             formulas/smiles/intensities, {@code args[2]} output file for the smiles
 *             only, {@code args[3]} output file for the community fragments
 */
public static void main(String[] args) {
    if (args == null || args.length < 4) {
        System.err.println("usage: <input psv> <peak output> <smiles output> <community output>");
        return;
    }
    MetFragGlobalSettings settings = new MetFragGlobalSettings();
    settings.set(VariableNames.LOCAL_DATABASE_PATH_NAME, args[0]);
    LocalPSVDatabase db = new LocalPSVDatabase(settings);
    java.util.ArrayList<String> ids = null;
    try {
        ids = db.getCandidateIdentifiers();
    } catch (Exception e1) {
        e1.printStackTrace();
    }
    if (ids == null) {
        System.err.println("Could not read candidate identifiers from " + args[0]);
        return;
    }
    CandidateList candidates = db.getCandidateByIdentifier(ids);

    // parallel lists: index n of each list describes the n-th peak mass slot
    ArrayList<Double> masses = new ArrayList<Double>();
    ArrayList<ArrayList<String>> vector_formulas = new ArrayList<ArrayList<String>>();
    ArrayList<ArrayList<String>> vector_smiles = new ArrayList<ArrayList<String>>();
    ArrayList<ArrayList<String>> vector_eawagids = new ArrayList<ArrayList<String>>();
    ArrayList<ArrayList<Double>> vector_intensities = new ArrayList<ArrayList<Double>>();
    ArrayList<Integer> occurences = new ArrayList<Integer>();
    ArrayList<ArrayList<FingerPrintFragmentCollection>> matchingFragments = new ArrayList<ArrayList<FingerPrintFragmentCollection>>();

    for (int i = 0; i < candidates.getNumberElements(); i++) {
        try {
            candidates.getElement(i).initialisePrecursorCandidate();
        } catch (Exception e) {
            // without an initialised precursor no community fragments can be calculated
            e.printStackTrace();
            continue;
        }
        String formulasValue = (String) candidates.getElement(i).getProperties().get("FormulasOfExplPeaks");
        String smilesValue = (String) candidates.getElement(i).getProperties().get("SmilesOfExplPeaks");
        String explPeaksValue = (String) candidates.getElement(i).getProperties().get("ExplPeaks");
        if (formulasValue == null || smilesValue == null || explPeaksValue == null) {
            continue;
        }
        CommunityCalculation c = new CommunityCalculation((BitArrayPrecursor) candidates.getElement(i).getPrecursorMolecule());
        DefaultBitArrayFragment[] communityFragments = c.getCommunityFragments();
        String[] formulas = formulasValue.split(";");
        String eawagID = (String) candidates.getElement(i).getProperties().get("EawagID");
        String[] smiles = smilesValue.split(";");
        String[] explPeaks = explPeaksValue.split(";");
        boolean debug = false;
        // the three arrays are read in lockstep, so stop at the shortest one
        int numberEntries = Math.min(formulas.length, Math.min(smiles.length, explPeaks.length));
        for (int k = 0; k < numberEntries; k++) {
            String[] tmp_formula = formulas[k].split(":");
            String[] tmp_smiles = smiles[k].split(":");
            String[] tmp_intensity = explPeaks[k].split("_");
            if (tmp_formula.length != 2 || tmp_smiles.length != 2 || tmp_intensity.length != 2)
                continue;
            double intensity = Double.parseDouble(tmp_intensity[1]);
            if (intensity < intensityThreshold)
                continue;
            double mass = Double.parseDouble(tmp_formula[0]);
            int index = containsDouble(masses, mass, 5.0, 0.001, debug);
            if (index == -1) {
                int addedIndex = addMassSorted(masses, mass, debug);
                /*
                 * how often we have seen this peak mass
                 * here: only once as initial add
                 */
                occurences.add(addedIndex, 1);
                /*
                 * add the fragment
                 */
                addFragmentsAddPositionInitial(candidates.getElement(i).getPrecursorMolecule(), addedIndex, matchingFragments, communityFragments, candidates.getElement(i).getIdentifier());
                ArrayList<String> tmp_formulas = new ArrayList<String>();
                ArrayList<String> tmp_smiless = new ArrayList<String>();
                ArrayList<Double> tmp_intensities = new ArrayList<Double>();
                ArrayList<String> tmp_eawagids = new ArrayList<String>();
                tmp_formulas.add(tmp_formula[1]);
                tmp_smiless.add(tmp_smiles[1]);
                tmp_intensities.add(intensity);
                tmp_eawagids.add(eawagID);
                // insert at the same index as the mass to keep the lists aligned
                vector_formulas.add(addedIndex, tmp_formulas);
                vector_smiles.add(addedIndex, tmp_smiless);
                vector_intensities.add(addedIndex, tmp_intensities);
                vector_eawagids.add(addedIndex, tmp_eawagids);
            } else {
                masses.set(index, (masses.get(index) + mass) / 2.0);
                occurences.set(index, occurences.get(index) + 1);
                addFragmentsAddPosition(candidates.getElement(i).getPrecursorMolecule(), index, matchingFragments, communityFragments, candidates.getElement(i).getIdentifier());
                vector_formulas.get(index).add(tmp_formula[1]);
                vector_smiles.get(index).add(tmp_smiles[1]);
                vector_intensities.get(index).add(intensity);
                vector_eawagids.get(index).add(eawagID);
            }
        }
    }
    printMasses(masses);

    java.io.BufferedWriter bwriter = null;
    java.io.BufferedWriter smilesBwriter = null;
    try {
        smilesBwriter = new java.io.BufferedWriter(new java.io.FileWriter(new java.io.File(args[2])));
        bwriter = new java.io.BufferedWriter(new java.io.FileWriter(new java.io.File(args[1])));
        for (int i = 0; i < masses.size(); i++) {
            bwriter.write(masses.get(i) + "");
            bwriter.newLine();
            for (int k = 0; k < vector_formulas.get(i).size(); k++) {
                bwriter.write("\t" + vector_eawagids.get(i).get(k) + ": " + vector_formulas.get(i).get(k) + " " + vector_smiles.get(i).get(k) + " " + vector_intensities.get(i).get(k));
                smilesBwriter.write(vector_smiles.get(i).get(k));
                smilesBwriter.newLine();
                bwriter.newLine();
            }
            // one line listing an image file name per contributing entry
            bwriter.write("\t");
            for (int k = 0; k < vector_formulas.get(i).size(); k++) {
                bwriter.write(vector_eawagids.get(i).get(k) + ".png ");
            }
            bwriter.newLine();
            bwriter.newLine();
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            if (bwriter != null)
                bwriter.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
        try {
            if (smilesBwriter != null)
                smilesBwriter.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    java.io.BufferedWriter bwriterComms = null;
    try {
        bwriterComms = new java.io.BufferedWriter(new java.io.FileWriter(new java.io.File(args[3])));
        for (int i = 0; i < matchingFragments.size(); i++) {
            bwriterComms.write(masses.get(i) + " " + occurences.get(i));
            bwriterComms.newLine();
            ArrayList<FingerPrintFragmentCollection> collections = matchingFragments.get(i);
            for (int j = 0; j < collections.size(); j++) {
                FingerPrintFragmentCollection collection = collections.get(j);
                bwriterComms.write("\t");
                bwriterComms.write(collection.toString());
                bwriterComms.newLine();
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            if (bwriterComms != null)
                bwriterComms.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Aggregations