Search in sources :

Example 1 with Record

use of massbank.Record in project MassBank-web by MassBank.

the class AddMetaData method main.

/**
 * Command line entry point of AddMetaData.
 * <p>
 * Prints the tool version, parses the command line, collects all <code>*.txt</code> record
 * files named directly or found recursively in the given directories and processes them in
 * parallel. Each file is read in the lenient "legacy"/"weak" mode; if <code>--add-pubchemcid</code>
 * is set the record is passed to <code>doAddPubchemCID</code>. A file is rewritten only when its
 * content changed and the new content passes validation with an empty (strict) configuration.
 * <p>
 * Of the declared options only <code>--add-pubchemcid</code> is evaluated in this method.
 *
 * @param arguments options followed by one or more files or directories
 * @throws Exception declared by the original signature; kept for compatibility
 */
public static void main(String[] arguments) throws Exception {
    // load version and print
    final Properties properties = new Properties();
    try {
        properties.load(ClassLoader.getSystemClassLoader().getResourceAsStream("project.properties"));
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
    System.out.println("AddMetaData version: " + properties.getProperty("version"));
    // parse command line
    Options options = new Options();
    options.addOption("a", "all", false, "execute all operations");
    options.addOption("p", "publication", false, "format PUBLICATION tag from given DOI to follow the guidelines of ACS");
    options.addOption("n", "name", false, "fix common problems in CH$NAME tag");
    options.addOption("l", "link", false, "add links to CH$LINK tag");
    options.addOption("r", "rewrite", false, "read and rewrite the file.");
    options.addOption("ms_focused_ion", false, "Inspect MS$FOCUSED_ION");
    options.addOption(null, "add-inchikey", false, "Add or fix InChIKey from the value in CH$IUPAC");
    options.addOption(null, "add-pubchemcid", false, "Add or fix PubChem CID from InChIKey and flag Problems.");
    CommandLine cmd = null;
    try {
        cmd = new DefaultParser().parse(options, arguments);
    } catch (ParseException e) {
        // oops, something went wrong
        System.err.println("Parsing command line failed. Reason: " + e.getMessage());
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("AddMetaData [OPTIONS] <FILE|DIR> [<FILE|DIR> ...]", options);
        System.exit(1);
    }
    if (cmd.getArgList().size() == 0) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("AddMetaData [OPTIONS] <FILE|DIR> [<FILE|DIR> ...]", options);
        System.exit(1);
    }
    // find all files in arguments and all *.txt files in directories and subdirectories
    // specified in arguments
    List<File> recordfiles = new ArrayList<>();
    for (String argument : cmd.getArgList()) {
        File argumentf = new File(argument);
        if (argumentf.isFile() && FilenameUtils.getExtension(argument).equals("txt")) {
            recordfiles.add(argumentf);
        } else if (argumentf.isDirectory()) {
            recordfiles.addAll(FileUtils.listFiles(argumentf, new String[] { "txt" }, true));
        } else {
            logger.warn("Argument " + argument + " could not be processed.");
        }
    }
    if (recordfiles.size() == 0) {
        logger.error("No files found.");
        System.exit(1);
    }
    // validate all files
    logger.trace("Validating " + recordfiles.size() + " files");
    // copied into a final local because 'cmd' is reassigned above and cannot be captured by the lambda
    final boolean doAddPubchemCid = cmd.hasOption("add-pubchemcid");
    recordfiles.parallelStream().forEach(filename -> {
        logger.info("Working on " + filename + ".");
        try {
            String recordString = FileUtils.readFileToString(filename, StandardCharsets.UTF_8);
            // read record in less strict mode
            Set<String> lenientConfig = new HashSet<String>();
            lenientConfig.add("legacy");
            lenientConfig.add("weak");
            Record record = Validator.validate(recordString, "", lenientConfig);
            if (record == null) {
                System.err.println("Validation of  \"" + filename + "\" failed. Exiting.");
                System.exit(1);
            } else if (record.DEPRECATED()) {
                // NOTE(review): this terminates the whole JVM (all files, all worker threads) as
                // soon as one deprecated record is seen; confirm that skipping the single file
                // is not what was intended.
                System.exit(0);
            }
            String recordstring2 = recordString;
            if (doAddPubchemCid) {
                recordstring2 = doAddPubchemCID(record);
            }
            if (!recordString.equals(recordstring2)) {
                // Validate the NEW content with an empty (strict) configuration before writing it.
                // The modified string must be checked here, not the original input.
                Record record2 = Validator.validate(recordstring2, "", new HashSet<String>());
                if (record2 == null) {
                    System.err.println("Validation of new created record file failed. Do not write.");
                } else {
                    try {
                        FileUtils.write(filename, recordstring2, StandardCharsets.UTF_8);
                    } catch (IOException exp) {
                        System.err.println("Writing file \"" + filename + "\" failed. Reason: " + exp.getMessage());
                        System.exit(1);
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    });
}
Also used : Options(org.apache.commons.cli.Options) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Properties(java.util.Properties) HelpFormatter(org.apache.commons.cli.HelpFormatter) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) CommandLine(org.apache.commons.cli.CommandLine) Record(massbank.Record) ParseException(org.apache.commons.cli.ParseException) File(java.io.File) DefaultParser(org.apache.commons.cli.DefaultParser) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 2 with Record

use of massbank.Record in project MassBank-web by MassBank.

the class RecordExporter method main.

/*
.ms2 text format: peptides
-----------------
https://skyline.ms/wiki/home/software/BiblioSpec/page.view?name=BiblioSpec%20input%20and%20output%20file%20formats
This format is recognized by ProteoWizard's msconvert and can be converted into other formats such as .mzXML.
In an .ms2 file there are four types of lines. 
	Lines beginning with 'H' are header lines and contain information about how the data was collected as well as comments. They appear at the beginning of the file. 
	Lines beginning with 'S' are followed by the scan number and the precursor m/z. 
	Lines beginning with 'Z' give the charge state followed by the mass of the ion at that charge state. 
	Lines beginning with 'D' contain information relevant to the preceding charge state. BlibToMs2's output will include D-lines with the sequence and modified sequence. 
The file is arranged with these S, Z and D lines for one spectrum followed by a peak list: 
	a pair of values giving each peak's m/z and intensity. Here is an example file:

H      CreationDate    Mon Apr 12 15:12:14 2010
H       Extractor       BlibToMs2
H       Library /home/me/research/search/demo.blib
S       1       1       636.34
Z       2       1253.36
D       seq     FKNGFQTGSASK
D       modified seq    FKNGFQTGSASK
187.40  12.5
193.10  19.5
242.30  14.2
244.30  9.0
S       2       2       745.3
Z       2       1471.7
D       seq     NFLETVELQVGLK
D       modified seq    NFLETVELQVGLK
1224.60 7.9
1228.70 468.9
1230.40 658.5
1231.50 144.2

BlibBuild .ssl file:
--------------------
https://skyline.ms/wiki/home/software/BiblioSpec/page.view?name=BiblioSpec%20input%20and%20output%20file%20formats

NIST *.msp file:
----------------
https://chemdata.nist.gov/mass-spc/ftp/mass-spc/PepLib.pdf
(section 'Spectrum Fields and Format')

Name: KDLGEEHFK/2
MW: 1103.561
Comment: Spec=Consensus Pep=N-Semitryp_irreg/miss_good Fullname=F.KDLGEEHFK.G/2 Mods=0 Parent=551.781 Inst=it Mz_diff=0.544 Mz_exact=551.7805 Mz_av=552.114 Protein="sp|P02769|ALBU_BOVIN Serum albumin precursor (Allergen Bos d 6) (BSA) - Bos taurus (Bovine)." Pseq=131/1 Organism="Protein" Se=4^X12:ex=0.00037/0.0003992,td=25.85/1379,sd=0/0,hs=38.5/1.433,bs=0.00027,b2=0.00028,bd=133^O10:ex=0.0002435/0.0009314,td=74.85/3.186e+004,pr=3.235e-007/8.612e-007,bs=2.73e-005,b2=5.56e-005,bd=1.56^I1:ex=0.0339/0,dc=0.939/0,do=6.14/0,bs=0.0339,bd=0.939^C1:ex=0.032/0,td=0/0,sd=0/0,hs=555/0,bs=0.032 Sample=7/bsa_cam,2,6/bsa_cam_different_voltages,1,3/bsa_none,0,1/nist_yl_31011_sigma_t9253_bsa_cam,4,6/nist_yl_31011_sigma_t9253_bsa_time_cam,4,6/nist_yl_31611_sigma_t9253_bsa_cam,0,3/nist_yl_sgma_t9253_bsa_none,1,2 Nreps=12/27 Missing=0.1916/0.0688 Parent_med=552.3075/0.22 Max2med_orig=100.0/0.0 Dotfull=0.743/0.044 Dot_cons=0.809/0.048 Unassign_all=0.173 Unassigned=0.105 Dotbest=0.83 Flags=12,9,1 Naa=9 DUScorr=1.5/0.71/2.9 Dottheory=0.84 Pfin=4.6e+008 Probcorr=6.7 Tfratio=6e+003 Pfract=0 Unassigned_corrected=0.011
Num peaks: 124
201.2	149	"? 11/10 0.7"
209.1	238	"b2-35/-0.02 11/11 0.7"
226.3	779	"b2-18/0.18 12/12 1.7"
227.3	484	"b2-17/0.18 12/12 0.9"
228.4	62	"b2-17i/1.28 7/10 0.2"

*/
public static void main(String[] arguments) {
    // Print the tool version stored in the bundled project.properties.
    final Properties versionInfo = new Properties();
    try {
        versionInfo.load(ClassLoader.getSystemClassLoader().getResourceAsStream("project.properties"));
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
    System.out.println("Exporter version: " + versionInfo.getProperty("version"));

    // Command line: -o/--outfile is mandatory, -f/--format is optional.
    final String usage = "RecordExporter [OPTIONS] <FILE|DIR> [<FILE|DIR> ...]";
    Options options = new Options();
    options.addRequiredOption("o", "outfile", true, "name of output file");
    options.addOption("f", "format", true, "output format; possible values: RIKEN_MSP, NIST_MSP; default is RIKEN_MSP");
    CommandLine cmd = null;
    try {
        cmd = new DefaultParser().parse(options, arguments);
    } catch (ParseException e) {
        System.err.println("Parsing command line failed. Reason: " + e.getMessage());
        new HelpFormatter().printHelp(usage, options);
        System.exit(1);
    }
    // At least one file or directory has to be given.
    if (cmd.getArgList().isEmpty()) {
        new HelpFormatter().printHelp(usage, options);
        System.exit(1);
    }
    // An explicitly given format must be one of the supported values.
    String format = cmd.getOptionValue("f");
    if (format != null && !Arrays.asList("RIKEN_MSP", "NIST_MSP").contains(format)) {
        new HelpFormatter().printHelp(usage, options);
        System.exit(1);
    }

    // Resolve every argument to its record files (a single *.txt file, or all *.txt files
    // below a non-hidden directory), parse them in "legacy" mode and keep the records that
    // could be read and validated.
    List<Record> records = cmd.getArgList().parallelStream().map(argument -> {
        File source = new File(argument);
        List<File> candidates = new ArrayList<File>();
        if (source.isFile() && FilenameUtils.getExtension(argument).equals("txt")) {
            candidates.add(source);
        } else if (source.isDirectory()) {
            // directories whose name starts with '.' are skipped without a warning
            if (!source.getName().startsWith(".")) {
                candidates.addAll(FileUtils.listFiles(source, new String[] { "txt" }, true));
            }
        } else {
            logger.warn("Argument " + argument + " could not be processed.");
        }
        List<Record> parsedRecords = candidates.parallelStream().map(recordFile -> {
            try {
                String content = FileUtils.readFileToString(recordFile, StandardCharsets.UTF_8);
                Set<String> parserConfig = new HashSet<String>();
                parserConfig.add("legacy");
                return Validator.validate(content, "", parserConfig);
            } catch (IOException e) {
                // unreadable files yield null and are filtered out below
                e.printStackTrace();
                return null;
            }
        }).collect(Collectors.toList());
        return parsedRecords;
    }).flatMap(Collection::stream).filter(Objects::nonNull).collect(Collectors.toList());

    File outfile = new File(cmd.getOptionValue("o"));
    // RIKEN_MSP is the default output format.
    if (format == null) {
        format = "RIKEN_MSP";
    }
    if ("RIKEN_MSP".equals(format)) {
        RecordToRIKEN_MSP.recordsToRIKEN_MSP(outfile, records);
    } else if ("NIST_MSP".equals(format)) {
        RecordToNIST_MSP.recordsToNIST_MSP(outfile, records);
    } else {
        // unreachable: the format was checked against the supported values above
        logger.error("This code should not run.");
        System.exit(1);
    }
}
Also used : Record(massbank.Record) Arrays(java.util.Arrays) Properties(java.util.Properties) Collection(java.util.Collection) RecordToNIST_MSP(massbank.export.RecordToNIST_MSP) Options(org.apache.commons.cli.Options) Set(java.util.Set) IOException(java.io.IOException) FileUtils(org.apache.commons.io.FileUtils) HelpFormatter(org.apache.commons.cli.HelpFormatter) Collectors(java.util.stream.Collectors) File(java.io.File) StandardCharsets(java.nio.charset.StandardCharsets) ArrayList(java.util.ArrayList) DefaultParser(org.apache.commons.cli.DefaultParser) HashSet(java.util.HashSet) Objects(java.util.Objects) List(java.util.List) Logger(org.apache.logging.log4j.Logger) ParseException(org.apache.commons.cli.ParseException) CommandLine(org.apache.commons.cli.CommandLine) RecordToRIKEN_MSP(massbank.export.RecordToRIKEN_MSP) LogManager(org.apache.logging.log4j.LogManager) FilenameUtils(org.apache.commons.io.FilenameUtils) Options(org.apache.commons.cli.Options) IOException(java.io.IOException) Properties(java.util.Properties) HelpFormatter(org.apache.commons.cli.HelpFormatter) CommandLine(org.apache.commons.cli.CommandLine) Collection(java.util.Collection) Record(massbank.Record) ArrayList(java.util.ArrayList) List(java.util.List) ParseException(org.apache.commons.cli.ParseException) File(java.io.File) DefaultParser(org.apache.commons.cli.DefaultParser) HashSet(java.util.HashSet)

Example 3 with Record

use of massbank.Record in project MassBank-web by MassBank.

the class Validator method main.

/**
 * Command line entry point of the Validator.
 * <p>
 * Prints the tool version, parses the command line, collects all <code>*.txt</code> record
 * files named directly or found recursively in the given directories and validates them in
 * parallel. For each file that parses, the ACCESSION is compared with the file name and the
 * file content is compared with the string generated from the parsed record; with
 * <code>--db</code> it is also compared with the record read back from the database.
 * Finally all accessions are checked for duplicates.
 * <p>
 * The process exits with status 1 if any problem was found and with 0 otherwise.
 *
 * @param arguments options followed by one or more files or directories
 */
public static void main(String[] arguments) {
    // load version and print
    final Properties properties = new Properties();
    try {
        properties.load(ClassLoader.getSystemClassLoader().getResourceAsStream("project.properties"));
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
    System.out.println("Validator version: " + properties.getProperty("version"));
    // parse command line
    Options options = new Options();
    options.addOption(null, "db", false, "also read record from database and compare with original Record; Developer Feature!");
    options.addOption(null, "legacy", false, "less strict mode for legacy records with minor problems.");
    options.addOption(null, "online", false, "also do online checks, like PubChem CID check.");
    CommandLine cmd = null;
    try {
        cmd = new DefaultParser().parse(options, arguments);
    } catch (ParseException e) {
        // oops, something went wrong
        System.err.println("Parsing command line failed. Reason: " + e.getMessage());
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("Validator [OPTIONS] <FILE|DIR> [<FILE|DIR> ...]", options);
        System.exit(1);
    }
    if (cmd.getArgList().size() == 0) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("Validator [OPTIONS] <FILE|DIR> [<FILE|DIR> ...]", options);
        System.exit(1);
    }
    if (cmd.hasOption("legacy"))
        System.out.println("Validation mode: legacy");
    // find all files in arguments and all *.txt files in directories and subdirectories
    // specified in arguments
    List<File> recordfiles = new ArrayList<>();
    for (String argument : cmd.getArgList()) {
        File argumentf = new File(argument);
        if (argumentf.isFile() && FilenameUtils.getExtension(argument).equals("txt")) {
            recordfiles.add(argumentf);
        } else if (argumentf.isDirectory()) {
            recordfiles.addAll(FileUtils.listFiles(argumentf, new String[] { "txt" }, true));
        } else {
            logger.warn("Argument " + argument + " could not be processed.");
        }
    }
    if (recordfiles.size() == 0) {
        logger.error("No files found for validation.");
        System.exit(1);
    }
    // validate all files
    logger.trace("Validating " + recordfiles.size() + " files");
    // shared between the worker threads of the parallel stream, hence atomic
    final AtomicBoolean haserror = new AtomicBoolean(false);
    // copied into final locals because 'cmd' is reassigned above and cannot be captured by the lambda
    final boolean doDatabase = cmd.hasOption("db");
    final boolean legacyMode = cmd.hasOption("legacy");
    final boolean onlineMode = cmd.hasOption("online");
    List<String> accessions = recordfiles.parallelStream().map(filename -> {
        String accession = null;
        logger.info("Working on " + filename + ".");
        try {
            String recordString = FileUtils.readFileToString(filename, StandardCharsets.UTF_8);
            if (hasNonStandardChars(recordString)) {
                logger.warn("Check " + filename + ".");
            }
            // basic validation
            Set<String> config = new HashSet<String>();
            if (legacyMode)
                config.add("legacy");
            if (onlineMode)
                config.add("online");
            Record record = validate(recordString, "", config);
            if (record == null) {
                logger.error("Error in \'" + filename + "\'.");
                haserror.set(true);
                // no accession available; filtered out below
                return null;
            }
            // additional tests
            logger.trace("validation passed for " + filename);
            // compare ACCESSION with filename
            accession = record.ACCESSION();
            if (!accession.equals(FilenameUtils.getBaseName(filename.toString()))) {
                logger.error("Error in \'" + filename.getName() + "\'.");
                logger.error("ACCESSION \'" + record.ACCESSION() + "\' does not match filename \'" + filename.getName() + "\'");
                haserror.set(true);
            }
            // validate correct serialization: String <-> (String -> Record class -> String)
            String recordStringFromRecord = record.toString();
            int position = StringUtils.indexOfDifference(new String[] { recordString, recordStringFromRecord });
            if (position != -1) {
                logger.error("Error in \'" + filename + "\'.");
                logger.error("File content differs from generated record string.\nThis might be a code problem. Please Report!");
                logDifferencePosition(recordStringFromRecord, position);
                // flag the error unconditionally, even if the position could not be mapped to a line
                haserror.set(true);
            }
            // validate correct serialization with db: String <-> (db -> Record class -> String)
            if (doDatabase) {
                Record recordDatabase = null;
                try {
                    DatabaseManager dbMan = new DatabaseManager("MassBank");
                    try {
                        recordDatabase = dbMan.getAccessionData(record.ACCESSION());
                    } finally {
                        // release the connection even if the lookup fails
                        dbMan.closeConnection();
                    }
                } catch (SQLException | ConfigurationException e) {
                    e.printStackTrace();
                    System.exit(1);
                }
                if (recordDatabase == null) {
                    String errormsg = "retrieval of '" + record.ACCESSION() + "' from database failed";
                    logger.error(errormsg);
                    System.exit(1);
                }
                String recordStringFromDB = recordDatabase.toString();
                position = StringUtils.indexOfDifference(new String[] { recordString, recordStringFromDB });
                if (position != -1) {
                    logger.error("Error in \'" + filename + "\'.");
                    logger.error("File content differs from generated record string from database content.\nThis might be a code problem. Please Report!");
                    logDifferencePosition(recordStringFromDB, position);
                    haserror.set(true);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
        return accession;
    }).filter(Objects::nonNull).collect(Collectors.toList());
    // check duplicates
    Set<String> duplicates = new LinkedHashSet<String>();
    Set<String> uniques = new HashSet<String>();
    for (String c : accessions) {
        if (!uniques.add(c)) {
            duplicates.add(c);
        }
    }
    if (duplicates.size() > 0) {
        logger.error("There are duplicates in all accessions:");
        logger.error(duplicates.toString());
        haserror.set(true);
    }
    // return 1 if there were errors
    System.exit(haserror.get() ? 1 : 0);
}

/**
 * Logs the line of <code>text</code> that contains the character offset <code>position</code>,
 * followed by a line with a '^' marker under the offending column.
 * Lines are split at '\n'; each line is counted with one trailing separator character.
 *
 * @param text the string the offset refers to
 * @param position zero-based character offset of the first difference
 */
private static void logDifferencePosition(String text, int position) {
    String[] tokens = text.split("\\n");
    int lineStart = 0;
    for (int line = 0; line < tokens.length; line++) {
        int nextLineStart = lineStart + tokens[line].length() + 1;
        if (position < nextLineStart) {
            logger.error("Error in line " + (line + 1) + ".");
            logger.error(tokens[line]);
            logger.error(StringUtils.repeat(" ", position - lineStart) + "^");
            return;
        }
        lineStart = nextLineStart;
    }
    // the offset is past the last line, e.g. the compared file has additional trailing content
    logger.error("Difference is located after the last line of the generated record string.");
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Options(org.apache.commons.cli.Options) DatabaseManager(massbank.db.DatabaseManager) SQLException(java.sql.SQLException) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Properties(java.util.Properties) HelpFormatter(org.apache.commons.cli.HelpFormatter) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) CommandLine(org.apache.commons.cli.CommandLine) ConfigurationException(org.apache.commons.configuration2.ex.ConfigurationException) Record(massbank.Record) ParseException(org.apache.commons.cli.ParseException) File(java.io.File) DefaultParser(org.apache.commons.cli.DefaultParser) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 4 with Record

use of massbank.Record in project MassBank-web by MassBank.

the class Validator method validate.

/**
 * Parses <code>recordString</code> into a {@link Record} created for <code>contributor</code>.
 * The parser behaviour is controlled by the option names in <code>config</code>.
 * <p>
 * If parsing fails, the parser message, the offending line and a '^' marker under the
 * failure column are written to the error log.
 *
 * @return the populated {@link Record}, or <code>null</code> if parsing failed
 */
public static Record validate(String recordString, String contributor, Set<String> config) {
    Record parsed = new Record(contributor);
    Result outcome = new RecordParser(parsed, config).parse(recordString);
    if (!outcome.isFailure()) {
        return parsed;
    }
    logger.error(outcome.getMessage());
    int errorPosition = outcome.getPosition();
    // Walk the lines and track where each one starts; every line accounts for its
    // own length plus one separator character.
    String[] lines = recordString.split("\\n");
    int lineStart = 0;
    for (int i = 0; i < lines.length; i++) {
        int nextLineStart = lineStart + lines[i].length() + 1;
        if (errorPosition < nextLineStart) {
            logger.error(lines[i]);
            StringBuilder marker = new StringBuilder(StringUtils.repeat(" ", errorPosition - lineStart));
            marker.append('^');
            logger.error(marker);
            break;
        }
        lineStart = nextLineStart;
    }
    return null;
}
Also used : RecordParser(massbank.RecordParser) Record(massbank.Record) Result(org.petitparser.context.Result)

Example 5 with Record

use of massbank.Record in project MassBank-web by MassBank.

the class RecordToNIST_MSP method recordsToNIST_MSP.

/**
 * A wrapper to convert multiple Records and write them to a file.
 * <p>
 * All records are converted with <code>convert</code> before the file is opened, and the
 * results are written in list order without any additional separator. The output is encoded
 * as UTF-8. An {@link IOException} while writing is not propagated; its stack trace is printed.
 *
 * @param file to write
 * @param records to convert
 */
public static void recordsToNIST_MSP(File file, List<Record> records) {
    // collect data
    List<String> list = new ArrayList<String>(records.size());
    for (Record record : records) {
        list.add(convert(record));
    }
    // try-with-resources closes the writer even if a write fails; the charset is given
    // explicitly because FileWriter would fall back to the platform default encoding
    try (BufferedWriter writer = new BufferedWriter(new java.io.OutputStreamWriter(
            new java.io.FileOutputStream(file), java.nio.charset.StandardCharsets.UTF_8))) {
        for (String line : list) {
            writer.write(line);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Also used : FileWriter(java.io.FileWriter) ArrayList(java.util.ArrayList) Record(massbank.Record) IOException(java.io.IOException) BufferedWriter(java.io.BufferedWriter)

Aggregations

Record (massbank.Record)13 ArrayList (java.util.ArrayList)8 IOException (java.io.IOException)7 File (java.io.File)6 HashSet (java.util.HashSet)5 SQLException (java.sql.SQLException)4 Properties (java.util.Properties)4 BufferedWriter (java.io.BufferedWriter)3 FileWriter (java.io.FileWriter)3 DatabaseManager (massbank.db.DatabaseManager)3 CommandLine (org.apache.commons.cli.CommandLine)3 DefaultParser (org.apache.commons.cli.DefaultParser)3 HelpFormatter (org.apache.commons.cli.HelpFormatter)3 Options (org.apache.commons.cli.Options)3 ParseException (org.apache.commons.cli.ParseException)3 ConfigurationException (org.apache.commons.configuration2.ex.ConfigurationException)3 PreparedStatement (java.sql.PreparedStatement)2 ResultSet (java.sql.ResultSet)2 LinkedHashSet (java.util.LinkedHashSet)2 List (java.util.List)2