use of massbank.Record in project MassBank-web by MassBank.
the class AddMetaData method main.
/**
 * Command line entry point of AddMetaData.
 * <p>
 * Prints the tool version, parses the command line, collects every <code>*.txt</code> file that
 * is named directly or found recursively below a given directory, and processes the files in
 * parallel. Each file is read and validated in less strict mode ("legacy" and "weak"). If
 * <code>--add-pubchemcid</code> is set the record is passed to <code>doAddPubchemCID</code>. A
 * file is rewritten only if the resulting record string differs from the file content and the
 * new string passes validation with an empty configuration.
 * <p>
 * The process exits with status 1 on command line errors, if no files are found, if a file
 * cannot be read or written, or if the initial validation of a file fails. Deprecated records
 * are skipped.
 *
 * @param arguments command line options followed by files and/or directories
 * @throws Exception part of the existing signature
 */
public static void main(String[] arguments) throws Exception {
    // load version and print
    final Properties properties = new Properties();
    try {
        properties.load(ClassLoader.getSystemClassLoader().getResourceAsStream("project.properties"));
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
    System.out.println("AddMetaData version: " + properties.getProperty("version"));

    // parse command line
    Options options = new Options();
    options.addOption("a", "all", false, "execute all operations");
    options.addOption("p", "publication", false, "format PUBLICATION tag from given DOI to follow the guidelines of ACS");
    options.addOption("n", "name", false, "fix common problems in CH$NAME tag");
    options.addOption("l", "link", false, "add links to CH$LINK tag");
    options.addOption("r", "rewrite", false, "read and rewrite the file.");
    options.addOption("ms_focused_ion", false, "Inspect MS$FOCUSED_ION");
    options.addOption(null, "add-inchikey", false, "Add or fix InChIKey from the value in CH$IUPAC");
    options.addOption(null, "add-pubchemcid", false, "Add or fix PubChem CID from InChIKey and flag Problems.");
    CommandLine cmd = null;
    try {
        cmd = new DefaultParser().parse(options, arguments);
    } catch (ParseException e) {
        // oops, something went wrong
        System.err.println("Parsing command line failed. Reason: " + e.getMessage());
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("AddMetaData [OPTIONS] <FILE|DIR> [<FILE|DIR> ...]", options);
        System.exit(1);
    }
    if (cmd.getArgList().isEmpty()) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("AddMetaData [OPTIONS] <FILE|DIR> [<FILE|DIR> ...]", options);
        System.exit(1);
    }

    // find all files in arguments and all *.txt files in directories and subdirectories
    // specified in arguments
    List<File> recordfiles = new ArrayList<>();
    for (String argument : cmd.getArgList()) {
        File argumentf = new File(argument);
        if (argumentf.isFile() && FilenameUtils.getExtension(argument).equals("txt")) {
            recordfiles.add(argumentf);
        } else if (argumentf.isDirectory()) {
            recordfiles.addAll(FileUtils.listFiles(argumentf, new String[] { "txt" }, true));
        } else {
            logger.warn("Argument " + argument + " could not be processed.");
        }
    }
    if (recordfiles.isEmpty()) {
        logger.error("No files found.");
        System.exit(1);
    }

    // validate all files
    logger.trace("Validating " + recordfiles.size() + " files");
    // the flag is only read inside the lambda, so a final local is sufficient
    final boolean doAddPubchemCid = cmd.hasOption("add-pubchemcid");
    recordfiles.parallelStream().forEach(filename -> {
        logger.info("Working on " + filename + ".");
        try {
            String recordString = FileUtils.readFileToString(filename, StandardCharsets.UTF_8);
            // read record in less strict mode
            Set<String> config = new HashSet<String>();
            config.add("legacy");
            config.add("weak");
            Record record = Validator.validate(recordString, "", config);
            if (record == null) {
                System.err.println("Validation of \"" + filename + "\" failed. Exiting.");
                System.exit(1);
            } else if (record.DEPRECATED()) {
                // Skip only this file. Exiting here would abort the whole parallel run with a
                // success status while other files are still unprocessed.
                logger.info("Skipping deprecated record " + filename + ".");
                return;
            }

            String recordstring2 = recordString;
            if (doAddPubchemCid) {
                recordstring2 = doAddPubchemCID(record);
            }

            if (!recordString.equals(recordstring2)) {
                // Validate the NEW record string with an empty configuration before it replaces
                // the file content.
                Record record2 = Validator.validate(recordstring2, "", new HashSet<String>());
                if (record2 == null) {
                    System.err.println("Validation of new created record file failed. Do not write.");
                } else {
                    try {
                        FileUtils.write(filename, recordstring2, StandardCharsets.UTF_8);
                    } catch (IOException exp) {
                        System.err.println("Writing file \"" + filename + "\" failed. Reason: " + exp.getMessage());
                        System.exit(1);
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    });
}
use of massbank.Record in project MassBank-web by MassBank.
the class RecordExporter method main.
/*
.ms2 text format: peptides
-----------------
https://skyline.ms/wiki/home/software/BiblioSpec/page.view?name=BiblioSpec%20input%20and%20output%20file%20formats
This format is recognized by proteowizard's msconvert and can be converted into other formats such as .mzXML.
In an .ms2 file there are four types of lines.
Lines beginning with 'H' are header lines and contain information about how the data was collected as well as comments. They appear at the beginning of the file.
Lines beginning with 'S' are followed by the scan number and the precursor m/z.
Lines beginning with 'Z' give the charge state followed by the mass of the ion at that charge state.
Lines beginning with 'D' contain information relevant to the preceding charge state. BlibToMs2's output will include D-lines with the sequence and modified sequence.
The file is arranged with these S, Z and D lines for one spectrum followed by a peak list:
a pair of values giving each peak's m/z and intensity. Here is an example file:
H CreationDate Mon Apr 12 15:12:14 2010
H Extractor BlibToMs2
H Library /home/me/research/search/demo.blib
S 1 1 636.34
Z 2 1253.36
D seq FKNGFQTGSASK
D modified seq FKNGFQTGSASK
187.40 12.5
193.10 19.5
242.30 14.2
244.30 9.0
S 2 2 745.3
Z 2 1471.7
D seq NFLETVELQVGLK
D modified seq NFLETVELQVGLK
1224.60 7.9
1228.70 468.9
1230.40 658.5
1231.50 144.2
BlibBuild .ssl file:
--------------------
https://skyline.ms/wiki/home/software/BiblioSpec/page.view?name=BiblioSpec%20input%20and%20output%20file%20formats
NIST *.msp file:
----------------
https://chemdata.nist.gov/mass-spc/ftp/mass-spc/PepLib.pdf
(section 'Spectrum Fields and Format')
Name: KDLGEEHFK/2
MW: 1103.561
Comment: Spec=Consensus Pep=N-Semitryp_irreg/miss_good Fullname=F.KDLGEEHFK.G/2 Mods=0 Parent=551.781 Inst=it Mz_diff=0.544 Mz_exact=551.7805 Mz_av=552.114 Protein="sp|P02769|ALBU_BOVIN Serum albumin precursor (Allergen Bos d 6) (BSA) - Bos taurus (Bovine)." Pseq=131/1 Organism="Protein" Se=4^X12:ex=0.00037/0.0003992,td=25.85/1379,sd=0/0,hs=38.5/1.433,bs=0.00027,b2=0.00028,bd=133^O10:ex=0.0002435/0.0009314,td=74.85/3.186e+004,pr=3.235e-007/8.612e-007,bs=2.73e-005,b2=5.56e-005,bd=1.56^I1:ex=0.0339/0,dc=0.939/0,do=6.14/0,bs=0.0339,bd=0.939^C1:ex=0.032/0,td=0/0,sd=0/0,hs=555/0,bs=0.032 Sample=7/bsa_cam,2,6/bsa_cam_different_voltages,1,3/bsa_none,0,1/nist_yl_31011_sigma_t9253_bsa_cam,4,6/nist_yl_31011_sigma_t9253_bsa_time_cam,4,6/nist_yl_31611_sigma_t9253_bsa_cam,0,3/nist_yl_sgma_t9253_bsa_none,1,2 Nreps=12/27 Missing=0.1916/0.0688 Parent_med=552.3075/0.22 Max2med_orig=100.0/0.0 Dotfull=0.743/0.044 Dot_cons=0.809/0.048 Unassign_all=0.173 Unassigned=0.105 Dotbest=0.83 Flags=12,9,1 Naa=9 DUScorr=1.5/0.71/2.9 Dottheory=0.84 Pfin=4.6e+008 Probcorr=6.7 Tfratio=6e+003 Pfract=0 Unassigned_corrected=0.011
Num peaks: 124
201.2 149 "? 11/10 0.7"
209.1 238 "b2-35/-0.02 11/11 0.7"
226.3 779 "b2-18/0.18 12/12 1.7"
227.3 484 "b2-17/0.18 12/12 0.9"
228.4 62 "b2-17i/1.28 7/10 0.2"
*/
public static void main(String[] arguments) {
    // print the version stored in project.properties
    final Properties versionProperties = new Properties();
    try {
        versionProperties.load(ClassLoader.getSystemClassLoader().getResourceAsStream("project.properties"));
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
    System.out.println("Exporter version: " + versionProperties.getProperty("version"));

    // declare the supported options and parse the command line
    final String usage = "RecordExporter [OPTIONS] <FILE|DIR> [<FILE|DIR> ...]";
    Options options = new Options();
    options.addRequiredOption("o", "outfile", true, "name of output file");
    options.addOption("f", "format", true, "output format; possible values: RIKEN_MSP, NIST_MSP; default is RIKEN_MSP");
    CommandLine cmd = null;
    try {
        cmd = new DefaultParser().parse(options, arguments);
    } catch (ParseException e) {
        System.err.println("Parsing command line failed. Reason: " + e.getMessage());
        new HelpFormatter().printHelp(usage, options);
        System.exit(1);
    }

    // at least one file or directory is required
    if (cmd.getArgList().isEmpty()) {
        new HelpFormatter().printHelp(usage, options);
        System.exit(1);
    }

    // an explicitly given format has to be one of the known ones
    String format = cmd.getOptionValue("f");
    boolean formatAccepted = format == null || "RIKEN_MSP".equals(format) || "NIST_MSP".equals(format);
    if (!formatAccepted) {
        new HelpFormatter().printHelp(usage, options);
        System.exit(1);
    }

    // For every argument: take the file itself if it is a *.txt file, or all *.txt files below it
    // if it is a directory whose name does not start with a dot; then parse each file to a Record.
    List<Record> records = cmd.getArgList().parallelStream().map(argument -> {
        File candidate = new File(argument);
        List<File> recordFiles = new ArrayList<File>();
        if (candidate.isFile() && FilenameUtils.getExtension(argument).equals("txt")) {
            recordFiles.add(candidate);
        } else if (candidate.isDirectory()) {
            if (!candidate.getName().startsWith(".")) {
                recordFiles.addAll(FileUtils.listFiles(candidate, new String[] { "txt" }, true));
            }
        } else {
            logger.warn("Argument " + argument + " could not be processed.");
        }

        // unreadable or invalid files yield null here and are dropped further down
        List<Record> parsedRecords = recordFiles.parallelStream().map(recordFile -> {
            try {
                String content = FileUtils.readFileToString(recordFile, StandardCharsets.UTF_8);
                Set<String> config = new HashSet<String>();
                config.add("legacy");
                return Validator.validate(content, "", config);
            } catch (IOException e) {
                e.printStackTrace();
                return null;
            }
        }).collect(Collectors.toList());
        return parsedRecords;
    }).flatMap(parsedRecords -> parsedRecords.stream())
            .filter(parsedRecord -> parsedRecord != null)
            .collect(Collectors.toList());

    File outfile = new File(cmd.getOptionValue("o"));

    // default output format is RIKEN_MSP
    String effectiveFormat = (format != null) ? format : "RIKEN_MSP";
    if ("RIKEN_MSP".equals(effectiveFormat)) {
        RecordToRIKEN_MSP.recordsToRIKEN_MSP(outfile, records);
    } else if ("NIST_MSP".equals(effectiveFormat)) {
        RecordToNIST_MSP.recordsToNIST_MSP(outfile, records);
    } else {
        // the format was checked above, so this branch is not expected to be reached
        logger.error("This code should not run.");
        System.exit(1);
    }
}
use of massbank.Record in project MassBank-web by MassBank.
the class Validator method main.
/**
 * Command line entry point of the Validator.
 * <p>
 * Prints the tool version, parses the command line, collects every <code>*.txt</code> file that
 * is named directly or found recursively below a given directory, and validates the files in
 * parallel. For each file that passes {@link #validate(String, String, Set)} the following
 * additional checks are made:
 * <ul>
 * <li>the ACCESSION has to match the base name of the file,</li>
 * <li>the string generated from the parsed record has to be identical to the file content,</li>
 * <li>with <code>--db</code>, the string generated from the record read from the database has to
 * be identical to the file content.</li>
 * </ul>
 * Finally the collected accessions are checked for duplicates.
 * <p>
 * The process exits with status 1 if any check failed, on command line errors, if no files were
 * found, if a file cannot be read or if the database access fails; otherwise with status 0.
 *
 * @param arguments command line options followed by files and/or directories
 */
public static void main(String[] arguments) {
    // load version and print
    final Properties properties = new Properties();
    try {
        properties.load(ClassLoader.getSystemClassLoader().getResourceAsStream("project.properties"));
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
    System.out.println("Validator version: " + properties.getProperty("version"));

    // parse command line
    Options options = new Options();
    options.addOption(null, "db", false, "also read record from database and compare with original Record; Developer Feature!");
    options.addOption(null, "legacy", false, "less strict mode for legacy records with minor problems.");
    options.addOption(null, "online", false, "also do online checks, like PubChem CID check.");
    CommandLine cmd = null;
    try {
        cmd = new DefaultParser().parse(options, arguments);
    } catch (ParseException e) {
        // oops, something went wrong
        System.err.println("Parsing command line failed. Reason: " + e.getMessage());
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("Validator [OPTIONS] <FILE|DIR> [<FILE|DIR> ...]", options);
        System.exit(1);
    }
    if (cmd.getArgList().isEmpty()) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("Validator [OPTIONS] <FILE|DIR> [<FILE|DIR> ...]", options);
        System.exit(1);
    }
    if (cmd.hasOption("legacy")) {
        System.out.println("Validation mode: legacy");
    }

    // find all files in arguments and all *.txt files in directories and subdirectories
    // specified in arguments
    List<File> recordfiles = new ArrayList<>();
    for (String argument : cmd.getArgList()) {
        File argumentf = new File(argument);
        if (argumentf.isFile() && FilenameUtils.getExtension(argument).equals("txt")) {
            recordfiles.add(argumentf);
        } else if (argumentf.isDirectory()) {
            recordfiles.addAll(FileUtils.listFiles(argumentf, new String[] { "txt" }, true));
        } else {
            logger.warn("Argument " + argument + " could not be processed.");
        }
    }
    if (recordfiles.isEmpty()) {
        logger.error("No files found for validation.");
        System.exit(1);
    }

    // validate all files
    logger.trace("Validating " + recordfiles.size() + " files");
    // written from inside the parallel lambda, hence atomic
    AtomicBoolean haserror = new AtomicBoolean(false);
    // the option flags are only read inside the lambda, so final locals are sufficient
    final boolean doDatabase = cmd.hasOption("db");
    final boolean legacyMode = cmd.hasOption("legacy");
    final boolean onlineMode = cmd.hasOption("online");
    List<String> accessions = recordfiles.parallelStream().map(filename -> {
        String accession = null;
        logger.info("Working on " + filename + ".");
        try {
            String recordString = FileUtils.readFileToString(filename, StandardCharsets.UTF_8);
            if (hasNonStandardChars(recordString)) {
                logger.warn("Check " + filename + ".");
            }

            // basic validation
            Set<String> config = new HashSet<String>();
            if (legacyMode) {
                config.add("legacy");
            }
            if (onlineMode) {
                config.add("online");
            }
            Record record = validate(recordString, "", config);
            if (record == null) {
                logger.error("Error in '" + filename + "'.");
                haserror.set(true);
            } else {
                // additional tests
                logger.trace("validation passed for " + filename);

                // compare ACCESSION with filename
                accession = record.ACCESSION();
                if (!accession.equals(FilenameUtils.getBaseName(filename.toString()))) {
                    logger.error("Error in '" + filename.getName() + "'.");
                    logger.error("ACCESSION '" + record.ACCESSION() + "' does not match filename '" + filename.getName() + "'");
                    haserror.set(true);
                }

                // validate correct serialization: String <-> (String -> Record class -> String)
                String recordStringFromRecord = record.toString();
                int position = StringUtils.indexOfDifference(new String[] { recordString, recordStringFromRecord });
                if (position != -1) {
                    logger.error("Error in '" + filename + "'.");
                    logger.error("File content differs from generated record string.\nThis might be a code problem. Please Report!");
                    // Flag the error unconditionally: the difference may lie beyond the last
                    // line of the generated string, where no line can be shown.
                    haserror.set(true);
                    logDifferenceLocation(recordStringFromRecord, position);
                }

                // validate correct serialization with db: String <-> (db -> Record class -> String)
                if (doDatabase) {
                    Record recordDatabase = null;
                    try {
                        DatabaseManager dbMan = new DatabaseManager("MassBank");
                        recordDatabase = dbMan.getAccessionData(record.ACCESSION());
                        dbMan.closeConnection();
                    } catch (SQLException | ConfigurationException e) {
                        e.printStackTrace();
                        System.exit(1);
                    }
                    if (recordDatabase == null) {
                        String errormsg = "retrieval of '" + record.ACCESSION() + "' from database failed";
                        logger.error(errormsg);
                        System.exit(1);
                    }
                    String recordStringFromDB = recordDatabase.toString();
                    position = StringUtils.indexOfDifference(new String[] { recordString, recordStringFromDB });
                    if (position != -1) {
                        logger.error("Error in '" + filename + "'.");
                        logger.error("File content differs from generated record string from database content.\nThis might be a code problem. Please Report!");
                        haserror.set(true);
                        logDifferenceLocation(recordStringFromDB, position);
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
        return accession;
    }).filter(Objects::nonNull).collect(Collectors.toList());

    // check duplicates
    Set<String> duplicates = new LinkedHashSet<String>();
    Set<String> uniques = new HashSet<String>();
    for (String c : accessions) {
        if (!uniques.add(c)) {
            duplicates.add(c);
        }
    }
    if (!duplicates.isEmpty()) {
        logger.error("There are duplicates in all accessions:");
        logger.error(duplicates.toString());
        haserror.set(true);
    }

    // return 1 if there were errors
    System.exit(haserror.get() ? 1 : 0);
}

/**
 * Logs the line of <code>text</code> that contains the character offset <code>position</code>,
 * followed by a caret marking the column. If the offset lies beyond the last line of
 * <code>text</code> only the offset itself is reported.
 *
 * @param text     string whose lines are shown
 * @param position zero-based character offset of the first difference
 */
private static void logDifferenceLocation(String text, int position) {
    String[] tokens = text.split("\\n");
    int lineStart = 0;
    for (int line = 0; line < tokens.length; line++) {
        // +1 accounts for the line break removed by split
        int lineEnd = lineStart + tokens[line].length() + 1;
        if (position < lineEnd) {
            logger.error("Error in line " + (line + 1) + ".");
            logger.error(tokens[line]);
            StringBuilder error_at = new StringBuilder(StringUtils.repeat(" ", position - lineStart));
            error_at.append('^');
            logger.error(error_at);
            return;
        }
        lineStart = lineEnd;
    }
    logger.error("First difference at character position " + position + ", beyond the last line of the generated record string.");
}
use of massbank.Record in project MassBank-web by MassBank.
the class Validator method validate.
/**
 * Parses <code>recordString</code> into a {@link Record}.
 * <p>
 * On a parse failure the parser message is logged, followed by the offending line of
 * <code>recordString</code> and a caret below the reported position.
 *
 * @param recordString the record text to validate
 * @param contributor  passed to the {@link Record} constructor
 * @param config       options handed to the parser
 * @return the populated {@link Record}, or <code>null</code> if validation was not successful
 */
public static Record validate(String recordString, String contributor, Set<String> config) {
    Record parsedRecord = new Record(contributor);
    RecordParser parser = new RecordParser(parsedRecord, config);
    Result parseResult = parser.parse(recordString);
    if (!parseResult.isFailure()) {
        return parsedRecord;
    }

    logger.error(parseResult.getMessage());
    int errorPosition = parseResult.getPosition();
    String[] lines = recordString.split("\\n");
    // offset of the first character of the line currently inspected
    int lineStart = 0;
    for (int index = 0; index < lines.length; index++) {
        // +1 accounts for the line break removed by split
        int nextLineStart = lineStart + lines[index].length() + 1;
        if (errorPosition < nextLineStart) {
            logger.error(lines[index]);
            StringBuilder marker = new StringBuilder(StringUtils.repeat(" ", errorPosition - lineStart));
            marker.append('^');
            logger.error(marker);
            break;
        }
        lineStart = nextLineStart;
    }
    return null;
}
use of massbank.Record in project MassBank-web by MassBank.
the class RecordToNIST_MSP method recordsToNIST_MSP.
/**
 * A wrapper to convert multiple Records and write to file.
 * <p>
 * All records are converted with <code>convert</code> before the file is opened. The converted
 * strings are then written back to back without any additional separator, encoded as UTF-8. An
 * {@link IOException} raised while writing is printed to standard error and not propagated.
 *
 * @param file    file to write; existing content is replaced
 * @param records records to convert
 */
public static void recordsToNIST_MSP(File file, List<Record> records) {
    // collect data
    List<String> list = new ArrayList<String>(records.size());
    for (Record record : records) {
        list.add(convert(record));
    }
    // try-with-resources closes the writer even if a write fails. The charset is given
    // explicitly so that the output does not depend on the platform default.
    try (BufferedWriter writer = new BufferedWriter(new java.io.OutputStreamWriter(
            new java.io.FileOutputStream(file), java.nio.charset.StandardCharsets.UTF_8))) {
        for (String entry : list) {
            writer.write(entry);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Aggregations