Search in sources :

Example 1 with Result

use of org.petitparser.context.Result in project MassBank-web by MassBank.

the class Validator method validate.

/**
 * Validate a <code>recordString</code> and return the parsed information in a {@link Record}
 * or <code>null</code> if the validation was not successful. Options are given in
 * <code>config</code>.
 * <p>
 * On failure the parser message is logged, followed by the text of the line containing the
 * failure position and a caret marker under the failing column.
 *
 * @param recordString the complete record text to parse
 * @param contributor  value handed to the {@link Record} constructor
 * @param config       options handed to the {@link RecordParser} constructor
 * @return the populated record, or <code>null</code> if parsing failed
 */
public static Record validate(String recordString, String contributor, Set<String> config) {
    Record record = new Record(contributor);
    RecordParser recordparser = new RecordParser(record, config);
    Result res = recordparser.parse(recordString);
    if (res.isFailure()) {
        logger.error(res.getMessage());
        // Clamp defensively so the index arithmetic below can never leave the string.
        int position = Math.max(0, Math.min(res.getPosition(), recordString.length()));
        // Find the enclosing line by searching for '\n' around the position. String.split
        // silently drops trailing empty lines, which made a failure at the very end of the
        // input (after a final newline) produce no location output at all.
        int lineStart = recordString.lastIndexOf('\n', position - 1) + 1;
        int lineEnd = recordString.indexOf('\n', position);
        if (lineEnd < 0) {
            lineEnd = recordString.length();
        }
        // Do not log the '\r' of a CRLF line ending; it garbles console output.
        if (lineEnd > lineStart && recordString.charAt(lineEnd - 1) == '\r') {
            lineEnd--;
        }
        int col = position - lineStart;
        logger.error(recordString.substring(lineStart, lineEnd));
        logger.error(StringUtils.repeat(" ", col) + '^');
        return null;
    }
    return record;
}
Also used : RecordParser(massbank.RecordParser) Record(massbank.Record) Result(org.petitparser.context.Result)

Example 2 with Result

use of org.petitparser.context.Result in project MassBank-web by MassBank.

the class RecordParserDefinitionTest method validate.

/**
 * Builds the parser for the named <code>production</code>, requires it to consume the
 * complete <code>source</code> and returns the value carried by the parse result.
 *
 * @param source     the text to parse
 * @param production name of the grammar production to build
 * @return the value obtained from the parse result
 */
private <T> T validate(String source, String production) {
    return recordparser.build(production).end().parse(source).get();
}
Also used : Parser(org.petitparser.parser.Parser) Result(org.petitparser.context.Result)

Example 3 with Result

use of org.petitparser.context.Result in project MassBank-web by MassBank.

the class Validator2 method validate.

/**
 * Parses <code>recordstring</code> into a {@link Record}.
 * <p>
 * Before parsing, every character above 0x7F triggers a warning on standard output that
 * shows the affected line with a caret under the character. If parsing fails, the parser
 * message, the affected line and a caret marker are written to standard error.
 *
 * @param recordstring the complete record text to parse
 * @param contributor  value handed to the {@link Record} constructor
 * @return the populated record, or <code>null</code> if parsing failed
 */
public static Record validate(String recordstring, String contributor) {
    // test non standard ASCII chars and print warnings
    for (int i = 0; i < recordstring.length(); i++) {
        if (recordstring.charAt(i) > 0x7F) {
            System.out.println("Warning: non standard ASCII charactet found. This might be an error. Please check carefully.");
            // Locate the line by searching for '\n' around i. The previous split on
            // "\r?\n" combined with a fixed "+ 1" per line lost one character per CRLF line,
            // shifting the caret and possibly pushing it past the end of the marker buffer.
            int lineStart = recordstring.lastIndexOf('\n', i - 1) + 1;
            int lineEnd = recordstring.indexOf('\n', i);
            if (lineEnd < 0) {
                lineEnd = recordstring.length();
            }
            if (lineEnd > lineStart && recordstring.charAt(lineEnd - 1) == '\r') {
                lineEnd--;
            }
            String lineText = recordstring.substring(lineStart, lineEnd);
            System.out.println(lineText);
            // The flagged character is never '\r' or '\n', so its column is always inside lineText.
            StringBuilder marker = new StringBuilder(StringUtils.repeat(" ", lineText.length()));
            marker.setCharAt(i - lineStart, '^');
            System.out.println(marker);
        }
    }
    Record record = new Record(contributor);
    Parser recordparser = new RecordParser(record);
    Result res = recordparser.parse(recordstring);
    if (res.isFailure()) {
        System.err.println();
        System.err.println(res.getMessage());
        // Clamp defensively so the index arithmetic below can never leave the string.
        int position = Math.max(0, Math.min(res.getPosition(), recordstring.length()));
        // Same newline search as above; unlike String.split it keeps trailing empty lines,
        // so a failure at the very end of the input is still reported with a location.
        int lineStart = recordstring.lastIndexOf('\n', position - 1) + 1;
        int lineEnd = recordstring.indexOf('\n', position);
        if (lineEnd < 0) {
            lineEnd = recordstring.length();
        }
        if (lineEnd > lineStart && recordstring.charAt(lineEnd - 1) == '\r') {
            lineEnd--;
        }
        System.err.println(recordstring.substring(lineStart, lineEnd));
        System.err.println(StringUtils.repeat(" ", position - lineStart) + '^');
        return null;
    }
    return record;
}
Also used : RecordParser(massbank.RecordParser) Record(massbank.Record) Parser(org.petitparser.parser.Parser) RecordParser(massbank.RecordParser) Result(org.petitparser.context.Result)

Example 4 with Result

use of org.petitparser.context.Result in project MassBank-web by MassBank.

the class RecordParserDefinition method checkSemantic.

/**
 * Applies <code>continuation</code> to <code>context</code> and, when that succeeds and the
 * record is not marked deprecated, runs a series of cross-field checks on
 * <code>callback</code>. The first check that is violated is reported through
 * <code>context.failure(...)</code>; otherwise the result of the continuation is returned.
 * <p>
 * Checks, in order: consistency of CH$IUPAC / CH$SMILES / CH$FORMULA / CH$LINK INCHIKEY,
 * declared versus actual number of peaks, SPLASH, ordering of the peak list, ordering of the
 * annotation list, length limits for PUBLICATION and RECORD_TITLE, duplicate CH$NAME entries
 * and, if <code>online</code> is set, the PubChem CID listed in CH$LINK.
 *
 * @param continuation the wrapped parse step
 * @param context      parse context the continuation runs on and failures are created from
 * @param callback     the record whose fields are inspected
 * @return the continuation result, or a failure describing the first violated check
 */
private Result checkSemantic(Function<Context, Result> continuation, Context context, Record callback) {
    Result parsed = continuation.apply(context);
    // failed parses and deprecated records are passed through untouched
    if (!parsed.isSuccess() || callback.DEPRECATED()) {
        return parsed;
    }

    // if any structural information is in CH$IUPAC, then CH$FORMULA, CH$SMILES and
    // CH$LINK: INCHIKEY must be defined and match
    boolean iupacGiven = !"N/A".equals(callback.CH_IUPAC());
    boolean smilesGiven = !"N/A".equals(callback.CH_SMILES());
    if (iupacGiven) {
        if (!smilesGiven) {
            return context.failure("If CH$IUPAC is defined, CH$SMILES can not be \"N/A\".");
        }

        // the structures in CH$SMILES and CH$IUPAC are compared through their InChIKeys
        logger.trace("InChIKey from CH$SMILES: " + InChiKeyFromCH_SMILES);
        logger.trace("InChIKey from CH$IUPAC:  " + InChiKeyFromCH_IUPAC);
        boolean inchiKeyMismatch;
        if (legacy) {
            // legacy mode: both keys need 27 characters, but only the first 14 must agree
            boolean bothFullLength = InChiKeyFromCH_SMILES.length() == 27 && InChiKeyFromCH_IUPAC.length() == 27;
            inchiKeyMismatch = !bothFullLength
                    || !InChiKeyFromCH_SMILES.substring(0, 14).equals(InChiKeyFromCH_IUPAC.substring(0, 14));
        } else {
            inchiKeyMismatch = !InChiKeyFromCH_SMILES.equals(InChiKeyFromCH_IUPAC);
        }
        if (inchiKeyMismatch) {
            return context.failure("InChIKey generated from SMILES string in \"CH$SMILES\" field does not match InChIKey from \"CH$IUPAC\".\n"
                    + "InChIKey from CH$SMILES: " + InChiKeyFromCH_SMILES + "\n"
                    + "InChIKey from CH$IUPAC:  " + InChiKeyFromCH_IUPAC);
        }

        // the formula derived from the InChI has to equal CH$FORMULA
        if ("N/A".equals(callback.CH_FORMULA())) {
            return context.failure("If CH$IUPAC is defined, CH$FORMULA can not be \"N/A\".");
        }
        String formulaFromInChI = MolecularFormulaManipulator.getString(MolecularFormulaManipulator.getMolecularFormula(fromCH_IUPAC));
        logger.trace("Formula from CH$FORMULA: " + callback.CH_FORMULA());
        logger.trace("Formula from CH$IUPAC:   " + formulaFromInChI);
        if (!formulaFromInChI.equals(callback.CH_FORMULA())) {
            return context.failure("Formula generated from InChI string in \"CH$IUPAC\" field does not match formula in \"CH$FORMULA\".\n"
                    + "Formula from CH$IUPAC:   " + formulaFromInChI + "\n"
                    + "Formula from CH$FORMULA: " + callback.CH_FORMULA());
        }

        // outside of weak mode an InChIKey link is mandatory
        if (!weak && InChiKeyFromCH_LINK.isEmpty()) {
            return context.failure("If CH$IUPAC is defined, CH$LINK: INCHIKEY must be defined.");
        }
    } else if (smilesGiven) {
        // a SMILES without InChI is only accepted when it contains wildcards
        if (!smilesHasWildcards) {
            return context.failure("If CH$SMILES is defined, CH$IUPAC can not be \"N/A\".");
        }
        logger.trace("SMILES with wildcards defined");
    }

    // the peak list must contain as many peaks as declared
    List<Triple<BigDecimal, BigDecimal, Integer>> peaks = callback.PK_PEAK();
    if (peaks.size() != pk_num_peak) {
        return context.failure("Incorrect number of peaks in peaklist. "
                + pk_num_peak + " peaks are declared in PK$NUM_PEAK line, but " + peaks.size() + " peaks are found.\n");
    }

    // the SPLASH in the record must equal the one calculated from the peaks
    List<Ion> ions = new ArrayList<>(peaks.size());
    for (Triple<BigDecimal, BigDecimal, Integer> p : peaks) {
        double first = p.getLeft().doubleValue();
        double second = p.getMiddle().doubleValue();
        ions.add(new Ion(first, second));
    }
    Splash splashFactory = SplashFactory.create();
    Spectrum spectrum = new SpectrumImpl(ions, SpectraType.MS);
    String calculatedSplash = splashFactory.splashIt(spectrum);
    String recordSplash = callback.PK_SPLASH();
    if (!calculatedSplash.equals(recordSplash)) {
        return context.failure("SPLASH from record file does not match SPLASH calculated from peaklist. "
                + recordSplash + " defined in record file, but " + calculatedSplash + " calculated from peaks.\n");
    }

    // peaks must be strictly ascending by their first component
    for (int next = 1; next < peaks.size(); next++) {
        Triple<BigDecimal, BigDecimal, Integer> current = peaks.get(next - 1);
        if (current.getLeft().compareTo(peaks.get(next).getLeft()) >= 0) {
            return context.failure("The peaks in the peak list are not sorted.\n"
                    + "Error in line " + current.toString() + ".\n");
        }
    }

    // annotations must be ascending by their first component; equal neighbours are allowed
    List<Pair<BigDecimal, List<String>>> annotations = callback.PK_ANNOTATION();
    for (int next = 1; next < annotations.size(); next++) {
        Pair<BigDecimal, List<String>> current = annotations.get(next - 1);
        if (current.getLeft().compareTo(annotations.get(next).getLeft()) > 0) {
            return context.failure("The peaks in the annotation list are not sorted.\n"
                    + "Error in line " + current.toString() + ".\n");
        }
    }

    // max 600 characters are supported in database for PUBLICATION
    if (callback.PUBLICATION() != null && callback.PUBLICATION().length() > 600) {
        return context.failure("PUBLICATION length exeeds database limit of 600 characters.\n");
    }

    // max 600 characters are supported in database for RECORD_TITLE
    if (callback.RECORD_TITLE1().length() > 600) {
        return context.failure("RECORD_TITLE length exeeds database limit of 600 characters.\n");
    }

    // CH$NAME has duplicates exactly when de-duplicating it makes it shorter
    List<String> names = callback.CH_NAME();
    boolean hasDuplicateNames = new HashSet<String>(names).size() < names.size();
    if (hasDuplicateNames) {
        if (weak) {
            logger.warn("There are duplicate entries in \"CH$NAME\" field.");
        } else {
            return context.failure("There are duplicate entries in \"CH$NAME\" field.");
        }
    }

    // check things online
    if (online && callback.CH_LINK().containsKey("INCHIKEY") && callback.CH_LINK().containsKey("PUBCHEM")) {
        String inchiKey = callback.CH_LINK().get("INCHIKEY");
        String pubChem = callback.CH_LINK().get("PUBCHEM");
        Integer preferredCid = new PubchemResolver(inchiKey).getPreferred();
        if (preferredCid == null) {
            return context.failure("CH$LINK: PUBCHEM lists " + pubChem + "\n"
                    + "but PUG rest reports no CID\n"
                    + "for InChIKey " + inchiKey + ".");
        }
        if (!pubChem.equals("CID:" + preferredCid)) {
            return context.failure("CH$LINK: PUBCHEM lists " + pubChem + "\n"
                    + "but PUG rest reports CID:" + preferredCid + " preferred PubChem CID\n"
                    + "for InChIKey " + inchiKey + ".");
        }
    }
    return parsed;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) ArrayList(java.util.ArrayList) Ion(edu.ucdavis.fiehnlab.spectra.hash.core.types.Ion) BigDecimal(java.math.BigDecimal) Result(org.petitparser.context.Result) Spectrum(edu.ucdavis.fiehnlab.spectra.hash.core.Spectrum) Triple(org.apache.commons.lang3.tuple.Triple) Splash(edu.ucdavis.fiehnlab.spectra.hash.core.Splash) SpectrumImpl(edu.ucdavis.fiehnlab.spectra.hash.core.types.SpectrumImpl) Pair(org.apache.commons.lang3.tuple.Pair) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Aggregations

Result (org.petitparser.context.Result)4 Record (massbank.Record)2 RecordParser (massbank.RecordParser)2 Parser (org.petitparser.parser.Parser)2 Spectrum (edu.ucdavis.fiehnlab.spectra.hash.core.Spectrum)1 Splash (edu.ucdavis.fiehnlab.spectra.hash.core.Splash)1 Ion (edu.ucdavis.fiehnlab.spectra.hash.core.types.Ion)1 SpectrumImpl (edu.ucdavis.fiehnlab.spectra.hash.core.types.SpectrumImpl)1 BigDecimal (java.math.BigDecimal)1 ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1 LinkedHashSet (java.util.LinkedHashSet)1 Pair (org.apache.commons.lang3.tuple.Pair)1 Triple (org.apache.commons.lang3.tuple.Triple)1