Search in sources :

Example 1 with Ion

Use of edu.ucdavis.fiehnlab.spectra.hash.core.types.Ion in project MassBank-web by MassBank.

In the class RecordParserDefinition, the method checkSemantic:

/**
 * Runs the wrapped parser step and, when it succeeds on a record that is not
 * flagged as deprecated, applies a sequence of semantic consistency checks.
 *
 * <p>The checks run in this order and the first violation ends the method with
 * {@code context.failure(...)}:
 * <ol>
 *   <li>structure fields: CH$IUPAC / CH$SMILES / CH$FORMULA / CH$LINK: INCHIKEY,</li>
 *   <li>declared peak count versus the actual size of the peak list,</li>
 *   <li>SPLASH stored in the record versus SPLASH computed from the peak list,</li>
 *   <li>ordering of the peak list and of the annotation list,</li>
 *   <li>length limits (600 characters) for PUBLICATION and RECORD_TITLE,</li>
 *   <li>duplicate CH$NAME entries (failure unless {@code weak}, then only a warning),</li>
 *   <li>when {@code online}: the PUBCHEM link versus the CID reported by {@code PubchemResolver}.</li>
 * </ol>
 *
 * <p>NOTE(review): {@code InChiKeyFromCH_SMILES}, {@code InChiKeyFromCH_IUPAC},
 * {@code fromCH_IUPAC}, {@code InChiKeyFromCH_LINK}, {@code smilesHasWildcards},
 * {@code pk_num_peak}, {@code legacy}, {@code weak}, {@code online} and {@code logger}
 * are not declared inside this excerpt; they are presumably fields or values computed
 * in code that is not visible here - confirm against the full class.
 *
 * @param continuation the parser step to execute first
 * @param context      the parse context handed to {@code continuation} and used to build failures
 * @param callback     the record whose parsed fields are validated
 * @return the result of {@code continuation} when it failed, the record is deprecated or
 *         every check passed; otherwise a failure describing the first violated check
 */
private Result checkSemantic(Function<Context, Result> continuation, Context context, Record callback) {
    final Result parsed = continuation.apply(context);
    // Nothing to validate if parsing already failed or the record is deprecated.
    if (!parsed.isSuccess() || callback.DEPRECATED()) {
        return parsed;
    }

    // --- structural information ---------------------------------------------------
    // If CH$IUPAC carries a structure, CH$SMILES, CH$FORMULA and CH$LINK: INCHIKEY
    // have to be present and consistent with it.
    final boolean iupacDefined = !"N/A".equals(callback.CH_IUPAC());
    if (iupacDefined) {
        if ("N/A".equals(callback.CH_SMILES())) {
            return context.failure("If CH$IUPAC is defined, CH$SMILES can not be \"N/A\".");
        }

        // The structures from CH$SMILES and CH$IUPAC are compared through their InChIKeys.
        logger.trace("InChIKey from CH$SMILES: " + InChiKeyFromCH_SMILES);
        logger.trace("InChIKey from CH$IUPAC:  " + InChiKeyFromCH_IUPAC);

        final boolean inchiKeysAgree;
        if (legacy) {
            // Legacy mode: both keys must be 27 characters long and agree on the first 14.
            inchiKeysAgree = InChiKeyFromCH_SMILES.length() == 27
                    && InChiKeyFromCH_IUPAC.length() == 27
                    && InChiKeyFromCH_SMILES.substring(0, 14).equals(InChiKeyFromCH_IUPAC.substring(0, 14));
        } else {
            inchiKeysAgree = InChiKeyFromCH_SMILES.equals(InChiKeyFromCH_IUPAC);
        }
        if (!inchiKeysAgree) {
            return context.failure("InChIKey generated from SMILES string in \"CH$SMILES\" field does not match InChIKey from \"CH$IUPAC\".\n"
                    + "InChIKey from CH$SMILES: " + InChiKeyFromCH_SMILES + "\n"
                    + "InChIKey from CH$IUPAC:  " + InChiKeyFromCH_IUPAC);
        }

        if ("N/A".equals(callback.CH_FORMULA())) {
            return context.failure("If CH$IUPAC is defined, CH$FORMULA can not be \"N/A\".");
        }

        // The molecular formula derived from the InChI must equal CH$FORMULA.
        final String formulaFromInChI = MolecularFormulaManipulator.getString(
                MolecularFormulaManipulator.getMolecularFormula(fromCH_IUPAC));
        logger.trace("Formula from CH$FORMULA: " + callback.CH_FORMULA());
        logger.trace("Formula from CH$IUPAC:   " + formulaFromInChI);
        if (!formulaFromInChI.equals(callback.CH_FORMULA())) {
            return context.failure("Formula generated from InChI string in \"CH$IUPAC\" field does not match formula in \"CH$FORMULA\".\n"
                    + "Formula from CH$IUPAC:   " + formulaFromInChI + "\n"
                    + "Formula from CH$FORMULA: " + callback.CH_FORMULA());
        }

        // Outside weak mode an InChIKey link is mandatory.
        if (!weak && InChiKeyFromCH_LINK.equals("")) {
            return context.failure("If CH$IUPAC is defined, CH$LINK: INCHIKEY must be defined.");
        }
    } else if (!"N/A".equals(callback.CH_SMILES())) {
        // A SMILES without an InChI is tolerated only when it contains wildcards.
        if (!smilesHasWildcards) {
            return context.failure("If CH$SMILES is defined, CH$IUPAC can not be \"N/A\".");
        }
        logger.trace("SMILES with wildcards defined");
    }

    // --- peak count ---------------------------------------------------------------
    final List<Triple<BigDecimal, BigDecimal, Integer>> peaks = callback.PK_PEAK();
    if (peaks.size() != pk_num_peak) {
        return context.failure("Incorrect number of peaks in peaklist. "
                + pk_num_peak + " peaks are declared in PK$NUM_PEAK line, but " + peaks.size() + " peaks are found.\n");
    }

    // --- SPLASH -------------------------------------------------------------------
    // Rebuild the spectrum from the left/middle components of each peak and hash it.
    final List<Ion> spectrumIons = new ArrayList<>();
    peaks.forEach(peak -> spectrumIons.add(
            new Ion(peak.getLeft().doubleValue(), peak.getMiddle().doubleValue())));
    final Splash splasher = SplashFactory.create();
    final Spectrum rebuiltSpectrum = new SpectrumImpl(spectrumIons, SpectraType.MS);
    final String computedSplash = splasher.splashIt(rebuiltSpectrum);
    final String declaredSplash = callback.PK_SPLASH();
    if (!computedSplash.equals(declaredSplash)) {
        return context.failure("SPLASH from record file does not match SPLASH calculated from peaklist. "
                + declaredSplash + " defined in record file, but " + computedSplash + " calculated from peaks.\n");
    }

    // --- peak ordering ------------------------------------------------------------
    // Each peak's left component must be strictly smaller than its successor's.
    for (int next = 1; next < peaks.size(); next++) {
        final Triple<BigDecimal, BigDecimal, Integer> current = peaks.get(next - 1);
        if (current.getLeft().compareTo(peaks.get(next).getLeft()) >= 0) {
            return context.failure("The peaks in the peak list are not sorted.\n"
                    + "Error in line " + current + ".\n");
        }
    }

    // --- annotation ordering ------------------------------------------------------
    // Annotations may repeat a value, but must never decrease.
    final List<Pair<BigDecimal, List<String>>> annotations = callback.PK_ANNOTATION();
    for (int next = 1; next < annotations.size(); next++) {
        final Pair<BigDecimal, List<String>> current = annotations.get(next - 1);
        if (current.getLeft().compareTo(annotations.get(next).getLeft()) > 0) {
            return context.failure("The peaks in the annotation list are not sorted.\n"
                    + "Error in line " + current + ".\n");
        }
    }

    // --- database length limits ---------------------------------------------------
    // PUBLICATION is only checked when present; at most 600 characters are supported.
    if (callback.PUBLICATION() != null && callback.PUBLICATION().length() > 600) {
        return context.failure("PUBLICATION length exeeds database limit of 600 characters.\n");
    }
    // RECORD_TITLE has the same 600 character limit.
    if (callback.RECORD_TITLE1().length() > 600) {
        return context.failure("RECORD_TITLE length exeeds database limit of 600 characters.\n");
    }

    // --- duplicate names ----------------------------------------------------------
    // A set collapses repeated names, so a smaller set means at least one duplicate.
    final List<String> names = callback.CH_NAME();
    final Set<String> distinctNames = new HashSet<>(names);
    if (distinctNames.size() != names.size()) {
        if (!weak) {
            return context.failure("There are duplicate entries in \"CH$NAME\" field.");
        }
        logger.warn("There are duplicate entries in \"CH$NAME\" field.");
    }

    // --- online checks ------------------------------------------------------------
    if (online && callback.CH_LINK().containsKey("INCHIKEY")) {
        final String inchiKey = callback.CH_LINK().get("INCHIKEY");
        if (callback.CH_LINK().containsKey("PUBCHEM")) {
            final String pubChem = callback.CH_LINK().get("PUBCHEM");
            final PubchemResolver resolver = new PubchemResolver(inchiKey);
            final Integer preferredCid = resolver.getPreferred();
            if (preferredCid == null) {
                return context.failure("CH$LINK: PUBCHEM lists " + pubChem + "\n"
                        + "but PUG rest reports no CID\n"
                        + "for InChIKey " + inchiKey + ".");
            }
            if (!pubChem.equals("CID:" + preferredCid)) {
                return context.failure("CH$LINK: PUBCHEM lists " + pubChem + "\n"
                        + "but PUG rest reports CID:" + preferredCid + " preferred PubChem CID\n"
                        + "for InChIKey " + inchiKey + ".");
            }
        }
    }

    return parsed;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) ArrayList(java.util.ArrayList) Ion(edu.ucdavis.fiehnlab.spectra.hash.core.types.Ion) BigDecimal(java.math.BigDecimal) Result(org.petitparser.context.Result) Spectrum(edu.ucdavis.fiehnlab.spectra.hash.core.Spectrum) Triple(org.apache.commons.lang3.tuple.Triple) Splash(edu.ucdavis.fiehnlab.spectra.hash.core.Splash) SpectrumImpl(edu.ucdavis.fiehnlab.spectra.hash.core.types.SpectrumImpl) Pair(org.apache.commons.lang3.tuple.Pair) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Aggregations

Spectrum (edu.ucdavis.fiehnlab.spectra.hash.core.Spectrum)1 Splash (edu.ucdavis.fiehnlab.spectra.hash.core.Splash)1 Ion (edu.ucdavis.fiehnlab.spectra.hash.core.types.Ion)1 SpectrumImpl (edu.ucdavis.fiehnlab.spectra.hash.core.types.SpectrumImpl)1 BigDecimal (java.math.BigDecimal)1 ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1 LinkedHashSet (java.util.LinkedHashSet)1 Pair (org.apache.commons.lang3.tuple.Pair)1 Triple (org.apache.commons.lang3.tuple.Triple)1 Result (org.petitparser.context.Result)1