Search in sources :

Example 1 with QueryAtom

use of org.openscience.cdk.isomorphism.matchers.QueryAtom in project cdk by cdk.

the class QueryStereoFilter method checkTetrahedral.

/**
 * Verify the tetrahedral stereo-chemistry (clockwise/anticlockwise) of atom
 * {@code u} is preserved in the target when the {@code mapping} is used.
 *
 * @param u       tetrahedral index in the target
 * @param mapping mapping of vertices
 * @return the tetrahedral configuration is preserved
 */
private boolean checkTetrahedral(int u, int[] mapping) {
    int v = mapping[u];
    if (targetTypes[v] != null && targetTypes[v] != Type.Tetrahedral)
        return false;
    ITetrahedralChirality queryElement = (ITetrahedralChirality) queryElements[u];
    ITetrahedralChirality targetElement = (ITetrahedralChirality) targetElements[v];
    IAtom queryAtom = query.getAtom(u);
    IAtom targetAtom = target.getAtom(v);
    // check if unspecified was allowed
    if (targetTypes[v] == null)
        return ((QueryAtom) queryAtom).getExpression().matches(targetAtom, 0);
    // target was non-tetrahedral
    if (targetTypes[v] != Type.Tetrahedral)
        return false;
    int[] us = map(u, v, neighbors(queryElement, queryMap), mapping);
    int[] vs = neighbors(targetElement, targetMap);
    // adjustment needed for implicit neighbor (H or lone pair)
    int focusIdx = targetMap.get(targetAtom);
    for (int i = 0; i < 4; i++) {
        // find mol neighbor in mapped query list
        int j = indexOf(us, vs[i]);
        // (which we store as focusIdx) with this neighbor
        if (j < 0)
            us[indexOf(us, focusIdx)] = vs[i];
    }
    int parity = permutationParity(us) * permutationParity(vs) * parity(targetElement.getStereo());
    int groupInfo = targetElement.getGroupInfo();
    if (groupInfo != 0) {
        if (groupConfigAdjust == null)
            groupConfigAdjust = new int[target.getAtomCount()];
        // initialise the group
        if (groupConfigAdjust[v] == 0) {
            boolean leftOk = ((QueryAtom) queryAtom).getExpression().matches(targetAtom, IStereoElement.LEFT);
            boolean rghtOk = ((QueryAtom) queryAtom).getExpression().matches(targetAtom, IStereoElement.RIGHT);
            // element so wait till we find the next one
            if (leftOk && rghtOk) {
                return true;
            }
            int adjust = 1;
            if ((parity == -1 && !leftOk) || (parity == 1 && !rghtOk))
                adjust = -1;
            for (int idx : targetStereoIndices) {
                if (targetElements[idx].getGroupInfo() == groupInfo) {
                    groupConfigAdjust[idx] = adjust;
                }
            }
        }
        // make the adjustment
        parity *= groupConfigAdjust[v];
    }
    if (parity < 0)
        return ((QueryAtom) queryAtom).getExpression().matches(targetAtom, IStereoElement.LEFT);
    else if (parity > 0)
        return ((QueryAtom) queryAtom).getExpression().matches(targetAtom, IStereoElement.RIGHT);
    else
        return ((QueryAtom) queryAtom).getExpression().matches(targetAtom, 0);
}
Also used : ITetrahedralChirality(org.openscience.cdk.interfaces.ITetrahedralChirality) IAtom(org.openscience.cdk.interfaces.IAtom) QueryAtom(org.openscience.cdk.isomorphism.matchers.QueryAtom)

Example 2 with QueryAtom

use of org.openscience.cdk.isomorphism.matchers.QueryAtom in project cdk by cdk.

the class Abbreviations method matchExact.

/**
 * Make a query atom that matches atomic number, h count, valence, and
 * connectivity. This effectively provides an exact match for that atom
 * type.
 *
 * @param mol  molecule
 * @param atom atom of molecule
 * @return the query atom (null if attachment point)
 */
private IQueryAtom matchExact(final IAtomContainer mol, final IAtom atom) {
    final IChemObjectBuilder bldr = atom.getBuilder();
    int elem = atom.getAtomicNumber();
    // attach atom skipped
    if (elem == 0)
        return null;
    int hcnt = atom.getImplicitHydrogenCount();
    int val = hcnt;
    int con = hcnt;
    for (IBond bond : mol.getConnectedBondsList(atom)) {
        val += bond.getOrder().numeric();
        con++;
        if (bond.getOther(atom).getAtomicNumber() == 1)
            hcnt++;
    }
    Expr expr = new Expr(Expr.Type.ELEMENT, elem).and(new Expr(Expr.Type.TOTAL_DEGREE, con)).and(new Expr(Expr.Type.TOTAL_H_COUNT, hcnt)).and(new Expr(Expr.Type.VALENCE, val));
    return new QueryAtom(expr);
}
Also used : Expr(org.openscience.cdk.isomorphism.matchers.Expr) IBond(org.openscience.cdk.interfaces.IBond) IChemObjectBuilder(org.openscience.cdk.interfaces.IChemObjectBuilder) IQueryAtom(org.openscience.cdk.isomorphism.matchers.IQueryAtom) QueryAtom(org.openscience.cdk.isomorphism.matchers.QueryAtom)

Example 3 with QueryAtom

use of org.openscience.cdk.isomorphism.matchers.QueryAtom in project cdk by cdk.

the class MDLV2000Writer method writeMolecule.

/**
 * Writes a Molecule to an OutputStream in MDL sdf format.
 *
 * @param container Molecule that is written to an OutputStream
 */
public void writeMolecule(IAtomContainer container) throws Exception {
    final int dim = getNumberOfDimensions(container);
    StringBuilder line = new StringBuilder();
    Map<Integer, Integer> rgroups = null;
    Map<Integer, String> aliases = null;
    // write header block
    // lines get shortened to 80 chars, that's in the spec
    String title = container.getTitle();
    if (title == null)
        title = "";
    if (title.length() > 80)
        title = title.substring(0, 80);
    writer.write(title);
    writer.write('\n');
    /*
         * From CTX spec This line has the format:
         * IIPPPPPPPPMMDDYYHHmmddSSssssssssssEEEEEEEEEEEERRRRRR (FORTRAN:
         * A2<--A8--><---A10-->A2I2<--F10.5-><---F12.5--><-I6-> ) User's first
         * and last initials (l), program name (P), date/time (M/D/Y,H:m),
         * dimensional codes (d), scaling factors (S, s), energy (E) if modeling
         * program input, internal registry number (R) if input through MDL
         * form. A blank line can be substituted for line 2.
         */
    writer.write("  ");
    writer.write(getProgName());
    writer.write(new SimpleDateFormat("MMddyyHHmm").format(System.currentTimeMillis()));
    if (dim != 0) {
        writer.write(Integer.toString(dim));
        writer.write('D');
    }
    writer.write('\n');
    String comment = container.getProperty(CDKConstants.REMARK);
    if (comment == null)
        comment = "";
    if (comment.length() > 80)
        comment = comment.substring(0, 80);
    writer.write(comment);
    writer.write('\n');
    // index stereo elements for setting atom parity values
    Map<IAtom, ITetrahedralChirality> atomstereo = new HashMap<>();
    Map<IAtom, Integer> atomindex = new HashMap<>();
    for (IStereoElement element : container.stereoElements()) if (element instanceof ITetrahedralChirality)
        atomstereo.put(((ITetrahedralChirality) element).getChiralAtom(), (ITetrahedralChirality) element);
    for (IAtom atom : container.atoms()) atomindex.put(atom, atomindex.size());
    // write Counts line
    line.append(formatMDLInt(container.getAtomCount(), 3));
    line.append(formatMDLInt(container.getBondCount(), 3));
    // find all the atoms that should be atom lists
    Map<Integer, IAtom> atomLists = new LinkedHashMap<>();
    for (int f = 0; f < container.getAtomCount(); f++) {
        if (container.getAtom(f) instanceof IQueryAtom) {
            QueryAtom queryAtom = (QueryAtom) AtomRef.deref(container.getAtom(f));
            Expr expr = queryAtom.getExpression();
            if (isValidAtomListExpression(expr)) {
                atomLists.put(f, container.getAtom(f));
            }
        }
    }
    // write number of atom lists
    line.append(formatMDLInt(atomLists.size(), 3));
    line.append("  0");
    line.append(getChiralFlag(atomstereo.values()) ? "  1" : "  0");
    line.append("  0  0  0  0  0999 V2000");
    writer.write(line.toString());
    writer.write('\n');
    // write Atom block
    for (int f = 0; f < container.getAtomCount(); f++) {
        IAtom atom = container.getAtom(f);
        line.setLength(0);
        switch(dim) {
            case 0:
                // if no coordinates available, then output a number
                // of zeros
                line.append("    0.0000    0.0000    0.0000 ");
                break;
            case 2:
                if (atom.getPoint2d() != null) {
                    line.append(formatMDLFloat((float) atom.getPoint2d().x));
                    line.append(formatMDLFloat((float) atom.getPoint2d().y));
                    line.append("    0.0000 ");
                } else {
                    line.append("    0.0000    0.0000    0.0000 ");
                }
                break;
            case 3:
                if (atom.getPoint3d() != null) {
                    line.append(formatMDLFloat((float) atom.getPoint3d().x));
                    line.append(formatMDLFloat((float) atom.getPoint3d().y));
                    line.append(formatMDLFloat((float) atom.getPoint3d().z)).append(" ");
                } else {
                    line.append("    0.0000    0.0000    0.0000 ");
                }
                break;
        }
        if (container.getAtom(f) instanceof IPseudoAtom) {
            // according to http://www.google.co.uk/url?sa=t&ct=res&cd=2&url=http%3A%2F%2Fwww.mdl.com%2Fdownloads%2Fpublic%2Fctfile%2Fctfile.pdf&ei=MsJjSMbjAoyq1gbmj7zCDQ&usg=AFQjCNGaJSvH4wYy4FTXIaQ5f7hjoTdBAw&sig2=eSfruNOSsdMFdlrn7nhdAw an R group is written as R#
            IPseudoAtom pseudoAtom = (IPseudoAtom) container.getAtom(f);
            String label = pseudoAtom.getLabel();
            if (// set to empty string if null
            label == null)
                label = "";
            // firstly check if it's a numbered R group
            Matcher matcher = NUMERED_R_GROUP.matcher(label);
            if (pseudoAtom.getAtomicNumber() == IElement.Wildcard && !label.isEmpty() && matcher.matches()) {
                line.append("R# ");
                if (rgroups == null) {
                    // we use a tree map to ensure the output order is always the same
                    rgroups = new TreeMap<>();
                }
                rgroups.put(f + 1, Integer.parseInt(matcher.group(1)));
            } else // not a numbered R group - note the symbol may still be R
            {
                // to use an alias.
                if (label.length() > 3) {
                    if (aliases == null)
                        aliases = new TreeMap<>();
                    // atom index to alias
                    aliases.put(f + 1, label);
                    line.append(formatMDLString(atom.getSymbol(), 3));
                } else {
                    // make sure it's not empty
                    if (!label.isEmpty())
                        line.append(formatMDLString(label, 3));
                    else
                        line.append(formatMDLString(atom.getSymbol(), 3));
                }
            }
        } else if (atomLists.containsKey(f)) {
            line.append(formatMDLString("L", 3));
        } else {
            line.append(formatMDLString(container.getAtom(f).getSymbol(), 3));
        }
        // atom properties
        int[] atomprops = new int[12];
        atomprops[0] = determineIsotope(atom);
        atomprops[1] = determineCharge(container, atom);
        atomprops[2] = determineStereoParity(container, atomstereo, atomindex, atom);
        atomprops[5] = determineValence(container, atom);
        atomprops[9] = determineAtomMap(atom);
        // dd (mass-number)
        line.append(formatMDLInt(atomprops[0], 2));
        // ccc (charge)
        line.append(formatMDLInt(atomprops[1], 3));
        int last = atomprops.length - 1;
        if (!writeDefaultProps.isSet()) {
            while (last >= 0) {
                if (atomprops[last] != 0)
                    break;
                last--;
            }
            // matches BIOVIA syntax
            if (last >= 2 && last < 5)
                last = 5;
        }
        for (int i = 2; i <= last; i++) line.append(formatMDLInt(atomprops[i], 3));
        line.append('\n');
        writer.write(line.toString());
    }
    // write Bond block
    for (IBond bond : container.bonds()) {
        line.setLength(0);
        if (bond.getAtomCount() != 2) {
            logger.warn("Skipping bond with more/less than two atoms: " + bond);
        } else {
            if (bond.getStereo() == IBond.Stereo.UP_INVERTED || bond.getStereo() == IBond.Stereo.DOWN_INVERTED || bond.getStereo() == IBond.Stereo.UP_OR_DOWN_INVERTED) {
                // turn around atom coding to correct for inv stereo
                line.append(formatMDLInt(atomindex.get(bond.getEnd()) + 1, 3));
                line.append(formatMDLInt(atomindex.get(bond.getBegin()) + 1, 3));
            } else {
                line.append(formatMDLInt(atomindex.get(bond.getBegin()) + 1, 3));
                line.append(formatMDLInt(atomindex.get(bond.getEnd()) + 1, 3));
            }
            int bondType = 0;
            if (bond instanceof QueryBond) {
                QueryBond qbond = ((QueryBond) bond);
                Expr e = qbond.getExpression();
                switch(e.type()) {
                    case ALIPHATIC_ORDER:
                    case ORDER:
                        bondType = e.value();
                        break;
                    case IS_AROMATIC:
                        bondType = 4;
                        break;
                    case SINGLE_OR_DOUBLE:
                        bondType = 5;
                        break;
                    case SINGLE_OR_AROMATIC:
                        bondType = 6;
                        break;
                    case DOUBLE_OR_AROMATIC:
                        bondType = 7;
                        break;
                    case TRUE:
                        bondType = 8;
                        break;
                    case OR:
                        // SINGLE_OR_DOUBLE
                        if (e.equals(new Expr(Expr.Type.ALIPHATIC_ORDER, 1).or(new Expr(Expr.Type.ALIPHATIC_ORDER, 2))) || e.equals(new Expr(Expr.Type.ALIPHATIC_ORDER, 2).or(new Expr(Expr.Type.ALIPHATIC_ORDER, 1))))
                            bondType = 5;
                        else // SINGLE_OR_AROMATIC
                        if (e.equals(new Expr(Expr.Type.ALIPHATIC_ORDER, 1).or(new Expr(Expr.Type.IS_AROMATIC))) || e.equals(new Expr(Expr.Type.IS_AROMATIC).or(new Expr(Expr.Type.ALIPHATIC_ORDER, 1))))
                            bondType = 6;
                        else // DOUBLE_OR_AROMATIC
                        if (e.equals(new Expr(Expr.Type.ALIPHATIC_ORDER, 2).or(new Expr(Expr.Type.IS_AROMATIC))) || e.equals(new Expr(Expr.Type.IS_AROMATIC).or(new Expr(Expr.Type.ALIPHATIC_ORDER, 2))))
                            bondType = 6;
                        break;
                    default:
                        throw new IllegalArgumentException("Unsupported bond type!");
                }
            } else {
                if (bond.getOrder() != null) {
                    switch(bond.getOrder()) {
                        case SINGLE:
                        case DOUBLE:
                        case TRIPLE:
                            if (writeAromaticBondTypes.isSet() && bond.isAromatic())
                                bondType = 4;
                            else
                                bondType = bond.getOrder().numeric();
                            break;
                        case UNSET:
                            if (bond.isAromatic()) {
                                if (!writeAromaticBondTypes.isSet())
                                    throw new CDKException("Bond at idx " + container.indexOf(bond) + " was an unspecific aromatic bond which should only be used for queries in Molfiles. These can be written if desired by enabling the option 'WriteAromaticBondTypes'.");
                                bondType = 4;
                            }
                            break;
                    }
                }
            }
            if (bondType == 0)
                throw new CDKException("Bond at idx=" + container.indexOf(bond) + " is not supported by Molfile, bond=" + bond.getOrder());
            line.append(formatMDLInt(bondType, 3));
            line.append("  ");
            switch(bond.getStereo()) {
                case UP:
                    line.append("1");
                    break;
                case UP_INVERTED:
                    line.append("1");
                    break;
                case DOWN:
                    line.append("6");
                    break;
                case DOWN_INVERTED:
                    line.append("6");
                    break;
                case UP_OR_DOWN:
                    line.append("4");
                    break;
                case UP_OR_DOWN_INVERTED:
                    line.append("4");
                    break;
                case E_OR_Z:
                    line.append("3");
                    break;
                default:
                    line.append("0");
            }
            if (writeDefaultProps.isSet())
                line.append("  0  0  0");
            line.append('\n');
            writer.write(line.toString());
        }
    }
    // Write Atom Value
    for (int i = 0; i < container.getAtomCount(); i++) {
        IAtom atom = container.getAtom(i);
        if (atom.getProperty(CDKConstants.COMMENT) != null && atom.getProperty(CDKConstants.COMMENT) instanceof String && !((String) atom.getProperty(CDKConstants.COMMENT)).trim().equals("")) {
            writer.write("V  ");
            writer.write(formatMDLInt(i + 1, 3));
            writer.write(" ");
            writer.write((String) atom.getProperty(CDKConstants.COMMENT));
            writer.write('\n');
        }
    }
    // write formal atomic charges
    for (int i = 0; i < container.getAtomCount(); i++) {
        IAtom atom = container.getAtom(i);
        Integer charge = atom.getFormalCharge();
        if (charge != null && charge != 0) {
            writer.write("M  CHG  1 ");
            writer.write(formatMDLInt(i + 1, 3));
            writer.write(" ");
            writer.write(formatMDLInt(charge, 3));
            writer.write('\n');
        }
    }
    // write radical information
    if (container.getSingleElectronCount() > 0) {
        Map<Integer, SPIN_MULTIPLICITY> atomIndexSpinMap = new LinkedHashMap<>();
        for (int i = 0; i < container.getAtomCount(); i++) {
            IAtom atom = container.getAtom(i);
            int eCount = container.getConnectedSingleElectronsCount(atom);
            switch(eCount) {
                case 0:
                    continue;
                case 1:
                    atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.Monovalent);
                    break;
                case 2:
                    SPIN_MULTIPLICITY multiplicity = atom.getProperty(CDKConstants.SPIN_MULTIPLICITY);
                    if (multiplicity != null)
                        atomIndexSpinMap.put(i, multiplicity);
                    else {
                        // information loss, divalent but singlet or triplet?
                        atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.DivalentSinglet);
                    }
                    break;
                default:
                    logger.debug("Invalid number of radicals found: " + eCount);
                    break;
            }
        }
        Iterator<Map.Entry<Integer, SPIN_MULTIPLICITY>> iterator = atomIndexSpinMap.entrySet().iterator();
        for (int i = 0; i < atomIndexSpinMap.size(); i += NN8) {
            if (atomIndexSpinMap.size() - i <= NN8) {
                writer.write("M  RAD" + formatMDLInt(atomIndexSpinMap.size() - i, WIDTH));
                writeRadicalPattern(iterator, 0);
            } else {
                writer.write("M  RAD" + formatMDLInt(NN8, WIDTH));
                writeRadicalPattern(iterator, 0);
            }
            writer.write('\n');
        }
    }
    // write formal isotope information
    for (int i = 0; i < container.getAtomCount(); i++) {
        IAtom atom = container.getAtom(i);
        if (!(atom instanceof IPseudoAtom)) {
            Integer atomicMass = atom.getMassNumber();
            if (!writeMajorIsotopes.isSet() && isMajorIsotope(atom))
                atomicMass = null;
            if (atomicMass != null) {
                writer.write("M  ISO  1 ");
                writer.write(formatMDLInt(i + 1, 3));
                writer.write(" ");
                writer.write(formatMDLInt(atomicMass, 3));
                writer.write('\n');
            }
        }
    }
    // write RGP line (max occurrence is 16 data points per line)
    if (rgroups != null) {
        StringBuilder rgpLine = new StringBuilder();
        int cnt = 0;
        // number this isn't an issue
        for (Map.Entry<Integer, Integer> e : rgroups.entrySet()) {
            rgpLine.append(formatMDLInt(e.getKey(), 4));
            rgpLine.append(formatMDLInt(e.getValue(), 4));
            cnt++;
            if (cnt == 8) {
                rgpLine.insert(0, "M  RGP" + formatMDLInt(cnt, 3));
                writer.write(rgpLine.toString());
                writer.write('\n');
                rgpLine = new StringBuilder();
                cnt = 0;
            }
        }
        if (cnt != 0) {
            rgpLine.insert(0, "M  RGP" + formatMDLInt(cnt, 3));
            writer.write(rgpLine.toString());
            writer.write('\n');
        }
    }
    // write atom aliases
    if (aliases != null) {
        for (Map.Entry<Integer, String> e : aliases.entrySet()) {
            writer.write("A" + formatMDLInt(e.getKey(), 5));
            writer.write('\n');
            String label = e.getValue();
            // fixed width file - doubtful someone would have a label > 70 but trim if they do
            if (label.length() > 70)
                label = label.substring(0, 70);
            writer.write(label);
            writer.write('\n');
        }
    }
    // write atom lists
    writeAtomLists(atomLists, writer);
    writeSgroups(container, writer, atomindex);
    // close molecule
    writer.write("M  END");
    writer.write('\n');
    writer.flush();
}
Also used : IPseudoAtom(org.openscience.cdk.interfaces.IPseudoAtom) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Matcher(java.util.regex.Matcher) IBond(org.openscience.cdk.interfaces.IBond) LinkedHashMap(java.util.LinkedHashMap) IQueryAtom(org.openscience.cdk.isomorphism.matchers.IQueryAtom) IAtom(org.openscience.cdk.interfaces.IAtom) CDKException(org.openscience.cdk.exception.CDKException) TreeMap(java.util.TreeMap) IQueryAtom(org.openscience.cdk.isomorphism.matchers.IQueryAtom) QueryAtom(org.openscience.cdk.isomorphism.matchers.QueryAtom) Expr(org.openscience.cdk.isomorphism.matchers.Expr) ITetrahedralChirality(org.openscience.cdk.interfaces.ITetrahedralChirality) QueryBond(org.openscience.cdk.isomorphism.matchers.QueryBond) SimpleDateFormat(java.text.SimpleDateFormat) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) AbstractMap(java.util.AbstractMap) TreeMap(java.util.TreeMap) IStereoElement(org.openscience.cdk.interfaces.IStereoElement)

Example 4 with QueryAtom

use of org.openscience.cdk.isomorphism.matchers.QueryAtom in project cdk by cdk.

the class MDLV2000Reader method readAtomContainer.

/**
 * Read an IAtomContainer from a file in MDL sd format
 *
 * @return The Molecule that was read from the MDL file.
 */
private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKException {
    boolean isQuery = molecule instanceof IQueryAtomContainer;
    IAtomContainer outputContainer = null;
    Map<IAtom, Integer> parities = new HashMap<>();
    int linecount = 0;
    String title = null;
    String program;
    String remark = null;
    String line = "";
    try {
        line = input.readLine();
        linecount++;
        if (line == null) {
            return null;
        }
        if (line.startsWith("$$$$")) {
            return molecule;
        }
        if (line.length() > 0) {
            title = line;
        }
        line = input.readLine();
        linecount++;
        program = line;
        line = input.readLine();
        linecount++;
        if (line.length() > 0) {
            remark = line;
        }
        line = input.readLine();
        linecount++;
        // molecule entry or just extra new lines at the end of the file
        if (line.length() == 0) {
            handleError("Unexpected empty line", linecount, 0, 0);
            // read till the next $$$$ or EOF
            while (true) {
                line = input.readLine();
                linecount++;
                if (line == null) {
                    return null;
                }
                if (line.startsWith("$$$$")) {
                    // an empty molecule
                    return molecule;
                }
            }
        }
        final CTabVersion version = CTabVersion.ofHeader(line);
        // check the CT block version
        if (version == CTabVersion.V3000) {
            handleError("This file must be read with the MDLV3000Reader.");
            // even if relaxed we can't read V3000 using the V2000 parser
            throw new CDKException("This file must be read with the MDLV3000Reader.");
        } else if (version == CTabVersion.UNSPECIFIED) {
            handleError("This file must be read with the MDLReader.");
        // okay to read in relaxed mode
        }
        int nAtoms = readMolfileInt(line, 0);
        int nBonds = readMolfileInt(line, 3);
        int chiral = readMolfileInt(line, 13);
        final IAtom[] atoms = new IAtom[nAtoms];
        final IBond[] bonds = new IBond[nBonds];
        // used for applying the MDL valence model
        int[] explicitValence = new int[nAtoms];
        boolean hasX = false, hasY = false, hasZ = false;
        for (int i = 0; i < nAtoms; i++) {
            line = input.readLine();
            linecount++;
            final IAtom atom = readAtomFast(line, molecule.getBuilder(), parities, linecount, isQuery);
            atoms[i] = atom;
            Point3d p = atom.getPoint3d();
            hasX = hasX || p.x != 0d;
            hasY = hasY || p.y != 0d;
            hasZ = hasZ || p.z != 0d;
        }
        // convert to 2D, if totalZ == 0
        if (!hasX && !hasY && !hasZ) {
            if (nAtoms == 1) {
                atoms[0].setPoint2d(new Point2d(0, 0));
            } else {
                for (IAtom atomToUpdate : atoms) {
                    atomToUpdate.setPoint3d(null);
                }
            }
        } else if (!hasZ) {
            // 0123456789012345678901
            if (is3Dfile(program)) {
                hasZ = true;
            } else if (!optForce3d.isSet()) {
                for (IAtom atomToUpdate : atoms) {
                    Point3d p3d = atomToUpdate.getPoint3d();
                    if (p3d != null) {
                        atomToUpdate.setPoint2d(new Point2d(p3d.x, p3d.y));
                        atomToUpdate.setPoint3d(null);
                    }
                }
            }
        }
        for (int i = 0; i < nBonds; i++) {
            line = input.readLine();
            linecount++;
            bonds[i] = readBondFast(line, molecule.getBuilder(), atoms, explicitValence, linecount, isQuery);
            isQuery = isQuery || bonds[i] instanceof IQueryBond || (bonds[i].getOrder() == IBond.Order.UNSET && !bonds[i].isAromatic());
        }
        if (!isQuery)
            outputContainer = molecule;
        else
            outputContainer = new QueryAtomContainer(molecule.getBuilder());
        if (title != null)
            outputContainer.setTitle(title);
        if (remark != null)
            outputContainer.setProperty(CDKConstants.REMARK, remark);
        // otherwise we add them to the end
        if (outputContainer.isEmpty()) {
            outputContainer.setAtoms(atoms);
            outputContainer.setBonds(bonds);
        } else {
            for (IAtom atom : atoms) outputContainer.addAtom(atom);
            for (IBond bond : bonds) outputContainer.addBond(bond);
        }
        // create 0D stereochemistry
        if (optStereoPerc.isSet() && optStereo0d.isSet()) {
            for (Map.Entry<IAtom, Integer> e : parities.entrySet()) {
                IStereoElement<IAtom, IAtom> stereoElement = createStereo0d(outputContainer, e.getKey(), e.getValue());
                if (stereoElement != null)
                    molecule.addStereoElement(stereoElement);
            }
        }
        // read PROPERTY block
        readPropertiesFast(input, outputContainer, nAtoms);
        // read potential SD file data between M  END and $$$$
        readNonStructuralData(input, outputContainer);
        // note: apply the valence model last so that all fixes (i.e. hydrogen
        // isotopes) are in place we need to use a offset as this atoms
        // could be added to a molecule which already had atoms present
        int offset = outputContainer.getAtomCount() - nAtoms;
        for (int i = offset; i < outputContainer.getAtomCount(); i++) {
            int valence = explicitValence[i - offset];
            if (valence < 0) {
                // also counts aromatic bond as query
                isQuery = true;
            } else {
                int unpaired = outputContainer.getConnectedSingleElectronsCount(outputContainer.getAtom(i));
                applyMDLValenceModel(outputContainer.getAtom(i), valence + unpaired, unpaired);
            }
        }
        // currently possible
        if (!(outputContainer instanceof IQueryAtomContainer) && !isQuery && optStereoPerc.isSet() && hasX && hasY) {
            // ALS property could have changed an atom into a QueryAtom
            for (IAtom atom : outputContainer.atoms()) {
                if (AtomRef.deref(atom) instanceof QueryAtom) {
                    isQuery = true;
                    break;
                }
            }
            if (!isQuery) {
                if (hasZ) {
                    // has 3D coordinates
                    outputContainer.setStereoElements(StereoElementFactory.using3DCoordinates(outputContainer).createAll());
                } else if (!optForce3d.isSet()) {
                    // has 2D coordinates (set as 2D coordinates)
                    outputContainer.setStereoElements(StereoElementFactory.using2DCoordinates(outputContainer).createAll());
                }
            }
        }
        // Tetrahedral stereo as AND1 (&1)
        if (chiral == 0) {
            for (IStereoElement<?, ?> se : outputContainer.stereoElements()) {
                if (se.getConfigClass() == IStereoElement.TH) {
                    se.setGroupInfo(IStereoElement.GRP_RAC1);
                }
            }
        }
    } catch (CDKException exception) {
        String error = "Error while parsing line " + linecount + ": " + line + " -> " + exception.getMessage();
        logger.error(error);
        throw exception;
    } catch (IOException exception) {
        String error = "Error while parsing line " + linecount + ": " + line + " -> " + exception.getMessage();
        logger.error(error);
        handleError("Error while parsing line: " + line, linecount, 0, 0, exception);
    }
    return outputContainer;
}
Also used : IAtomContainer(org.openscience.cdk.interfaces.IAtomContainer) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) IBond(org.openscience.cdk.interfaces.IBond) IQueryBond(org.openscience.cdk.isomorphism.matchers.IQueryBond) Point3d(javax.vecmath.Point3d) IAtom(org.openscience.cdk.interfaces.IAtom) CDKException(org.openscience.cdk.exception.CDKException) IOException(java.io.IOException) IQueryAtomContainer(org.openscience.cdk.isomorphism.matchers.IQueryAtomContainer) QueryAtomContainer(org.openscience.cdk.isomorphism.matchers.QueryAtomContainer) IQueryAtomContainer(org.openscience.cdk.isomorphism.matchers.IQueryAtomContainer) QueryAtom(org.openscience.cdk.isomorphism.matchers.QueryAtom) Point2d(javax.vecmath.Point2d) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap)

Example 5 with QueryAtom

use of org.openscience.cdk.isomorphism.matchers.QueryAtom in project cdk by cdk.

the class MDLV2000Reader method readAtomFast.

/**
 * Parse an atom line from the atom block using the format: {@code
 * xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvvHHHrrriiimmmnnneee}
 * where: <ul> <li>x: x coordinate</li> <li>y: y coordinate</li> <li>z: z
 * coordinate</li> <li>a: atom symbol</li> <li>d: mass difference</li>
 * <li>c: charge</li> <li>s: stereo parity</li> <li>h: hydrogen count + 1
 * (not read - query)</li> <li>b: stereo care (not read - query)</li> <li>v:
 * valence</li> <li>H: H0 designator (not read - query)</li> <li>r: not
 * used</li> <li>i: not used</li> <li>m: atom reaction mapping</li> <li>n:
 * inversion/retention flag</li> <li>e: exact change flag</li> </ul>
 *
 * The parsing is strict and does not allow extra columns (i.e. NMR shifts)
 * malformed input.
 *
 * @param line    input line
 * @param builder chem object builder to create the atom
 * @param parities map of atom parities for creation 0D stereochemistry
 * @param lineNum the line number - for printing error messages
 * @return a new atom instance
 */
IAtom readAtomFast(String line, IChemObjectBuilder builder, Map<IAtom, Integer> parities, int lineNum, boolean isQuery) throws CDKException, IOException {
    // The line may be truncated and it's checked in reverse at the specified
    // lengths:
    // 1         2         3         4         5         6
    // 123456789012345678901234567890123456789012345678901234567890123456789
    // | |  |  |  |  |  |  |  |  |  |  |  |
    // xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvvHHHrrriiimmmnnneee
    String symbol;
    double x, y, z;
    int massDiff = 0, charge = 0, parity = 0, valence = 0, mapping = 0, hcount = 0;
    int length = length(line);
    if (// excess data we should check all fields
    length > 69)
        length = 69;
    // that could be present (note - fall through switch)
    switch(length) {
        // eee: exact charge flag [reaction, query]
        case 69:
        // nnn: inversion / retention [reaction]
        case 66:
        case // mmm: atom-atom mapping [reaction]
        63:
            mapping = readMolfileInt(line, 60);
        // iii: not used
        case 60:
        // rrr: not used
        case 57:
        // HHH: H0 designation [redundant]
        case 54:
        case // vvv: valence
        51:
            valence = readMolfileInt(line, 48);
        // bbb: stereo care [query]
        case 48:
        case // hhh: hydrogen count + 1 [query]
        45:
            hcount = readMolfileInt(line, 42);
        case // sss: stereo parity
        42:
            parity = toInt(line.charAt(41));
        // case 40: SAChem: I don't think this can happen in a valid molfile, maybe with a trailing tab?
        case // ccc: charge
        39:
            charge = toCharge(line.charAt(38));
        case // dd: mass difference
        36:
            massDiff = sign(line.charAt(34)) * toInt(line.charAt(35));
        // x y z and aaa: atom coordinates and symbol
        case 34:
        // symbol is left aligned
        case 33:
        case 32:
            x = readMDLCoordinate(line, 0);
            y = readMDLCoordinate(line, 10);
            z = readMDLCoordinate(line, 20);
            symbol = line.substring(31, 34).trim().intern();
            break;
        default:
            handleError("invalid line length", lineNum, 0, 0);
            throw new CDKException("invalid line length, " + length + ": " + line);
    }
    IAtom atom = createAtom(symbol, builder, lineNum);
    if (isQuery) {
        Expr expr = new Expr(Expr.Type.ELEMENT, atom.getAtomicNumber());
        if (hcount != 0) {
            if (hcount < 0)
                hcount = 0;
            expr.and(new Expr(Expr.Type.IMPL_H_COUNT, hcount));
        }
        atom = new QueryAtom(builder);
        ((QueryAtom) atom).setExpression(expr);
    }
    atom.setPoint3d(new Point3d(x, y, z));
    atom.setFormalCharge(charge);
    atom.setStereoParity(parity);
    if (parity != 0)
        parities.put(atom, parity);
    // if there was a mass difference, set the mass number
    if (massDiff != 0 && atom.getAtomicNumber() > 0) {
        IIsotope majorIsotope = Isotopes.getInstance().getMajorIsotope(atom.getAtomicNumber());
        if (majorIsotope == null)
            // checked after M ISO is processed
            atom.setMassNumber(-1);
        else
            atom.setMassNumber(majorIsotope.getMassNumber() + massDiff);
    }
    if (valence > 0 && valence < 16)
        atom.setValency(valence == 15 ? 0 : valence);
    if (mapping != 0)
        atom.setProperty(CDKConstants.ATOM_ATOM_MAPPING, mapping);
    return atom;
}
Also used : IIsotope(org.openscience.cdk.interfaces.IIsotope) Expr(org.openscience.cdk.isomorphism.matchers.Expr) CDKException(org.openscience.cdk.exception.CDKException) Point3d(javax.vecmath.Point3d) IAtom(org.openscience.cdk.interfaces.IAtom) QueryAtom(org.openscience.cdk.isomorphism.matchers.QueryAtom)

Aggregations

QueryAtom (org.openscience.cdk.isomorphism.matchers.QueryAtom)12 IAtom (org.openscience.cdk.interfaces.IAtom)11 Expr (org.openscience.cdk.isomorphism.matchers.Expr)9 IQueryAtomContainer (org.openscience.cdk.isomorphism.matchers.IQueryAtomContainer)5 QueryAtomContainer (org.openscience.cdk.isomorphism.matchers.QueryAtomContainer)5 ByteArrayInputStream (java.io.ByteArrayInputStream)4 InputStream (java.io.InputStream)4 LinkedHashMap (java.util.LinkedHashMap)4 Test (org.junit.Test)4 CDKException (org.openscience.cdk.exception.CDKException)4 SimpleChemObjectReaderTest (org.openscience.cdk.test.io.SimpleChemObjectReaderTest)4 HashMap (java.util.HashMap)3 Map (java.util.Map)3 IBond (org.openscience.cdk.interfaces.IBond)3 ITetrahedralChirality (org.openscience.cdk.interfaces.ITetrahedralChirality)3 IQueryAtom (org.openscience.cdk.isomorphism.matchers.IQueryAtom)3 AbstractMap (java.util.AbstractMap)2 ArrayList (java.util.ArrayList)2 TreeMap (java.util.TreeMap)2 Point3d (javax.vecmath.Point3d)2