Search in sources:

Example 1 with QueryBond

use of org.openscience.cdk.isomorphism.matchers.QueryBond in project cdk by cdk.

the class Abbreviations method matchExact.

/**
 * Internal - builds a query atom container that exactly matches the supplied molecule.
 * Comparable to {@link org.openscience.cdk.isomorphism.matchers.QueryAtomContainerCreator},
 * which cannot be used here because the SMARTS query classes of that module
 * (cdk-isomorphism) are not accessible.
 *
 * <p>Each atom is converted with {@code matchExact(mol, atom)}; atoms for which that
 * returns {@code null} are left out, and so is any bond touching such an atom. Every
 * retained bond becomes a {@link QueryBond} carrying an {@code Expr.Type.TRUE} expression.
 *
 * @param mol molecule
 * @return query container
 * @see org.openscience.cdk.isomorphism.matchers.QueryAtomContainerCreator
 */
private IQueryAtomContainer matchExact(IAtomContainer mol) {
    final IChemObjectBuilder bldr = mol.getBuilder();
    final IQueryAtomContainer query = new QueryAtomContainer(bldr);
    // original atom -> its query counterpart (only for atoms that were converted)
    final Map<IAtom, IAtom> queryAtomFor = new HashMap<>();
    for (IAtom atom : mol.atoms()) {
        final IAtom queryAtom = matchExact(mol, atom);
        if (queryAtom == null)
            continue;
        queryAtomFor.put(atom, queryAtom);
        query.addAtom(queryAtom);
    }
    for (IBond bond : mol.bonds()) {
        final IAtom queryBeg = queryAtomFor.get(bond.getBegin());
        final IAtom queryEnd = queryAtomFor.get(bond.getEnd());
        // only bonds whose two end points both made it into the query are kept
        if (queryBeg != null && queryEnd != null) {
            final IQueryBond queryBond = new QueryBond(queryBeg, queryEnd, Expr.Type.TRUE);
            query.addBond(queryBond);
        }
    }
    return query;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) IBond(org.openscience.cdk.interfaces.IBond) IQueryBond(org.openscience.cdk.isomorphism.matchers.IQueryBond) QueryBond(org.openscience.cdk.isomorphism.matchers.QueryBond) IQueryBond(org.openscience.cdk.isomorphism.matchers.IQueryBond) IQueryAtomContainer(org.openscience.cdk.isomorphism.matchers.IQueryAtomContainer) IChemObjectBuilder(org.openscience.cdk.interfaces.IChemObjectBuilder) QueryAtomContainer(org.openscience.cdk.isomorphism.matchers.QueryAtomContainer) IQueryAtomContainer(org.openscience.cdk.isomorphism.matchers.IQueryAtomContainer) IAtom(org.openscience.cdk.interfaces.IAtom)

Example 2 with QueryBond

use of org.openscience.cdk.isomorphism.matchers.QueryBond in project cdk by cdk.

the class MDLV2000Writer method writeMolecule.

/**
 * Writes a Molecule to an OutputStream in MDL sdf format.
 *
 * <p>The output is produced in this order: the three header lines (title, program/time
 * stamp, remark), the counts line, the atom block, the bond block, and then the property
 * lines (atom values, {@code M  CHG}, {@code M  RAD}, {@code M  ISO}, {@code M  RGP},
 * atom aliases, atom lists and Sgroups) terminated by {@code M  END}. The underlying
 * writer is flushed at the end.
 *
 * @param container Molecule that is written to an OutputStream
 * @throws Exception a {@code CDKException} is thrown for a bond that cannot be expressed
 *                   as a Molfile bond type (including an unspecific aromatic bond when
 *                   the 'WriteAromaticBondTypes' option is not set), an
 *                   {@code IllegalArgumentException} for a query bond whose expression
 *                   type is not handled; errors from the underlying writer propagate
 */
public void writeMolecule(IAtomContainer container) throws Exception {
    final int dim = getNumberOfDimensions(container);
    StringBuilder line = new StringBuilder();
    // both maps are created lazily while writing the atom block and are keyed by
    // the 1-based atom number
    Map<Integer, Integer> rgroups = null;
    Map<Integer, String> aliases = null;
    // write header block
    // lines get shortened to 80 chars, that's in the spec
    String title = container.getTitle();
    if (title == null)
        title = "";
    if (title.length() > 80)
        title = title.substring(0, 80);
    writer.write(title);
    writer.write('\n');
    /*
         * From CTX spec This line has the format:
         * IIPPPPPPPPMMDDYYHHmmddSSssssssssssEEEEEEEEEEEERRRRRR (FORTRAN:
         * A2<--A8--><---A10-->A2I2<--F10.5-><---F12.5--><-I6-> ) User's first
         * and last initials (l), program name (P), date/time (M/D/Y,H:m),
         * dimensional codes (d), scaling factors (S, s), energy (E) if modeling
         * program input, internal registry number (R) if input through MDL
         * form. A blank line can be substituted for line 2.
         */
    writer.write("  ");
    writer.write(getProgName());
    writer.write(new SimpleDateFormat("MMddyyHHmm").format(System.currentTimeMillis()));
    if (dim != 0) {
        writer.write(Integer.toString(dim));
        writer.write('D');
    }
    writer.write('\n');
    String comment = container.getProperty(CDKConstants.REMARK);
    if (comment == null)
        comment = "";
    if (comment.length() > 80)
        comment = comment.substring(0, 80);
    writer.write(comment);
    writer.write('\n');
    // index stereo elements for setting atom parity values
    Map<IAtom, ITetrahedralChirality> atomstereo = new HashMap<>();
    Map<IAtom, Integer> atomindex = new HashMap<>();
    for (IStereoElement element : container.stereoElements()) if (element instanceof ITetrahedralChirality)
        atomstereo.put(((ITetrahedralChirality) element).getChiralAtom(), (ITetrahedralChirality) element);
    // atoms are numbered 0..n-1 in container iteration order
    for (IAtom atom : container.atoms()) atomindex.put(atom, atomindex.size());
    // write Counts line
    line.append(formatMDLInt(container.getAtomCount(), 3));
    line.append(formatMDLInt(container.getBondCount(), 3));
    // find all the atoms that should be atom lists
    Map<Integer, IAtom> atomLists = new LinkedHashMap<>();
    for (int f = 0; f < container.getAtomCount(); f++) {
        if (container.getAtom(f) instanceof IQueryAtom) {
            QueryAtom queryAtom = (QueryAtom) AtomRef.deref(container.getAtom(f));
            Expr expr = queryAtom.getExpression();
            if (isValidAtomListExpression(expr)) {
                atomLists.put(f, container.getAtom(f));
            }
        }
    }
    // write number of atom lists
    line.append(formatMDLInt(atomLists.size(), 3));
    line.append("  0");
    line.append(getChiralFlag(atomstereo.values()) ? "  1" : "  0");
    line.append("  0  0  0  0  0999 V2000");
    writer.write(line.toString());
    writer.write('\n');
    // write Atom block
    for (int f = 0; f < container.getAtomCount(); f++) {
        IAtom atom = container.getAtom(f);
        line.setLength(0);
        switch(dim) {
            case 0:
                // if no coordinates available, then output a number
                // of zeros
                line.append("    0.0000    0.0000    0.0000 ");
                break;
            case 2:
                if (atom.getPoint2d() != null) {
                    line.append(formatMDLFloat((float) atom.getPoint2d().x));
                    line.append(formatMDLFloat((float) atom.getPoint2d().y));
                    line.append("    0.0000 ");
                } else {
                    line.append("    0.0000    0.0000    0.0000 ");
                }
                break;
            case 3:
                if (atom.getPoint3d() != null) {
                    line.append(formatMDLFloat((float) atom.getPoint3d().x));
                    line.append(formatMDLFloat((float) atom.getPoint3d().y));
                    line.append(formatMDLFloat((float) atom.getPoint3d().z)).append(" ");
                } else {
                    line.append("    0.0000    0.0000    0.0000 ");
                }
                break;
        }
        if (container.getAtom(f) instanceof IPseudoAtom) {
            // according to http://www.google.co.uk/url?sa=t&ct=res&cd=2&url=http%3A%2F%2Fwww.mdl.com%2Fdownloads%2Fpublic%2Fctfile%2Fctfile.pdf&ei=MsJjSMbjAoyq1gbmj7zCDQ&usg=AFQjCNGaJSvH4wYy4FTXIaQ5f7hjoTdBAw&sig2=eSfruNOSsdMFdlrn7nhdAw an R group is written as R#
            IPseudoAtom pseudoAtom = (IPseudoAtom) container.getAtom(f);
            String label = pseudoAtom.getLabel();
            // set to empty string if null
            if (label == null)
                label = "";
            // firstly check if it's a numbered R group
            Matcher matcher = NUMERED_R_GROUP.matcher(label);
            if (pseudoAtom.getAtomicNumber() == IElement.Wildcard && !label.isEmpty() && matcher.matches()) {
                line.append("R# ");
                if (rgroups == null) {
                    // we use a tree map to ensure the output order is always the same
                    rgroups = new TreeMap<>();
                }
                rgroups.put(f + 1, Integer.parseInt(matcher.group(1)));
            } else {
                // not a numbered R group - note the symbol may still be R.
                // A label longer than the 3 character symbol field is written
                // separately as an alias.
                if (label.length() > 3) {
                    if (aliases == null)
                        aliases = new TreeMap<>();
                    // atom index to alias
                    aliases.put(f + 1, label);
                    line.append(formatMDLString(atom.getSymbol(), 3));
                } else {
                    // make sure it's not empty
                    if (!label.isEmpty())
                        line.append(formatMDLString(label, 3));
                    else
                        line.append(formatMDLString(atom.getSymbol(), 3));
                }
            }
        } else if (atomLists.containsKey(f)) {
            line.append(formatMDLString("L", 3));
        } else {
            line.append(formatMDLString(container.getAtom(f).getSymbol(), 3));
        }
        // atom properties
        int[] atomprops = new int[12];
        atomprops[0] = determineIsotope(atom);
        atomprops[1] = determineCharge(container, atom);
        atomprops[2] = determineStereoParity(container, atomstereo, atomindex, atom);
        atomprops[5] = determineValence(container, atom);
        atomprops[9] = determineAtomMap(atom);
        // dd (mass-number)
        line.append(formatMDLInt(atomprops[0], 2));
        // ccc (charge)
        line.append(formatMDLInt(atomprops[1], 3));
        int last = atomprops.length - 1;
        if (!writeDefaultProps.isSet()) {
            // trim trailing zero-valued properties
            while (last >= 0) {
                if (atomprops[last] != 0)
                    break;
                last--;
            }
            // matches BIOVIA syntax
            if (last >= 2 && last < 5)
                last = 5;
        }
        for (int i = 2; i <= last; i++) line.append(formatMDLInt(atomprops[i], 3));
        line.append('\n');
        writer.write(line.toString());
    }
    // write Bond block
    for (IBond bond : container.bonds()) {
        line.setLength(0);
        if (bond.getAtomCount() != 2) {
            logger.warn("Skipping bond with more/less than two atoms: " + bond);
        } else {
            if (bond.getStereo() == IBond.Stereo.UP_INVERTED || bond.getStereo() == IBond.Stereo.DOWN_INVERTED || bond.getStereo() == IBond.Stereo.UP_OR_DOWN_INVERTED) {
                // turn around atom coding to correct for inv stereo
                line.append(formatMDLInt(atomindex.get(bond.getEnd()) + 1, 3));
                line.append(formatMDLInt(atomindex.get(bond.getBegin()) + 1, 3));
            } else {
                line.append(formatMDLInt(atomindex.get(bond.getBegin()) + 1, 3));
                line.append(formatMDLInt(atomindex.get(bond.getEnd()) + 1, 3));
            }
            // 0 means "no bond type could be determined" and is rejected below
            int bondType = 0;
            if (bond instanceof QueryBond) {
                QueryBond qbond = ((QueryBond) bond);
                Expr e = qbond.getExpression();
                switch(e.type()) {
                    case ALIPHATIC_ORDER:
                    case ORDER:
                        bondType = e.value();
                        break;
                    case IS_AROMATIC:
                        bondType = 4;
                        break;
                    case SINGLE_OR_DOUBLE:
                        bondType = 5;
                        break;
                    case SINGLE_OR_AROMATIC:
                        bondType = 6;
                        break;
                    case DOUBLE_OR_AROMATIC:
                        bondType = 7;
                        break;
                    case TRUE:
                        bondType = 8;
                        break;
                    case OR:
                        // an explicit OR of two primitives is mapped to the same code as
                        // the matching dedicated expression type above; any other OR
                        // leaves bondType at 0 and is reported as unsupported below
                        // SINGLE_OR_DOUBLE
                        if (e.equals(new Expr(Expr.Type.ALIPHATIC_ORDER, 1).or(new Expr(Expr.Type.ALIPHATIC_ORDER, 2))) || e.equals(new Expr(Expr.Type.ALIPHATIC_ORDER, 2).or(new Expr(Expr.Type.ALIPHATIC_ORDER, 1))))
                            bondType = 5;
                        else // SINGLE_OR_AROMATIC
                        if (e.equals(new Expr(Expr.Type.ALIPHATIC_ORDER, 1).or(new Expr(Expr.Type.IS_AROMATIC))) || e.equals(new Expr(Expr.Type.IS_AROMATIC).or(new Expr(Expr.Type.ALIPHATIC_ORDER, 1))))
                            bondType = 6;
                        else // DOUBLE_OR_AROMATIC (7, same code as the DOUBLE_OR_AROMATIC case)
                        if (e.equals(new Expr(Expr.Type.ALIPHATIC_ORDER, 2).or(new Expr(Expr.Type.IS_AROMATIC))) || e.equals(new Expr(Expr.Type.IS_AROMATIC).or(new Expr(Expr.Type.ALIPHATIC_ORDER, 2))))
                            bondType = 7;
                        break;
                    default:
                        throw new IllegalArgumentException("Unsupported bond type!");
                }
            } else {
                if (bond.getOrder() != null) {
                    switch(bond.getOrder()) {
                        case SINGLE:
                        case DOUBLE:
                        case TRIPLE:
                            if (writeAromaticBondTypes.isSet() && bond.isAromatic())
                                bondType = 4;
                            else
                                bondType = bond.getOrder().numeric();
                            break;
                        case UNSET:
                            if (bond.isAromatic()) {
                                if (!writeAromaticBondTypes.isSet())
                                    throw new CDKException("Bond at idx " + container.indexOf(bond) + " was an unspecific aromatic bond which should only be used for queries in Molfiles. These can be written if desired by enabling the option 'WriteAromaticBondTypes'.");
                                bondType = 4;
                            }
                            break;
                    }
                }
            }
            if (bondType == 0)
                throw new CDKException("Bond at idx=" + container.indexOf(bond) + " is not supported by Molfile, bond=" + bond.getOrder());
            line.append(formatMDLInt(bondType, 3));
            line.append("  ");
            // the inverted variants share the code of their plain counterpart because
            // the atom order was already swapped above
            switch(bond.getStereo()) {
                case UP:
                    line.append("1");
                    break;
                case UP_INVERTED:
                    line.append("1");
                    break;
                case DOWN:
                    line.append("6");
                    break;
                case DOWN_INVERTED:
                    line.append("6");
                    break;
                case UP_OR_DOWN:
                    line.append("4");
                    break;
                case UP_OR_DOWN_INVERTED:
                    line.append("4");
                    break;
                case E_OR_Z:
                    line.append("3");
                    break;
                default:
                    line.append("0");
            }
            if (writeDefaultProps.isSet())
                line.append("  0  0  0");
            line.append('\n');
            writer.write(line.toString());
        }
    }
    // Write Atom Value
    for (int i = 0; i < container.getAtomCount(); i++) {
        IAtom atom = container.getAtom(i);
        if (atom.getProperty(CDKConstants.COMMENT) != null && atom.getProperty(CDKConstants.COMMENT) instanceof String && !((String) atom.getProperty(CDKConstants.COMMENT)).trim().equals("")) {
            writer.write("V  ");
            writer.write(formatMDLInt(i + 1, 3));
            writer.write(" ");
            writer.write((String) atom.getProperty(CDKConstants.COMMENT));
            writer.write('\n');
        }
    }
    // write formal atomic charges
    for (int i = 0; i < container.getAtomCount(); i++) {
        IAtom atom = container.getAtom(i);
        Integer charge = atom.getFormalCharge();
        if (charge != null && charge != 0) {
            writer.write("M  CHG  1 ");
            writer.write(formatMDLInt(i + 1, 3));
            writer.write(" ");
            writer.write(formatMDLInt(charge, 3));
            writer.write('\n');
        }
    }
    // write radical information
    if (container.getSingleElectronCount() > 0) {
        Map<Integer, SPIN_MULTIPLICITY> atomIndexSpinMap = new LinkedHashMap<>();
        for (int i = 0; i < container.getAtomCount(); i++) {
            IAtom atom = container.getAtom(i);
            int eCount = container.getConnectedSingleElectronsCount(atom);
            switch(eCount) {
                case 0:
                    continue;
                case 1:
                    atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.Monovalent);
                    break;
                case 2:
                    SPIN_MULTIPLICITY multiplicity = atom.getProperty(CDKConstants.SPIN_MULTIPLICITY);
                    if (multiplicity != null)
                        atomIndexSpinMap.put(i, multiplicity);
                    else {
                        // information loss, divalent but singlet or triplet?
                        atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.DivalentSinglet);
                    }
                    break;
                default:
                    logger.debug("Invalid number of radicals found: " + eCount);
                    break;
            }
        }
        // one "M  RAD" line per group of at most NN8 entries; the shared iterator
        // carries the position from one line to the next
        Iterator<Map.Entry<Integer, SPIN_MULTIPLICITY>> iterator = atomIndexSpinMap.entrySet().iterator();
        for (int i = 0; i < atomIndexSpinMap.size(); i += NN8) {
            if (atomIndexSpinMap.size() - i <= NN8) {
                writer.write("M  RAD" + formatMDLInt(atomIndexSpinMap.size() - i, WIDTH));
                writeRadicalPattern(iterator, 0);
            } else {
                writer.write("M  RAD" + formatMDLInt(NN8, WIDTH));
                writeRadicalPattern(iterator, 0);
            }
            writer.write('\n');
        }
    }
    // write formal isotope information
    for (int i = 0; i < container.getAtomCount(); i++) {
        IAtom atom = container.getAtom(i);
        if (!(atom instanceof IPseudoAtom)) {
            Integer atomicMass = atom.getMassNumber();
            if (!writeMajorIsotopes.isSet() && isMajorIsotope(atom))
                atomicMass = null;
            if (atomicMass != null) {
                writer.write("M  ISO  1 ");
                writer.write(formatMDLInt(i + 1, 3));
                writer.write(" ");
                writer.write(formatMDLInt(atomicMass, 3));
                writer.write('\n');
            }
        }
    }
    // write RGP line; a line is emitted for every 8 (atom number, R group number) pairs
    if (rgroups != null) {
        StringBuilder rgpLine = new StringBuilder();
        int cnt = 0;
        // entries are keyed by atom number (sorted map), so the output order is stable
        for (Map.Entry<Integer, Integer> e : rgroups.entrySet()) {
            rgpLine.append(formatMDLInt(e.getKey(), 4));
            rgpLine.append(formatMDLInt(e.getValue(), 4));
            cnt++;
            if (cnt == 8) {
                rgpLine.insert(0, "M  RGP" + formatMDLInt(cnt, 3));
                writer.write(rgpLine.toString());
                writer.write('\n');
                rgpLine = new StringBuilder();
                cnt = 0;
            }
        }
        // flush the final, partially filled line
        if (cnt != 0) {
            rgpLine.insert(0, "M  RGP" + formatMDLInt(cnt, 3));
            writer.write(rgpLine.toString());
            writer.write('\n');
        }
    }
    // write atom aliases
    if (aliases != null) {
        for (Map.Entry<Integer, String> e : aliases.entrySet()) {
            writer.write("A" + formatMDLInt(e.getKey(), 5));
            writer.write('\n');
            String label = e.getValue();
            // fixed width file - doubtful someone would have a label > 70 but trim if they do
            if (label.length() > 70)
                label = label.substring(0, 70);
            writer.write(label);
            writer.write('\n');
        }
    }
    // write atom lists
    writeAtomLists(atomLists, writer);
    writeSgroups(container, writer, atomindex);
    // close molecule
    writer.write("M  END");
    writer.write('\n');
    writer.flush();
}
Also used : IPseudoAtom(org.openscience.cdk.interfaces.IPseudoAtom) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Matcher(java.util.regex.Matcher) IBond(org.openscience.cdk.interfaces.IBond) LinkedHashMap(java.util.LinkedHashMap) IQueryAtom(org.openscience.cdk.isomorphism.matchers.IQueryAtom) IAtom(org.openscience.cdk.interfaces.IAtom) CDKException(org.openscience.cdk.exception.CDKException) TreeMap(java.util.TreeMap) IQueryAtom(org.openscience.cdk.isomorphism.matchers.IQueryAtom) QueryAtom(org.openscience.cdk.isomorphism.matchers.QueryAtom) Expr(org.openscience.cdk.isomorphism.matchers.Expr) ITetrahedralChirality(org.openscience.cdk.interfaces.ITetrahedralChirality) QueryBond(org.openscience.cdk.isomorphism.matchers.QueryBond) SimpleDateFormat(java.text.SimpleDateFormat) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) AbstractMap(java.util.AbstractMap) TreeMap(java.util.TreeMap) IStereoElement(org.openscience.cdk.interfaces.IStereoElement)

Example 3 with QueryBond

use of org.openscience.cdk.isomorphism.matchers.QueryBond in project cdk by cdk.

the class MDLV2000BondBlockTest method anyBond.

/**
 * A bond line with bond type 8 must be read as a non-stereo {@link QueryBond}
 * whose expression type is {@code Expr.Type.TRUE}, with neither the aromatic nor the
 * single-or-double flag set.
 */
@Test
public void anyBond() throws Exception {
    final String bondLine = "  1  3  8  0  0  0  0";
    final int[] explicitValence = new int[atoms.length];
    final IBond parsed = reader.readBondFast(bondLine, builder, atoms, explicitValence, 1);
    assertThat(parsed.getStereo(), is(IBond.Stereo.NONE));
    assertFalse(parsed.getFlag(CDKConstants.ISAROMATIC));
    assertFalse(parsed.getFlag(CDKConstants.SINGLE_OR_DOUBLE));
    // the type check comes first so a wrong class fails as an assertion, not a cast error
    assertThat(parsed, is(instanceOf(QueryBond.class)));
    final QueryBond queryBond = (QueryBond) parsed;
    assertThat(queryBond.getExpression().type(), is(Expr.Type.TRUE));
}
Also used : IBond(org.openscience.cdk.interfaces.IBond) QueryBond(org.openscience.cdk.isomorphism.matchers.QueryBond) Test(org.junit.Test)

Example 4 with QueryBond

use of org.openscience.cdk.isomorphism.matchers.QueryBond in project cdk by cdk.

the class MDLV2000BondBlockTest method singleOrAromaticBond.

/**
 * A bond line with bond type 6 must be read as a non-stereo {@link QueryBond}
 * whose expression type is {@code Expr.Type.SINGLE_OR_AROMATIC}, with neither the
 * aromatic nor the single-or-double flag set.
 */
@Test
public void singleOrAromaticBond() throws Exception {
    final String bondLine = "  1  3  6  0  0  0  0";
    final int[] explicitValence = new int[atoms.length];
    final IBond parsed = reader.readBondFast(bondLine, builder, atoms, explicitValence, 1);
    assertThat(parsed.getStereo(), is(IBond.Stereo.NONE));
    assertFalse(parsed.getFlag(CDKConstants.ISAROMATIC));
    assertFalse(parsed.getFlag(CDKConstants.SINGLE_OR_DOUBLE));
    // the type check comes first so a wrong class fails as an assertion, not a cast error
    assertThat(parsed, is(instanceOf(QueryBond.class)));
    final QueryBond queryBond = (QueryBond) parsed;
    assertThat(queryBond.getExpression().type(), is(Expr.Type.SINGLE_OR_AROMATIC));
}
Also used : IBond(org.openscience.cdk.interfaces.IBond) QueryBond(org.openscience.cdk.isomorphism.matchers.QueryBond) Test(org.junit.Test)

Example 5 with QueryBond

use of org.openscience.cdk.isomorphism.matchers.QueryBond in project cdk by cdk.

the class MDLV2000BondBlockTest method doubleOrAromaticBond.

/**
 * A bond line with bond type 7 must be read as a non-stereo {@link QueryBond}
 * whose expression type is {@code Expr.Type.DOUBLE_OR_AROMATIC}, with neither the
 * aromatic nor the single-or-double flag set.
 */
@Test
public void doubleOrAromaticBond() throws Exception {
    final String bondLine = "  1  3  7  0  0  0  0";
    final int[] explicitValence = new int[atoms.length];
    final IBond parsed = reader.readBondFast(bondLine, builder, atoms, explicitValence, 1);
    assertThat(parsed.getStereo(), is(IBond.Stereo.NONE));
    assertFalse(parsed.getFlag(CDKConstants.ISAROMATIC));
    assertFalse(parsed.getFlag(CDKConstants.SINGLE_OR_DOUBLE));
    // the type check comes first so a wrong class fails as an assertion, not a cast error
    assertThat(parsed, is(instanceOf(QueryBond.class)));
    final QueryBond queryBond = (QueryBond) parsed;
    assertThat(queryBond.getExpression().type(), is(Expr.Type.DOUBLE_OR_AROMATIC));
}
Also used : IBond(org.openscience.cdk.interfaces.IBond) QueryBond(org.openscience.cdk.isomorphism.matchers.QueryBond) Test(org.junit.Test)

Aggregations

IBond (org.openscience.cdk.interfaces.IBond)11 QueryBond (org.openscience.cdk.isomorphism.matchers.QueryBond)11 Test (org.junit.Test)6 IAtom (org.openscience.cdk.interfaces.IAtom)4 Expr (org.openscience.cdk.isomorphism.matchers.Expr)3 IQueryAtomContainer (org.openscience.cdk.isomorphism.matchers.IQueryAtomContainer)3 IQueryBond (org.openscience.cdk.isomorphism.matchers.IQueryBond)3 QueryAtomContainer (org.openscience.cdk.isomorphism.matchers.QueryAtomContainer)3 ByteArrayInputStream (java.io.ByteArrayInputStream)2 InputStream (java.io.InputStream)2 HashMap (java.util.HashMap)2 LinkedHashMap (java.util.LinkedHashMap)2 CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString)2 AtomContainer (org.openscience.cdk.AtomContainer)2 CDKException (org.openscience.cdk.exception.CDKException)2 IAtomContainer (org.openscience.cdk.interfaces.IAtomContainer)2 SimpleChemObjectReaderTest (org.openscience.cdk.test.io.SimpleChemObjectReaderTest)2 SimpleDateFormat (java.text.SimpleDateFormat)1 AbstractMap (java.util.AbstractMap)1 Map (java.util.Map)1