use of org.openscience.cdk.isomorphism.matchers.QueryBond in project cdk by cdk.
the class Abbreviations method matchExact.
/**
 * Internal - create a query atom container that exactly matches the molecule provided.
 * Similar to {@link org.openscience.cdk.isomorphism.matchers.QueryAtomContainerCreator}
 * but we can't access SMARTS query classes from that module (cdk-isomorphism).
 *
 * <p>Each atom is converted with {@code matchExact(mol, atom)}; atoms for which that
 * call returns {@code null} are left out of the query, and so is every bond that
 * touches such an atom. All bonds that are kept are created with an
 * {@link Expr.Type#TRUE} expression.
 *
 * @param mol molecule
 * @return query container
 * @see org.openscience.cdk.isomorphism.matchers.QueryAtomContainerCreator
 */
private IQueryAtomContainer matchExact(IAtomContainer mol) {
    final IQueryAtomContainer qry = new QueryAtomContainer(mol.getBuilder());
    // original atom -> query atom, used below to re-wire the bonds
    final Map<IAtom, IAtom> atmmap = new HashMap<>();
    for (IAtom atom : mol.atoms()) {
        final IAtom qatom = matchExact(mol, atom);
        if (qatom != null) {
            atmmap.put(atom, qatom);
            qry.addAtom(qatom);
        }
    }
    for (IBond bond : mol.bonds()) {
        final IAtom beg = atmmap.get(bond.getBegin());
        final IAtom end = atmmap.get(bond.getEnd());
        // either end has no query counterpart (atom was skipped above): drop the bond
        if (beg == null || end == null) {
            continue;
        }
        qry.addBond(new QueryBond(beg, end, Expr.Type.TRUE));
    }
    return qry;
}
use of org.openscience.cdk.isomorphism.matchers.QueryBond in project cdk by cdk.
the class MDLV2000Writer method writeMolecule.
/**
 * Writes a molecule to the underlying writer as an MDL V2000 connection table.
 *
 * <p>Output order: header block (title, program/timestamp line, remark), counts
 * line, atom block, bond block, atom value lines, then the property lines for
 * charges, radicals, isotopes, R groups and aliases, followed by atom lists,
 * Sgroups and the terminating END line. The writer is flushed at the end.
 *
 * @param container Molecule that is written to an OutputStream
 * @throws CDKException if a bond cannot be represented (no bond type could be
 *         determined, or an unspecific aromatic bond is present while the
 *         aromatic bond type option is not set)
 * @throws IllegalArgumentException if a query bond carries an expression type
 *         that has no corresponding bond type code
 * @throws Exception any other failure raised by the writer or the helpers
 */
public void writeMolecule(IAtomContainer container) throws Exception {
    final int dim = getNumberOfDimensions(container);
    StringBuilder line = new StringBuilder();
    // atom number (1-based) -> R group number; created lazily
    Map<Integer, Integer> rgroups = null;
    // atom number (1-based) -> alias label; created lazily
    Map<Integer, String> aliases = null;
    // write header block
    // lines get shortened to 80 chars, that's in the spec
    String title = container.getTitle();
    if (title == null)
        title = "";
    if (title.length() > 80)
        title = title.substring(0, 80);
    writer.write(title);
    writer.write('\n');
    /*
     * From CTX spec This line has the format:
     * IIPPPPPPPPMMDDYYHHmmddSSssssssssssEEEEEEEEEEEERRRRRR (FORTRAN:
     * A2<--A8--><---A10-->A2I2<--F10.5-><---F12.5--><-I6-> ) User's first
     * and last initials (l), program name (P), date/time (M/D/Y,H:m),
     * dimensional codes (d), scaling factors (S, s), energy (E) if modeling
     * program input, internal registry number (R) if input through MDL
     * form. A blank line can be substituted for line 2.
     */
    writer.write(" ");
    writer.write(getProgName());
    writer.write(new SimpleDateFormat("MMddyyHHmm").format(System.currentTimeMillis()));
    if (dim != 0) {
        writer.write(Integer.toString(dim));
        writer.write('D');
    }
    writer.write('\n');
    String comment = container.getProperty(CDKConstants.REMARK);
    if (comment == null)
        comment = "";
    if (comment.length() > 80)
        comment = comment.substring(0, 80);
    writer.write(comment);
    writer.write('\n');
    // index stereo elements for setting atom parity values
    Map<IAtom, ITetrahedralChirality> atomstereo = new HashMap<>();
    Map<IAtom, Integer> atomindex = new HashMap<>();
    for (IStereoElement element : container.stereoElements()) {
        if (element instanceof ITetrahedralChirality)
            atomstereo.put(((ITetrahedralChirality) element).getChiralAtom(), (ITetrahedralChirality) element);
    }
    // the map size before insertion is the 0-based position of the atom
    for (IAtom atom : container.atoms()) {
        atomindex.put(atom, atomindex.size());
    }
    // write Counts line
    line.append(formatMDLInt(container.getAtomCount(), 3));
    line.append(formatMDLInt(container.getBondCount(), 3));
    // find all the atoms that should be atom lists
    Map<Integer, IAtom> atomLists = new LinkedHashMap<>();
    for (int f = 0; f < container.getAtomCount(); f++) {
        if (container.getAtom(f) instanceof IQueryAtom) {
            QueryAtom queryAtom = (QueryAtom) AtomRef.deref(container.getAtom(f));
            Expr expr = queryAtom.getExpression();
            if (isValidAtomListExpression(expr)) {
                atomLists.put(f, container.getAtom(f));
            }
        }
    }
    // write number of atom lists
    line.append(formatMDLInt(atomLists.size(), 3));
    line.append(" 0");
    line.append(getChiralFlag(atomstereo.values()) ? " 1" : " 0");
    line.append(" 0 0 0 0 0999 V2000");
    writer.write(line.toString());
    writer.write('\n');
    // write Atom block
    for (int f = 0; f < container.getAtomCount(); f++) {
        IAtom atom = container.getAtom(f);
        line.setLength(0);
        switch (dim) {
            case 0:
                // if no coordinates available, then output a number
                // of zeros
                line.append(" 0.0000 0.0000 0.0000 ");
                break;
            case 2:
                if (atom.getPoint2d() != null) {
                    line.append(formatMDLFloat((float) atom.getPoint2d().x));
                    line.append(formatMDLFloat((float) atom.getPoint2d().y));
                    line.append(" 0.0000 ");
                } else {
                    line.append(" 0.0000 0.0000 0.0000 ");
                }
                break;
            case 3:
                if (atom.getPoint3d() != null) {
                    line.append(formatMDLFloat((float) atom.getPoint3d().x));
                    line.append(formatMDLFloat((float) atom.getPoint3d().y));
                    line.append(formatMDLFloat((float) atom.getPoint3d().z)).append(" ");
                } else {
                    line.append(" 0.0000 0.0000 0.0000 ");
                }
                break;
        }
        if (container.getAtom(f) instanceof IPseudoAtom) {
            // a numbered R group is written with the symbol R# and its number
            // is reported later on an RGP property line
            IPseudoAtom pseudoAtom = (IPseudoAtom) container.getAtom(f);
            String label = pseudoAtom.getLabel();
            // set to empty string if null
            if (label == null)
                label = "";
            // firstly check if it's a numbered R group
            Matcher matcher = NUMERED_R_GROUP.matcher(label);
            if (pseudoAtom.getAtomicNumber() == IElement.Wildcard && !label.isEmpty() && matcher.matches()) {
                line.append("R# ");
                if (rgroups == null) {
                    // we use a tree map to ensure the output order is always the same
                    rgroups = new TreeMap<>();
                }
                rgroups.put(f + 1, Integer.parseInt(matcher.group(1)));
            } else {
                // not a numbered R group - note the symbol may still be R
                // a label longer than the 3 character symbol field is written
                // as an alias and the atom symbol is used in the atom block
                if (label.length() > 3) {
                    if (aliases == null)
                        aliases = new TreeMap<>();
                    // atom index to alias
                    aliases.put(f + 1, label);
                    line.append(formatMDLString(atom.getSymbol(), 3));
                } else {
                    // make sure it's not empty
                    if (!label.isEmpty())
                        line.append(formatMDLString(label, 3));
                    else
                        line.append(formatMDLString(atom.getSymbol(), 3));
                }
            }
        } else if (atomLists.containsKey(f)) {
            line.append(formatMDLString("L", 3));
        } else {
            line.append(formatMDLString(container.getAtom(f).getSymbol(), 3));
        }
        // atom properties; slots that are never assigned stay 0
        int[] atomprops = new int[12];
        atomprops[0] = determineIsotope(atom);
        atomprops[1] = determineCharge(container, atom);
        atomprops[2] = determineStereoParity(container, atomstereo, atomindex, atom);
        atomprops[5] = determineValence(container, atom);
        atomprops[9] = determineAtomMap(atom);
        // dd (mass-number)
        line.append(formatMDLInt(atomprops[0], 2));
        // ccc (charge)
        line.append(formatMDLInt(atomprops[1], 3));
        int last = atomprops.length - 1;
        if (!writeDefaultProps.isSet()) {
            // trim trailing zero-valued properties
            while (last >= 0) {
                if (atomprops[last] != 0)
                    break;
                last--;
            }
            // matches BIOVIA syntax
            if (last >= 2 && last < 5)
                last = 5;
        }
        for (int i = 2; i <= last; i++) {
            line.append(formatMDLInt(atomprops[i], 3));
        }
        line.append('\n');
        writer.write(line.toString());
    }
    // write Bond block
    for (IBond bond : container.bonds()) {
        line.setLength(0);
        if (bond.getAtomCount() != 2) {
            logger.warn("Skipping bond with more/less than two atoms: " + bond);
        } else {
            if (bond.getStereo() == IBond.Stereo.UP_INVERTED || bond.getStereo() == IBond.Stereo.DOWN_INVERTED || bond.getStereo() == IBond.Stereo.UP_OR_DOWN_INVERTED) {
                // turn around atom coding to correct for inv stereo
                line.append(formatMDLInt(atomindex.get(bond.getEnd()) + 1, 3));
                line.append(formatMDLInt(atomindex.get(bond.getBegin()) + 1, 3));
            } else {
                line.append(formatMDLInt(atomindex.get(bond.getBegin()) + 1, 3));
                line.append(formatMDLInt(atomindex.get(bond.getEnd()) + 1, 3));
            }
            // 0 means "could not be determined" and is rejected below
            int bondType = 0;
            if (bond instanceof QueryBond) {
                QueryBond qbond = ((QueryBond) bond);
                Expr e = qbond.getExpression();
                switch (e.type()) {
                    case ALIPHATIC_ORDER:
                    case ORDER:
                        bondType = e.value();
                        break;
                    case IS_AROMATIC:
                        bondType = 4;
                        break;
                    case SINGLE_OR_DOUBLE:
                        bondType = 5;
                        break;
                    case SINGLE_OR_AROMATIC:
                        bondType = 6;
                        break;
                    case DOUBLE_OR_AROMATIC:
                        bondType = 7;
                        break;
                    case TRUE:
                        bondType = 8;
                        break;
                    case OR:
                        // recognise the two-operand disjunctions (in either
                        // operand order) that correspond to a bond type code
                        // SINGLE_OR_DOUBLE
                        if (e.equals(new Expr(Expr.Type.ALIPHATIC_ORDER, 1).or(new Expr(Expr.Type.ALIPHATIC_ORDER, 2))) || e.equals(new Expr(Expr.Type.ALIPHATIC_ORDER, 2).or(new Expr(Expr.Type.ALIPHATIC_ORDER, 1))))
                            bondType = 5;
                        // SINGLE_OR_AROMATIC
                        else if (e.equals(new Expr(Expr.Type.ALIPHATIC_ORDER, 1).or(new Expr(Expr.Type.IS_AROMATIC))) || e.equals(new Expr(Expr.Type.IS_AROMATIC).or(new Expr(Expr.Type.ALIPHATIC_ORDER, 1))))
                            bondType = 6;
                        // DOUBLE_OR_AROMATIC - same code as the dedicated
                        // DOUBLE_OR_AROMATIC case above (7, not 6)
                        else if (e.equals(new Expr(Expr.Type.ALIPHATIC_ORDER, 2).or(new Expr(Expr.Type.IS_AROMATIC))) || e.equals(new Expr(Expr.Type.IS_AROMATIC).or(new Expr(Expr.Type.ALIPHATIC_ORDER, 2))))
                            bondType = 7;
                        break;
                    default:
                        throw new IllegalArgumentException("Unsupported bond type!");
                }
            } else {
                if (bond.getOrder() != null) {
                    switch (bond.getOrder()) {
                        case SINGLE:
                        case DOUBLE:
                        case TRIPLE:
                            if (writeAromaticBondTypes.isSet() && bond.isAromatic())
                                bondType = 4;
                            else
                                bondType = bond.getOrder().numeric();
                            break;
                        case UNSET:
                            if (bond.isAromatic()) {
                                if (!writeAromaticBondTypes.isSet())
                                    throw new CDKException("Bond at idx " + container.indexOf(bond) + " was an unspecific aromatic bond which should only be used for queries in Molfiles. These can be written if desired by enabling the option 'WriteAromaticBondTypes'.");
                                bondType = 4;
                            }
                            break;
                    }
                }
            }
            if (bondType == 0)
                throw new CDKException("Bond at idx=" + container.indexOf(bond) + " is not supported by Molfile, bond=" + bond.getOrder());
            line.append(formatMDLInt(bondType, 3));
            line.append(" ");
            // the inverted variants share the code of their plain counterpart;
            // the inversion itself was handled by swapping the atoms above
            switch (bond.getStereo()) {
                case UP:
                    line.append("1");
                    break;
                case UP_INVERTED:
                    line.append("1");
                    break;
                case DOWN:
                    line.append("6");
                    break;
                case DOWN_INVERTED:
                    line.append("6");
                    break;
                case UP_OR_DOWN:
                    line.append("4");
                    break;
                case UP_OR_DOWN_INVERTED:
                    line.append("4");
                    break;
                case E_OR_Z:
                    line.append("3");
                    break;
                default:
                    line.append("0");
            }
            if (writeDefaultProps.isSet())
                line.append(" 0 0 0");
            line.append('\n');
            writer.write(line.toString());
        }
    }
    // Write Atom Value: one line per atom that has a non-blank String comment
    for (int i = 0; i < container.getAtomCount(); i++) {
        IAtom atom = container.getAtom(i);
        if (atom.getProperty(CDKConstants.COMMENT) != null && atom.getProperty(CDKConstants.COMMENT) instanceof String && !((String) atom.getProperty(CDKConstants.COMMENT)).trim().equals("")) {
            writer.write("V ");
            writer.write(formatMDLInt(i + 1, 3));
            writer.write(" ");
            writer.write((String) atom.getProperty(CDKConstants.COMMENT));
            writer.write('\n');
        }
    }
    // write formal atomic charges (one entry per line)
    for (int i = 0; i < container.getAtomCount(); i++) {
        IAtom atom = container.getAtom(i);
        Integer charge = atom.getFormalCharge();
        if (charge != null && charge != 0) {
            writer.write("M CHG 1 ");
            writer.write(formatMDLInt(i + 1, 3));
            writer.write(" ");
            writer.write(formatMDLInt(charge, 3));
            writer.write('\n');
        }
    }
    // write radical information
    if (container.getSingleElectronCount() > 0) {
        // 0-based atom index -> multiplicity, in atom order
        Map<Integer, SPIN_MULTIPLICITY> atomIndexSpinMap = new LinkedHashMap<>();
        for (int i = 0; i < container.getAtomCount(); i++) {
            IAtom atom = container.getAtom(i);
            int eCount = container.getConnectedSingleElectronsCount(atom);
            switch (eCount) {
                case 0:
                    continue;
                case 1:
                    atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.Monovalent);
                    break;
                case 2:
                    SPIN_MULTIPLICITY multiplicity = atom.getProperty(CDKConstants.SPIN_MULTIPLICITY);
                    if (multiplicity != null)
                        atomIndexSpinMap.put(i, multiplicity);
                    else {
                        // information loss, divalent but singlet or triplet?
                        atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.DivalentSinglet);
                    }
                    break;
                default:
                    logger.debug("Invalid number of radicals found: " + eCount);
                    break;
            }
        }
        // emit the entries in chunks of at most NN8 per line; the shared
        // iterator carries the position from one line to the next
        Iterator<Map.Entry<Integer, SPIN_MULTIPLICITY>> iterator = atomIndexSpinMap.entrySet().iterator();
        for (int i = 0; i < atomIndexSpinMap.size(); i += NN8) {
            if (atomIndexSpinMap.size() - i <= NN8) {
                writer.write("M RAD" + formatMDLInt(atomIndexSpinMap.size() - i, WIDTH));
                writeRadicalPattern(iterator, 0);
            } else {
                writer.write("M RAD" + formatMDLInt(NN8, WIDTH));
                writeRadicalPattern(iterator, 0);
            }
            writer.write('\n');
        }
    }
    // write formal isotope information
    for (int i = 0; i < container.getAtomCount(); i++) {
        IAtom atom = container.getAtom(i);
        if (!(atom instanceof IPseudoAtom)) {
            Integer atomicMass = atom.getMassNumber();
            // major isotopes are suppressed unless explicitly requested
            if (!writeMajorIsotopes.isSet() && isMajorIsotope(atom))
                atomicMass = null;
            if (atomicMass != null) {
                writer.write("M ISO 1 ");
                writer.write(formatMDLInt(i + 1, 3));
                writer.write(" ");
                writer.write(formatMDLInt(atomicMass, 3));
                writer.write('\n');
            }
        }
    }
    // write RGP line; entries are flushed in groups of 8 per line
    if (rgroups != null) {
        StringBuilder rgpLine = new StringBuilder();
        int cnt = 0;
        // entries are keyed by atom number, so every pair is self-describing
        for (Map.Entry<Integer, Integer> e : rgroups.entrySet()) {
            rgpLine.append(formatMDLInt(e.getKey(), 4));
            rgpLine.append(formatMDLInt(e.getValue(), 4));
            cnt++;
            if (cnt == 8) {
                rgpLine.insert(0, "M RGP" + formatMDLInt(cnt, 3));
                writer.write(rgpLine.toString());
                writer.write('\n');
                rgpLine = new StringBuilder();
                cnt = 0;
            }
        }
        // remaining entries that did not fill a complete line
        if (cnt != 0) {
            rgpLine.insert(0, "M RGP" + formatMDLInt(cnt, 3));
            writer.write(rgpLine.toString());
            writer.write('\n');
        }
    }
    // write atom aliases
    if (aliases != null) {
        for (Map.Entry<Integer, String> e : aliases.entrySet()) {
            writer.write("A" + formatMDLInt(e.getKey(), 5));
            writer.write('\n');
            String label = e.getValue();
            // fixed width file - doubtful someone would have a label > 70 but trim if they do
            if (label.length() > 70)
                label = label.substring(0, 70);
            writer.write(label);
            writer.write('\n');
        }
    }
    // write atom lists
    writeAtomLists(atomLists, writer);
    writeSgroups(container, writer, atomindex);
    // close molecule
    writer.write("M END");
    writer.write('\n');
    writer.flush();
}
use of org.openscience.cdk.isomorphism.matchers.QueryBond in project cdk by cdk.
the class MDLV2000BondBlockTest method anyBond.
/**
 * A bond line whose type field is 8 is read as a {@link QueryBond} with an
 * {@link Expr.Type#TRUE} expression, no stereo, and neither the aromatic nor
 * the single-or-double flag set.
 */
@Test
public void anyBond() throws Exception {
    final String bondLine = " 1 3 8 0 0 0 0";
    final int[] perAtomBuffer = new int[atoms.length];
    final IBond parsed = reader.readBondFast(bondLine, builder, atoms, perAtomBuffer, 1);

    // plain bond attributes
    assertThat(parsed.getStereo(), is(IBond.Stereo.NONE));
    assertFalse(parsed.getFlag(CDKConstants.ISAROMATIC));
    assertFalse(parsed.getFlag(CDKConstants.SINGLE_OR_DOUBLE));

    // query expression carried by the bond
    assertThat(parsed, is(instanceOf(QueryBond.class)));
    final QueryBond queryBond = (QueryBond) parsed;
    assertThat(queryBond.getExpression().type(), is(Expr.Type.TRUE));
}
use of org.openscience.cdk.isomorphism.matchers.QueryBond in project cdk by cdk.
the class MDLV2000BondBlockTest method singleOrAromaticBond.
/**
 * A bond line whose type field is 6 is read as a {@link QueryBond} with an
 * {@link Expr.Type#SINGLE_OR_AROMATIC} expression, no stereo, and neither the
 * aromatic nor the single-or-double flag set.
 */
@Test
public void singleOrAromaticBond() throws Exception {
    final String bondLine = " 1 3 6 0 0 0 0";
    final int[] perAtomBuffer = new int[atoms.length];
    final IBond parsed = reader.readBondFast(bondLine, builder, atoms, perAtomBuffer, 1);

    // plain bond attributes
    assertThat(parsed.getStereo(), is(IBond.Stereo.NONE));
    assertFalse(parsed.getFlag(CDKConstants.ISAROMATIC));
    assertFalse(parsed.getFlag(CDKConstants.SINGLE_OR_DOUBLE));

    // query expression carried by the bond
    assertThat(parsed, is(instanceOf(QueryBond.class)));
    final QueryBond queryBond = (QueryBond) parsed;
    assertThat(queryBond.getExpression().type(), is(Expr.Type.SINGLE_OR_AROMATIC));
}
use of org.openscience.cdk.isomorphism.matchers.QueryBond in project cdk by cdk.
the class MDLV2000BondBlockTest method doubleOrAromaticBond.
/**
 * A bond line whose type field is 7 is read as a {@link QueryBond} with an
 * {@link Expr.Type#DOUBLE_OR_AROMATIC} expression, no stereo, and neither the
 * aromatic nor the single-or-double flag set.
 */
@Test
public void doubleOrAromaticBond() throws Exception {
    final String bondLine = " 1 3 7 0 0 0 0";
    final int[] perAtomBuffer = new int[atoms.length];
    final IBond parsed = reader.readBondFast(bondLine, builder, atoms, perAtomBuffer, 1);

    // plain bond attributes
    assertThat(parsed.getStereo(), is(IBond.Stereo.NONE));
    assertFalse(parsed.getFlag(CDKConstants.ISAROMATIC));
    assertFalse(parsed.getFlag(CDKConstants.SINGLE_OR_DOUBLE));

    // query expression carried by the bond
    assertThat(parsed, is(instanceOf(QueryBond.class)));
    final QueryBond queryBond = (QueryBond) parsed;
    assertThat(queryBond.getExpression().type(), is(Expr.Type.DOUBLE_OR_AROMATIC));
}
Aggregations