Search in sources:

Example 1 with ICountFingerprint

use of org.openscience.cdk.fingerprint.ICountFingerprint in project ambit-mirror by ideaconsult.

the class Context method parseCommandFingerprints.

/**
 * Runs the fingerprint generation command: reads structure records from the input
 * file, computes every configured fingerprint for each structure, attaches the
 * results to the molecule as properties and writes the molecule to the output file.
 *
 * <p>Paging is driven by the page / page size parameters: only records whose read
 * counter falls in {@code [startRecord, maxRecord)} are processed; with a
 * non-positive page size no upper limit is applied.
 *
 * @param subcommand not referenced in the method body
 * @param now not referenced in the method body
 * @throws Exception a {@link FileNotFoundException} when no output file was given
 *         ({@code options.output == null}); anything thrown by the parameter
 *         parsing helpers or while constructing the reader/writer is propagated.
 *         Failures inside the batch run itself are logged, not rethrown.
 */
public void parseCommandFingerprints(String subcommand, long now) throws Exception {
    // Hard-coded to true, so the single-writer (else) branch below is currently never taken.
    boolean multifile = true;
    int page = parsePageParam();
    int pagesize = parsePageSizeParam();
    final boolean fp_count = parseWriteCountParam();
    final boolean fp_raw = parseWriteRawParam();
    String smiles_header = parseInputTag_Param("smiles", IteratingDelimitedFileReader.defaultSMILESHeader);
    String inchi_header = parseInputTag_Param("inchi", "InChI");
    String inchikey_header = parseInputTag_Param("inchikey", "InChIKey");
    Object tmpTag = parseSdfTitleParam();
    final String[] tags_to_keep = parsetags_to_keep();
    // Lower-cased once here; compared against lower-cased property names when writing SDF titles.
    final String sdf_title = tmpTag == null ? null : tmpTag.toString().toLowerCase();
    // Page window: first record to process (inclusive) ...
    final int startRecord = pagesize > 0 ? (page * pagesize + 1) : 1;
    // ... and the first record NOT to process (exclusive). With pagesize <= 0 this is
    // non-positive, which disables the upper bound (every check below tests maxRecord > 0).
    final int maxRecord = pagesize > 0 ? ((page + 1) * pagesize + 1) : pagesize;
    final File file = getInputFile();
    FileInputState input = new FileInputState(file);
    input.setOptionalSMILESHeader(smiles_header);
    input.setOptionalInChIHeader(inchi_header);
    input.setOptionalInChIKeyHeader(inchikey_header);
    if (options.output == null)
        throw new FileNotFoundException("Output file not specified. Please use -o {file}");
    final File outfile = new File(options.output);
    logger_cli.log(Level.INFO, "MSG_INFO_READINGWRITING", new Object[] { file.getAbsoluteFile(), outfile.getAbsolutePath() });
    final List<IFingerprinter> fps = parseFingerprinterParams();
    FileOutputState out = null;
    final IChemObjectWriter awriter;
    if (multifile) {
        awriter = new MultiFingerprintsWriter(outfile, fps, tags_to_keep);
    } else {
        out = new FileOutputState(outfile);
        awriter = out.getWriter();
        if (awriter instanceof FilesWithHeaderWriter)
            ((FilesWithHeaderWriter) awriter).setAddSMILEScolumn(false);
    }
    final IChemObjectWriter writer = awriter;
    final boolean writesdf = writer instanceof SDFWriter;
    // Lookup table handed to StructureStandardizer.renameTags(...) before each write.
    // Several keys (URI constant, plain string, Property instance) map to the same target
    // Property. NOTE(review): the effect of the enabled flag is decided inside renameTags,
    // which is not visible here - confirm.
    final Map<Object, Property> tags = new HashMap<>();
    Property newtag = Property.getSMILESInstance();
    newtag.setName("SMILES");
    newtag.setEnabled(false);
    tags.put(Property.opentox_SMILES, newtag);
    tags.put(Property.getSMILESInstance(), newtag);
    newtag = Property.getInChIInstance();
    newtag.setEnabled(false);
    tags.put(Property.opentox_InChI, newtag);
    tags.put("InChI", newtag);
    tags.put(Property.getInChIInstance(), newtag);
    // InChIKey is the only tag in this table that is left enabled.
    newtag = Property.getInChIKeyInstance();
    newtag.setEnabled(true);
    newtag.setName("InChIKey");
    tags.put(Property.opentox_InChIKey, newtag);
    tags.put(Property.getInChIKeyInstance(), newtag);
    newtag = Property.getInstance(CDKConstants.TITLE, CDKConstants.TITLE);
    newtag.setEnabled(false);
    tags.put(CDKConstants.TITLE, newtag);
    tags.put(newtag, newtag);
    newtag = Property.getInstance("CHEMBL", "CHEMBL");
    newtag.setEnabled(false);
    tags.put("CHEMBL", newtag);
    tags.put(newtag, newtag);
    // Batch driver: applies the page window, reports progress and closes the writer at the end.
    final BatchDBProcessor<IStructureRecord> batch = new BatchDBProcessor<IStructureRecord>() {

        // Cancels the batch once the read counter reaches the (exclusive) upper bound.
        @Override
        public void onItemRead(IStructureRecord input, IBatchStatistics stats) {
            super.onItemRead(input, stats);
            if ((maxRecord > 0) && stats.getRecords(RECORDS_STATS.RECORDS_READ) >= (maxRecord))
                cancel();
        }

        // A record is skipped when it lies before the page start or at/after the page end.
        @Override
        public boolean skip(IStructureRecord input, IBatchStatistics stats) {
            return (stats.getRecords(RECORDS_STATS.RECORDS_READ) < startRecord) || ((maxRecord > 0) && (stats.getRecords(RECORDS_STATS.RECORDS_READ) >= maxRecord));
        }

        // While skipping, statistics are published using twice the silent interval.
        @Override
        public void onItemSkipped(IStructureRecord input, IBatchStatistics stats) {
            super.onItemSkipped(input, stats);
            if (stats.isTimeToPrint(getSilentInterval() * 2))
                propertyChangeSupport.firePropertyChange(PROPERTY_BATCHSTATS, null, stats);
        }

        // No-op override: super.onItemProcessing is not called.
        @Override
        public void onItemProcessing(IStructureRecord input, Object output, IBatchStatistics stats) {
        }

        // Delegates to the superclass, then logs the exception message at SEVERE.
        @Override
        public void onError(IStructureRecord input, Object output, IBatchStatistics stats, Exception x) {
            super.onError(input, output, stats, x);
            logger_cli.log(Level.SEVERE, "MSG_ERR", new Object[] { x.getMessage() });
        }

        // NOTE(review): presumably milliseconds (30 s) - the unit is not shown here.
        @Override
        public long getSilentInterval() {
            return 30000L;
        }

        // Closes the output writer first; a failure there is silently ignored so that
        // super.close() always runs.
        @Override
        public void close() throws Exception {
            try {
                writer.close();
            } catch (Exception x) {
            } finally {
            }
            super.close();
        }
    };
    batch.setProcessorChain(new ProcessorsChain<IStructureRecord, IBatchStatistics, IProcessor>());
    // Single processing step: record -> molecule -> fingerprints -> write.
    // It always returns the incoming record; errors are logged rather than thrown.
    batch.getProcessorChain().add(new DefaultAmbitProcessor<IStructureRecord, IStructureRecord>() {

        protected MoleculeReader molReader = new MoleculeReader(true, false);

        @Override
        public IStructureRecord process(IStructureRecord record) throws Exception {
            IAtomContainer mol;
            IAtomContainer processed = null;
            try {
                mol = molReader.process(record);
                if (mol != null) {
                    // Copy record properties onto the molecule, restricted to tags_to_keep when given.
                    // NOTE(review): Arrays.binarySearch is only defined for sorted arrays -
                    // presumably parsetags_to_keep() returns a sorted array; confirm.
                    for (Property p : record.getRecordProperties()) {
                        Object v = record.getRecordProperty(p);
                        String pname = p.getName().replace("http://www.opentox.org/api/1.1#", "");
                        // already parsed
                        if (tags_to_keep != null && Arrays.binarySearch(tags_to_keep, pname) < 0)
                            continue;
                        else
                            mol.setProperty(p, v);
                    }
                    if (tags_to_keep != null) {
                        // Second pass over the molecule's own properties: names are collected
                        // first and removed afterwards, so the key set is not modified while
                        // it is being iterated.
                        List<String> toRemove = null;
                        Iterator pi = mol.getProperties().keySet().iterator();
                        while (pi.hasNext()) {
                            Object p = pi.next();
                            if (Arrays.binarySearch(tags_to_keep, p.toString()) < 0) {
                                if (toRemove == null)
                                    toRemove = new ArrayList<String>();
                                toRemove.add(p.toString());
                            }
                        }
                        if (toRemove != null)
                            for (String propertyToRemove : toRemove) mol.removeProperty(propertyToRemove);
                    }
                } else {
                    logger_cli.log(Level.SEVERE, "MSG_FINGEPRINTGEN", new Object[] { "Empty molecule", getIds(record) });
                    return record;
                }
                AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol);
            } catch (Exception x) {
                logger_cli.log(Level.SEVERE, "MSG_ERR_MOLREAD", new Object[] { getIds(record), x.toString() });
                return record;
            } finally {
            }
            // From here on 'processed' and 'mol' refer to the same object.
            processed = mol;
            for (IFingerprinter fp : fps) {
                // Each fingerprint flavour is computed independently; a failure in one
                // leaves its variable null and does not stop the others.
                ICountFingerprint cfp = null;
                try {
                    cfp = fp.getCountFingerprint(processed);
                } catch (Exception x) {
                    logger.log(Level.FINER, x.getMessage());
                }
                IBitFingerprint bfp = null;
                try {
                    bfp = fp.getBitFingerprint(processed);
                } catch (Exception x) {
                    // failure is silently ignored; bfp stays null
                }
                Map<String, Integer> fpraw = null;
                try {
                    // raw fingerprint is only requested when the raw output option is on
                    if (fp_raw)
                        fpraw = fp.getRawFingerprint(processed);
                } catch (Exception x) {
                    // failure is silently ignored; fpraw stays null
                }
                try {
                    if (cfp != null) {
                        // The count fingerprint is always stored under the fingerprinter class
                        // name; the extra ".count" key is added only when fp_count is set.
                        if (fp_count)
                            processed.setProperty(fp.getClass().getName() + ".count", cfp);
                        processed.setProperty(fp.getClass().getName(), cfp);
                    }
                    if (bfp != null)
                        processed.setProperty(fp.getClass().getName() + ".hashed", bfp);
                    if (fpraw != null)
                        processed.setProperty(fp.getClass().getName() + ".raw", fpraw);
                } catch (Exception x) {
                    // StringWriter w = new StringWriter();
                    // x.printStackTrace(new PrintWriter(w));
                    logger_cli.log(Level.SEVERE, "MSG_FINGEPRINTGEN", new Object[] { x.getMessage(), getIds(record) });
                    if (processed != null)
                        processed.setProperty("ERROR." + fp.getClass().getName(), x.getMessage());
                } finally {
                    // 'processed' is the same object as 'mol' (see assignment above), so this
                    // re-adds the molecule's own properties to itself.
                    if (processed != null)
                        processed.addProperties(mol.getProperties());
                }
            }
            // NOTE(review): both paths that leave the molecule unset return earlier, so
            // 'processed' appears to be always non-null here and the else branch unreachable.
            if (processed != null)
                try {
                    if (writesdf && sdf_title != null) {
                        // Use the first property whose lower-cased name equals sdf_title as the title.
                        for (Entry<Object, Object> p : processed.getProperties().entrySet()) if (sdf_title.equals(p.getKey().toString().toLowerCase())) {
                            processed.setProperty(CDKConstants.TITLE, p.getValue());
                            break;
                        }
                    }
                    StructureStandardizer.renameTags(processed, tags, true);
                    writer.write(processed);
                } catch (Exception x) {
                    // StringWriter w = new StringWriter();
                    // x.printStackTrace(new PrintWriter(w));
                    logger_cli.log(Level.SEVERE, "MSG_FINGEPRINTGEN", new Object[] { x.getMessage(), getIds(record) });
                }
            else {
                logger_cli.log(Level.SEVERE, "MSG_FINGEPRINTGEN", new Object[] { "Empty molecule", getIds(record) });
            }
            return record;
        }
    });
    // Forward batch statistics events to the command-line logger.
    batch.addPropertyChangeListener(new PropertyChangeListener() {

        @Override
        public void propertyChange(PropertyChangeEvent evt) {
            if (AbstractBatchProcessor.PROPERTY_BATCHSTATS.equals(evt.getPropertyName()))
                logger_cli.log(Level.INFO, evt.getNewValue().toString());
        }
    });
    /*
		 * standardprocessor.setCallback(new
		 * DefaultAmbitProcessor<IAtomContainer, IAtomContainer>() {
		 * 
		 * @Override public IAtomContainer process(IAtomContainer target) throws
		 * Exception { try { //writer.write(target); } catch (Exception x) {
		 * logger.log(Level.SEVERE, x.getMessage()); } return target; } });
		 */
    IBatchStatistics stats = null;
    try {
        stats = batch.process(input);
    } catch (Exception x) {
        // A failed batch run is logged as a warning and not rethrown.
        logger_cli.log(Level.WARNING, x.getMessage(), x);
    } finally {
        // Empty try block: nothing is executed here, so this catch can never fire.
        try {
        } catch (Exception x) {
            logger_cli.warning(x.getMessage());
        }
        // Closing the batch also closes the writer (see the close() override above).
        try {
            if (batch != null)
                batch.close();
        } catch (Exception x) {
            logger_cli.warning(x.getMessage());
        }
        if (stats != null)
            logger_cli.log(Level.INFO, stats.toString());
    }
}
Also used : IAtomContainer(org.openscience.cdk.interfaces.IAtomContainer) PropertyChangeListener(java.beans.PropertyChangeListener) HashMap(java.util.HashMap) FileNotFoundException(java.io.FileNotFoundException) SDFWriter(org.openscience.cdk.io.SDFWriter) IFingerprinter(org.openscience.cdk.fingerprint.IFingerprinter) IChemObjectWriter(org.openscience.cdk.io.IChemObjectWriter) IStructureRecord(ambit2.base.interfaces.IStructureRecord) MoleculeReader(ambit2.core.processors.structure.MoleculeReader) Iterator(java.util.Iterator) ArrayList(java.util.ArrayList) List(java.util.List) Property(ambit2.base.data.Property) IBitFingerprint(org.openscience.cdk.fingerprint.IBitFingerprint) PropertyChangeEvent(java.beans.PropertyChangeEvent) FileOutputState(ambit2.core.io.FileOutputState) IBatchStatistics(net.idea.modbcum.i.batch.IBatchStatistics) IProcessor(net.idea.modbcum.i.processors.IProcessor) ICountFingerprint(org.openscience.cdk.fingerprint.ICountFingerprint) IBitFingerprint(org.openscience.cdk.fingerprint.IBitFingerprint) BatchDBProcessor(ambit2.db.processors.BatchDBProcessor) AmbitException(net.idea.modbcum.i.exceptions.AmbitException) SQLException(java.sql.SQLException) ConnectException(java.net.ConnectException) IOException(java.io.IOException) OperationNotSupportedException(javax.naming.OperationNotSupportedException) FileNotFoundException(java.io.FileNotFoundException) ICountFingerprint(org.openscience.cdk.fingerprint.ICountFingerprint) FilesWithHeaderWriter(ambit2.core.io.FilesWithHeaderWriter) MultiFingerprintsWriter(ambit2.core.io.fp.MultiFingerprintsWriter) File(java.io.File) FileInputState(ambit2.core.io.FileInputState) Map(java.util.Map) HashMap(java.util.HashMap)

Example 2 with ICountFingerprint

use of org.openscience.cdk.fingerprint.ICountFingerprint in project cdk by cdk.

the class Tanimoto method method1.

/**
 * Computes the Tanimoto value of two count fingerprints (method 1).
 *
 * <p>The fingerprints may have different numbers of populated bins. Bins are
 * matched by hash; the result is {@code dot / (|fp1|^2 + |fp2|^2 - dot)}, where
 * {@code dot} is the sum of count products over all bin pairs with equal hash.
 * Uses Tanimoto method from {@cdk.cite Steffen09}.
 *
 * @param fp1 count fingerprint 1
 * @param fp2 count fingerprint 2
 * @return the Tanimoto value for the two fingerprints
 * @throws IllegalArgumentException if the denominator is zero
 */
public static double method1(ICountFingerprint fp1, ICountFingerprint fp2) {
    long dotProduct = 0;
    long squaredNorm1 = 0;
    long squaredNorm2 = 0;

    for (int i = 0; i < fp1.numOfPopulatedbins(); i++) {
        final int hash1 = fp1.getHash(i);
        // widened once so every product below is computed in 64-bit arithmetic
        final long count1 = fp1.getCount(i);
        for (int j = 0; j < fp2.numOfPopulatedbins(); j++) {
            if (fp2.getHash(j) == hash1) {
                dotProduct += count1 * fp2.getCount(j);
            }
        }
        squaredNorm1 += count1 * count1;
    }

    for (int j = 0; j < fp2.numOfPopulatedbins(); j++) {
        final long count2 = fp2.getCount(j);
        squaredNorm2 += count2 * count2;
    }

    final long denominator = squaredNorm1 + squaredNorm2 - dotProduct;
    if (denominator != 0) {
        return dotProduct / (double) denominator;
    }
    throw new IllegalArgumentException(EMPTY_FINGERPRINTS_PROVIDED);
}
Also used : BitSetFingerprint(org.openscience.cdk.fingerprint.BitSetFingerprint) ICountFingerprint(org.openscience.cdk.fingerprint.ICountFingerprint) IBitFingerprint(org.openscience.cdk.fingerprint.IBitFingerprint) IntArrayFingerprint(org.openscience.cdk.fingerprint.IntArrayFingerprint)

Example 3 with ICountFingerprint

use of org.openscience.cdk.fingerprint.ICountFingerprint in project cdk by cdk.

the class SignatureFingerprintTanimotoTest method testCountMethod1and2.

/**
 * Checks both count-based Tanimoto variants: first on two hand-built
 * single-feature fingerprints with counts 3 and 4, then on signature
 * fingerprints of two identical indole molecules, which must score 1.0.
 */
@Test
public void testCountMethod1and2() throws CDKException {
    final double tolerance = 0.001;

    // Single feature "A" with different counts in each fingerprint.
    HashMap<String, Integer> countsOfThree = new HashMap<String, Integer>();
    countsOfThree.put("A", 3);
    ICountFingerprint threeA = new IntArrayCountFingerprint(countsOfThree);

    HashMap<String, Integer> countsOfFour = new HashMap<String, Integer>();
    countsOfFour.put("A", 4);
    ICountFingerprint fourA = new IntArrayCountFingerprint(countsOfFour);

    Assert.assertEquals(0.923, Tanimoto.method1(threeA, fourA), tolerance);
    Assert.assertEquals(0.75, Tanimoto.method2(threeA, fourA), tolerance);

    // Identical molecules must be maximally similar under both methods.
    IAtomContainer firstIndole = TestMoleculeFactory.makeIndole();
    IAtomContainer secondIndole = TestMoleculeFactory.makeIndole();
    SignatureFingerprinter signatureFingerprinter = new SignatureFingerprinter();
    ICountFingerprint firstIndoleFp = signatureFingerprinter.getCountFingerprint(firstIndole);
    ICountFingerprint secondIndoleFp = signatureFingerprinter.getCountFingerprint(secondIndole);

    Assert.assertEquals(1.0, Tanimoto.method1(firstIndoleFp, secondIndoleFp), tolerance);
    Assert.assertEquals(1.0, Tanimoto.method2(firstIndoleFp, secondIndoleFp), tolerance);
}
Also used : ICountFingerprint(org.openscience.cdk.fingerprint.ICountFingerprint) IAtomContainer(org.openscience.cdk.interfaces.IAtomContainer) SignatureFingerprinter(org.openscience.cdk.fingerprint.SignatureFingerprinter) IntArrayCountFingerprint(org.openscience.cdk.fingerprint.IntArrayCountFingerprint) Test(org.junit.Test)

Example 4 with ICountFingerprint

use of org.openscience.cdk.fingerprint.ICountFingerprint in project cdk by cdk.

the class SignatureFingerprintTanimotoTest method testComparingBitFingerprintAndCountBehavingAsBit.

/**
 * Count fingerprints switched to bit behaviour must give the same Tanimoto
 * value from both count methods, and that value must match the one obtained
 * from the corresponding bit fingerprints.
 */
@Test
public void testComparingBitFingerprintAndCountBehavingAsBit() throws Exception {
    final IAtomContainer triazole = TestMoleculeFactory.make123Triazole();
    final IAtomContainer imidazole = TestMoleculeFactory.makeImidazole();
    final SignatureFingerprinter signatureFingerprinter = new SignatureFingerprinter(1);

    // Count fingerprints, flipped into bit-like behaviour.
    final ICountFingerprint triazoleCounts = signatureFingerprinter.getCountFingerprint(triazole);
    final ICountFingerprint imidazoleCounts = signatureFingerprinter.getCountFingerprint(imidazole);
    triazoleCounts.setBehaveAsBitFingerprint(true);
    imidazoleCounts.setBehaveAsBitFingerprint(true);

    // Bit fingerprints of the same molecules.
    final IBitFingerprint triazoleBits = signatureFingerprinter.getBitFingerprint(triazole);
    final IBitFingerprint imidazoleBits = signatureFingerprinter.getBitFingerprint(imidazole);

    final double fromBits = Tanimoto.calculate(triazoleBits, imidazoleBits);
    final double fromCountsMethod1 = Tanimoto.method1(triazoleCounts, imidazoleCounts);
    final double fromCountsMethod2 = Tanimoto.method2(triazoleCounts, imidazoleCounts);

    final double tolerance = 0.001;
    Assert.assertEquals(fromCountsMethod1, fromCountsMethod2, tolerance);
    Assert.assertEquals(fromBits, fromCountsMethod1, tolerance);
}
Also used : ICountFingerprint(org.openscience.cdk.fingerprint.ICountFingerprint) IBitFingerprint(org.openscience.cdk.fingerprint.IBitFingerprint) IAtomContainer(org.openscience.cdk.interfaces.IAtomContainer) SignatureFingerprinter(org.openscience.cdk.fingerprint.SignatureFingerprinter) Test(org.junit.Test)

Example 5 with ICountFingerprint

use of org.openscience.cdk.fingerprint.ICountFingerprint in project cdk by cdk.

the class TanimotoTest method method1.

/**
 * Verifies {@code Tanimoto.method1} on two single-feature count fingerprints
 * with counts 3 and 4: 12 / (9 + 16 - 12) = 0.923.
 */
@Test
public void method1() throws CDKException {
    HashMap<String, Integer> countsOfThree = new HashMap<String, Integer>();
    countsOfThree.put("A", 3);
    ICountFingerprint threeA = new IntArrayCountFingerprint(countsOfThree);

    HashMap<String, Integer> countsOfFour = new HashMap<String, Integer>();
    countsOfFour.put("A", 4);
    ICountFingerprint fourA = new IntArrayCountFingerprint(countsOfFour);

    double similarity = Tanimoto.method1(threeA, fourA);
    Assert.assertEquals(0.923, similarity, 0.001);
}
Also used : ICountFingerprint(org.openscience.cdk.fingerprint.ICountFingerprint) IntArrayCountFingerprint(org.openscience.cdk.fingerprint.IntArrayCountFingerprint) Test(org.junit.Test)

Aggregations

ICountFingerprint (org.openscience.cdk.fingerprint.ICountFingerprint)11 Test (org.junit.Test)7 IAtomContainer (org.openscience.cdk.interfaces.IAtomContainer)6 IBitFingerprint (org.openscience.cdk.fingerprint.IBitFingerprint)5 SignatureFingerprinter (org.openscience.cdk.fingerprint.SignatureFingerprinter)4 IntArrayCountFingerprint (org.openscience.cdk.fingerprint.IntArrayCountFingerprint)3 HashMap (java.util.HashMap)2 Map (java.util.Map)2 BitSetFingerprint (org.openscience.cdk.fingerprint.BitSetFingerprint)2 IntArrayFingerprint (org.openscience.cdk.fingerprint.IntArrayFingerprint)2 Property (ambit2.base.data.Property)1 IStructureRecord (ambit2.base.interfaces.IStructureRecord)1 FileInputState (ambit2.core.io.FileInputState)1 FileOutputState (ambit2.core.io.FileOutputState)1 FilesWithHeaderWriter (ambit2.core.io.FilesWithHeaderWriter)1 MultiFingerprintsWriter (ambit2.core.io.fp.MultiFingerprintsWriter)1 MoleculeReader (ambit2.core.processors.structure.MoleculeReader)1 BatchDBProcessor (ambit2.db.processors.BatchDBProcessor)1 PropertyChangeEvent (java.beans.PropertyChangeEvent)1 PropertyChangeListener (java.beans.PropertyChangeListener)1