Search in sources :

Example 1 with MultiFingerprintsWriter

Use of ambit2.core.io.fp.MultiFingerprintsWriter in the project ambit-mirror by ideaconsult.

The method parseCommandFingerprints of the class Context.

/**
 * Runs the fingerprint command: reads structure records from the input file,
 * computes every configured fingerprint for each structure, attaches the
 * results to the molecule as properties and hands the molecule to the writer.
 *
 * <p>Paging: when the page size is positive, only records
 * {@code page * pagesize + 1} .. {@code (page + 1) * pagesize} (by read count)
 * are processed and the batch is cancelled once the upper bound is reached.
 * A non-positive page size disables the upper bound.
 *
 * <p>Failures on individual records or fingerprinters are logged and do not
 * abort the run; a failure of the batch itself is logged as a warning.
 *
 * @param subcommand not used by this method
 * @param now        not used by this method
 * @throws FileNotFoundException if no output file was specified
 * @throws Exception             if parameter parsing or the reader/writer set-up fails
 */
public void parseCommandFingerprints(String subcommand, long now) throws Exception {
    // Hard-wired to true, so the single-file branch further down is never taken here.
    boolean multifile = true;
    int page = parsePageParam();
    int pagesize = parsePageSizeParam();
    final boolean fp_count = parseWriteCountParam();
    final boolean fp_raw = parseWriteRawParam();
    String smiles_header = parseInputTag_Param("smiles", IteratingDelimitedFileReader.defaultSMILESHeader);
    String inchi_header = parseInputTag_Param("inchi", "InChI");
    String inchikey_header = parseInputTag_Param("inchikey", "InChIKey");
    Object tmpTag = parseSdfTitleParam();
    // NOTE(review): Arrays.binarySearch below requires this array to be sorted;
    // presumably parsetags_to_keep() sorts it - confirm.
    final String[] tags_to_keep = parsetags_to_keep();
    final String sdf_title = tmpTag == null ? null : tmpTag.toString().toLowerCase();
    // First record (1-based read count) that is processed.
    final int startRecord = pagesize > 0 ? (page * pagesize + 1) : 1;
    // Exclusive upper bound on the read count; a value <= 0 means "no upper bound".
    final int maxRecord = pagesize > 0 ? ((page + 1) * pagesize + 1) : pagesize;

    final File file = getInputFile();
    FileInputState input = new FileInputState(file);
    input.setOptionalSMILESHeader(smiles_header);
    input.setOptionalInChIHeader(inchi_header);
    input.setOptionalInChIKeyHeader(inchikey_header);
    if (options.output == null)
        throw new FileNotFoundException("Output file not specified. Please use -o {file}");
    final File outfile = new File(options.output);
    logger_cli.log(Level.INFO, "MSG_INFO_READINGWRITING", new Object[] { file.getAbsoluteFile(), outfile.getAbsolutePath() });

    final List<IFingerprinter> fps = parseFingerprinterParams();
    FileOutputState out = null;
    final IChemObjectWriter awriter;
    if (multifile) {
        awriter = new MultiFingerprintsWriter(outfile, fps, tags_to_keep);
    } else {
        out = new FileOutputState(outfile);
        awriter = out.getWriter();
        if (awriter instanceof FilesWithHeaderWriter)
            ((FilesWithHeaderWriter) awriter).setAddSMILEScolumn(false);
    }
    final IChemObjectWriter writer = awriter;
    final boolean writesdf = writer instanceof SDFWriter;

    // Tag renaming table passed to StructureStandardizer.renameTags for every written molecule.
    final Map<Object, Property> tags = new HashMap<>();
    Property newtag = Property.getSMILESInstance();
    newtag.setName("SMILES");
    newtag.setEnabled(false);
    tags.put(Property.opentox_SMILES, newtag);
    tags.put(Property.getSMILESInstance(), newtag);
    newtag = Property.getInChIInstance();
    newtag.setEnabled(false);
    tags.put(Property.opentox_InChI, newtag);
    tags.put("InChI", newtag);
    tags.put(Property.getInChIInstance(), newtag);
    newtag = Property.getInChIKeyInstance();
    newtag.setEnabled(true);
    newtag.setName("InChIKey");
    tags.put(Property.opentox_InChIKey, newtag);
    tags.put(Property.getInChIKeyInstance(), newtag);
    newtag = Property.getInstance(CDKConstants.TITLE, CDKConstants.TITLE);
    newtag.setEnabled(false);
    tags.put(CDKConstants.TITLE, newtag);
    tags.put(newtag, newtag);
    newtag = Property.getInstance("CHEMBL", "CHEMBL");
    newtag.setEnabled(false);
    tags.put("CHEMBL", newtag);
    tags.put(newtag, newtag);

    final BatchDBProcessor<IStructureRecord> batch = new BatchDBProcessor<IStructureRecord>() {

        @Override
        public void onItemRead(IStructureRecord input, IBatchStatistics stats) {
            super.onItemRead(input, stats);
            // Stop reading as soon as the requested page has been consumed.
            if ((maxRecord > 0) && stats.getRecords(RECORDS_STATS.RECORDS_READ) >= (maxRecord))
                cancel();
        }

        @Override
        public boolean skip(IStructureRecord input, IBatchStatistics stats) {
            long read = stats.getRecords(RECORDS_STATS.RECORDS_READ);
            boolean beforePage = read < startRecord;
            boolean afterPage = (maxRecord > 0) && (read >= maxRecord);
            return beforePage || afterPage;
        }

        @Override
        public void onItemSkipped(IStructureRecord input, IBatchStatistics stats) {
            super.onItemSkipped(input, stats);
            if (stats.isTimeToPrint(getSilentInterval() * 2))
                propertyChangeSupport.firePropertyChange(PROPERTY_BATCHSTATS, null, stats);
        }

        @Override
        public void onItemProcessing(IStructureRecord input, Object output, IBatchStatistics stats) {
            // intentionally empty
        }

        @Override
        public void onError(IStructureRecord input, Object output, IBatchStatistics stats, Exception x) {
            super.onError(input, output, stats, x);
            logger_cli.log(Level.SEVERE, "MSG_ERR", new Object[] { x.getMessage() });
        }

        @Override
        public long getSilentInterval() {
            return 30000L;
        }

        @Override
        public void close() throws Exception {
            try {
                writer.close();
            } catch (Exception x) {
                // Best effort: a failing writer must not prevent the batch from closing,
                // but the failure should not go unnoticed either.
                logger_cli.log(Level.WARNING, x.getMessage(), x);
            }
            super.close();
        }
    };
    batch.setProcessorChain(new ProcessorsChain<IStructureRecord, IBatchStatistics, IProcessor>());
    batch.getProcessorChain().add(new DefaultAmbitProcessor<IStructureRecord, IStructureRecord>() {

        protected MoleculeReader molReader = new MoleculeReader(true, false);

        @Override
        public IStructureRecord process(IStructureRecord record) throws Exception {
            IAtomContainer mol;
            try {
                mol = molReader.process(record);
                if (mol == null) {
                    logger_cli.log(Level.SEVERE, "MSG_FINGEPRINTGEN", new Object[] { "Empty molecule", getIds(record) });
                    return record;
                }
                // Copy record properties onto the molecule, restricted to tags_to_keep when given.
                for (Property p : record.getRecordProperties()) {
                    Object v = record.getRecordProperty(p);
                    String pname = p.getName().replace("http://www.opentox.org/api/1.1#", "");
                    if (tags_to_keep == null || Arrays.binarySearch(tags_to_keep, pname) >= 0)
                        mol.setProperty(p, v);
                }
                if (tags_to_keep != null) {
                    // Collect first, remove afterwards, so the property map is not
                    // modified while it is being iterated.
                    // NOTE(review): removal is done by the key's toString(); whether that
                    // matches non-String keys depends on the key type's equals/hashCode - confirm.
                    List<String> toRemove = new ArrayList<>();
                    for (Object key : mol.getProperties().keySet()) {
                        if (Arrays.binarySearch(tags_to_keep, key.toString()) < 0)
                            toRemove.add(key.toString());
                    }
                    for (String propertyToRemove : toRemove)
                        mol.removeProperty(propertyToRemove);
                }
                AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol);
            } catch (Exception x) {
                logger_cli.log(Level.SEVERE, "MSG_ERR_MOLREAD", new Object[] { getIds(record), x.toString() });
                return record;
            }

            // mol is non-null from here on: a null molecule returned above.
            for (IFingerprinter fp : fps) {
                // Each fingerprint flavour is optional; a fingerprinter that does not
                // support one of them simply contributes nothing for that flavour.
                ICountFingerprint cfp = null;
                try {
                    cfp = fp.getCountFingerprint(mol);
                } catch (Exception x) {
                    logger.log(Level.FINER, x.getMessage());
                }
                IBitFingerprint bfp = null;
                try {
                    bfp = fp.getBitFingerprint(mol);
                } catch (Exception x) {
                    logger.log(Level.FINER, x.getMessage());
                }
                Map<String, Integer> fpraw = null;
                try {
                    if (fp_raw)
                        fpraw = fp.getRawFingerprint(mol);
                } catch (Exception x) {
                    logger.log(Level.FINER, x.getMessage());
                }
                try {
                    if (cfp != null) {
                        if (fp_count)
                            mol.setProperty(fp.getClass().getName() + ".count", cfp);
                        // The count fingerprint is always stored under the plain class name.
                        mol.setProperty(fp.getClass().getName(), cfp);
                    }
                    if (bfp != null)
                        mol.setProperty(fp.getClass().getName() + ".hashed", bfp);
                    if (fpraw != null)
                        mol.setProperty(fp.getClass().getName() + ".raw", fpraw);
                } catch (Exception x) {
                    logger_cli.log(Level.SEVERE, "MSG_FINGEPRINTGEN", new Object[] { x.getMessage(), getIds(record) });
                    mol.setProperty("ERROR." + fp.getClass().getName(), x.getMessage());
                }
            }

            try {
                if (writesdf && sdf_title != null) {
                    // Use the first property whose lower-cased name equals sdf_title as the SDF title.
                    for (Entry<Object, Object> p : mol.getProperties().entrySet()) {
                        if (sdf_title.equals(p.getKey().toString().toLowerCase())) {
                            mol.setProperty(CDKConstants.TITLE, p.getValue());
                            break;
                        }
                    }
                }
                StructureStandardizer.renameTags(mol, tags, true);
                writer.write(mol);
            } catch (Exception x) {
                logger_cli.log(Level.SEVERE, "MSG_FINGEPRINTGEN", new Object[] { x.getMessage(), getIds(record) });
            }
            return record;
        }
    });
    batch.addPropertyChangeListener(new PropertyChangeListener() {

        @Override
        public void propertyChange(PropertyChangeEvent evt) {
            if (AbstractBatchProcessor.PROPERTY_BATCHSTATS.equals(evt.getPropertyName()))
                // String.valueOf tolerates an event without a new value.
                logger_cli.log(Level.INFO, String.valueOf(evt.getNewValue()));
        }
    });

    IBatchStatistics stats = null;
    try {
        stats = batch.process(input);
    } catch (Exception x) {
        logger_cli.log(Level.WARNING, x.getMessage(), x);
    } finally {
        // Closing the batch also closes the writer (see close() above).
        try {
            batch.close();
        } catch (Exception x) {
            logger_cli.warning(x.getMessage());
        }
        if (stats != null)
            logger_cli.log(Level.INFO, stats.toString());
    }
}
Also used : IAtomContainer(org.openscience.cdk.interfaces.IAtomContainer) PropertyChangeListener(java.beans.PropertyChangeListener) HashMap(java.util.HashMap) FileNotFoundException(java.io.FileNotFoundException) SDFWriter(org.openscience.cdk.io.SDFWriter) IFingerprinter(org.openscience.cdk.fingerprint.IFingerprinter) IChemObjectWriter(org.openscience.cdk.io.IChemObjectWriter) IStructureRecord(ambit2.base.interfaces.IStructureRecord) MoleculeReader(ambit2.core.processors.structure.MoleculeReader) Iterator(java.util.Iterator) ArrayList(java.util.ArrayList) List(java.util.List) Property(ambit2.base.data.Property) IBitFingerprint(org.openscience.cdk.fingerprint.IBitFingerprint) PropertyChangeEvent(java.beans.PropertyChangeEvent) FileOutputState(ambit2.core.io.FileOutputState) IBatchStatistics(net.idea.modbcum.i.batch.IBatchStatistics) IProcessor(net.idea.modbcum.i.processors.IProcessor) ICountFingerprint(org.openscience.cdk.fingerprint.ICountFingerprint) IBitFingerprint(org.openscience.cdk.fingerprint.IBitFingerprint) BatchDBProcessor(ambit2.db.processors.BatchDBProcessor) AmbitException(net.idea.modbcum.i.exceptions.AmbitException) SQLException(java.sql.SQLException) ConnectException(java.net.ConnectException) IOException(java.io.IOException) OperationNotSupportedException(javax.naming.OperationNotSupportedException) FileNotFoundException(java.io.FileNotFoundException) ICountFingerprint(org.openscience.cdk.fingerprint.ICountFingerprint) FilesWithHeaderWriter(ambit2.core.io.FilesWithHeaderWriter) MultiFingerprintsWriter(ambit2.core.io.fp.MultiFingerprintsWriter) File(java.io.File) FileInputState(ambit2.core.io.FileInputState) Map(java.util.Map) HashMap(java.util.HashMap)

Aggregations

Property (ambit2.base.data.Property)1 IStructureRecord (ambit2.base.interfaces.IStructureRecord)1 FileInputState (ambit2.core.io.FileInputState)1 FileOutputState (ambit2.core.io.FileOutputState)1 FilesWithHeaderWriter (ambit2.core.io.FilesWithHeaderWriter)1 MultiFingerprintsWriter (ambit2.core.io.fp.MultiFingerprintsWriter)1 MoleculeReader (ambit2.core.processors.structure.MoleculeReader)1 BatchDBProcessor (ambit2.db.processors.BatchDBProcessor)1 PropertyChangeEvent (java.beans.PropertyChangeEvent)1 PropertyChangeListener (java.beans.PropertyChangeListener)1 File (java.io.File)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 ConnectException (java.net.ConnectException)1 SQLException (java.sql.SQLException)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 Iterator (java.util.Iterator)1 List (java.util.List)1 Map (java.util.Map)1