Search in sources :

Example 1 with FileOutputState

use of ambit2.core.io.FileOutputState in project ambit-mirror by ideaconsult.

the class Context method parseCommandFingerprints.

/**
 * Reads structures from the input file, computes every configured fingerprint for each
 * structure and writes the annotated structure to the file named by {@code options.output}.
 *
 * <p>Paging: when the page size parameter is positive, only records numbered
 * {@code page * pagesize + 1} through {@code (page + 1) * pagesize} (counted by records read)
 * are processed, and the batch is cancelled once the upper bound is reached. Otherwise all
 * records are processed.
 *
 * <p>Failures on individual records are logged and do not abort the batch; a failure of the
 * batch itself is logged as a warning rather than rethrown.
 *
 * @param subcommand not read by this method
 * @param now not read by this method
 * @throws FileNotFoundException if no output file was specified
 * @throws Exception if parsing the parameters or opening the input/output fails
 */
public void parseCommandFingerprints(String subcommand, long now) throws Exception {
    // NOTE(review): hard-coded to true, so the single-file branch below is currently never taken.
    boolean multifile = true;
    int page = parsePageParam();
    int pagesize = parsePageSizeParam();
    final boolean fp_count = parseWriteCountParam();
    final boolean fp_raw = parseWriteRawParam();
    String smiles_header = parseInputTag_Param("smiles", IteratingDelimitedFileReader.defaultSMILESHeader);
    String inchi_header = parseInputTag_Param("inchi", "InChI");
    String inchikey_header = parseInputTag_Param("inchikey", "InChIKey");
    Object tmpTag = parseSdfTitleParam();
    final String[] tags_to_keep = parsetags_to_keep();
    final String sdf_title = tmpTag == null ? null : tmpTag.toString().toLowerCase();
    // Record numbers are 1-based; maxRecord is exclusive and non-positive when paging is off.
    final int startRecord = pagesize > 0 ? (page * pagesize + 1) : 1;
    final int maxRecord = pagesize > 0 ? ((page + 1) * pagesize + 1) : pagesize;
    final File file = getInputFile();
    FileInputState input = new FileInputState(file);
    input.setOptionalSMILESHeader(smiles_header);
    input.setOptionalInChIHeader(inchi_header);
    input.setOptionalInChIKeyHeader(inchikey_header);
    if (options.output == null) {
        throw new FileNotFoundException("Output file not specified. Please use -o {file}");
    }
    final File outfile = new File(options.output);
    logger_cli.log(Level.INFO, "MSG_INFO_READINGWRITING", new Object[] { file.getAbsoluteFile(), outfile.getAbsolutePath() });
    final List<IFingerprinter> fps = parseFingerprinterParams();
    FileOutputState out = null;
    final IChemObjectWriter awriter;
    if (multifile) {
        awriter = new MultiFingerprintsWriter(outfile, fps, tags_to_keep);
    } else {
        out = new FileOutputState(outfile);
        awriter = out.getWriter();
        if (awriter instanceof FilesWithHeaderWriter) {
            ((FilesWithHeaderWriter) awriter).setAddSMILEScolumn(false);
        }
    }
    final IChemObjectWriter writer = awriter;
    final boolean writesdf = writer instanceof SDFWriter;

    // Lookup table handed to StructureStandardizer.renameTags before each record is written.
    final Map<Object, Property> tags = new HashMap<>();
    Property newtag = Property.getSMILESInstance();
    newtag.setName("SMILES");
    newtag.setEnabled(false);
    tags.put(Property.opentox_SMILES, newtag);
    tags.put(Property.getSMILESInstance(), newtag);
    newtag = Property.getInChIInstance();
    newtag.setEnabled(false);
    tags.put(Property.opentox_InChI, newtag);
    tags.put("InChI", newtag);
    tags.put(Property.getInChIInstance(), newtag);
    newtag = Property.getInChIKeyInstance();
    newtag.setEnabled(true);
    newtag.setName("InChIKey");
    tags.put(Property.opentox_InChIKey, newtag);
    tags.put(Property.getInChIKeyInstance(), newtag);
    newtag = Property.getInstance(CDKConstants.TITLE, CDKConstants.TITLE);
    newtag.setEnabled(false);
    tags.put(CDKConstants.TITLE, newtag);
    tags.put(newtag, newtag);
    newtag = Property.getInstance("CHEMBL", "CHEMBL");
    newtag.setEnabled(false);
    tags.put("CHEMBL", newtag);
    tags.put(newtag, newtag);

    final BatchDBProcessor<IStructureRecord> batch = new BatchDBProcessor<IStructureRecord>() {

        @Override
        public void onItemRead(IStructureRecord input, IBatchStatistics stats) {
            super.onItemRead(input, stats);
            // Stop reading as soon as the end of the requested page is reached.
            if ((maxRecord > 0) && stats.getRecords(RECORDS_STATS.RECORDS_READ) >= (maxRecord)) {
                cancel();
            }
        }

        @Override
        public boolean skip(IStructureRecord input, IBatchStatistics stats) {
            return (stats.getRecords(RECORDS_STATS.RECORDS_READ) < startRecord) || ((maxRecord > 0) && (stats.getRecords(RECORDS_STATS.RECORDS_READ) >= maxRecord));
        }

        @Override
        public void onItemSkipped(IStructureRecord input, IBatchStatistics stats) {
            super.onItemSkipped(input, stats);
            if (stats.isTimeToPrint(getSilentInterval() * 2)) {
                propertyChangeSupport.firePropertyChange(PROPERTY_BATCHSTATS, null, stats);
            }
        }

        @Override
        public void onItemProcessing(IStructureRecord input, Object output, IBatchStatistics stats) {
        }

        @Override
        public void onError(IStructureRecord input, Object output, IBatchStatistics stats, Exception x) {
            super.onError(input, output, stats, x);
            logger_cli.log(Level.SEVERE, "MSG_ERR", new Object[] { x.getMessage() });
        }

        @Override
        public long getSilentInterval() {
            return 30000L;
        }

        @Override
        public void close() throws Exception {
            try {
                writer.close();
            } catch (Exception x) {
                // A failed close may mean the output is incomplete: report it, but still
                // let the batch release its own resources below.
                logger_cli.log(Level.WARNING, "MSG_ERR", new Object[] { x.getMessage() });
            }
            super.close();
        }
    };
    batch.setProcessorChain(new ProcessorsChain<IStructureRecord, IBatchStatistics, IProcessor>());
    batch.getProcessorChain().add(new DefaultAmbitProcessor<IStructureRecord, IStructureRecord>() {

        protected MoleculeReader molReader = new MoleculeReader(true, false);

        @Override
        public IStructureRecord process(IStructureRecord record) throws Exception {
            IAtomContainer mol;
            try {
                mol = molReader.process(record);
                if (mol == null) {
                    logger_cli.log(Level.SEVERE, "MSG_FINGEPRINTGEN", new Object[] { "Empty molecule", getIds(record) });
                    return record;
                }
                // Copy record properties onto the molecule, honouring the keep-list if present.
                // NOTE(review): Arrays.binarySearch assumes tags_to_keep is sorted — confirm in parsetags_to_keep().
                for (Property p : record.getRecordProperties()) {
                    Object v = record.getRecordProperty(p);
                    String pname = p.getName().replace("http://www.opentox.org/api/1.1#", "");
                    if (tags_to_keep != null && Arrays.binarySearch(tags_to_keep, pname) < 0) {
                        continue;
                    }
                    mol.setProperty(p, v);
                }
                if (tags_to_keep != null) {
                    // Collect first, remove afterwards, to avoid mutating the map while iterating.
                    List<String> toRemove = null;
                    for (Object key : mol.getProperties().keySet()) {
                        if (Arrays.binarySearch(tags_to_keep, key.toString()) < 0) {
                            if (toRemove == null) {
                                toRemove = new ArrayList<String>();
                            }
                            toRemove.add(key.toString());
                        }
                    }
                    if (toRemove != null) {
                        for (String propertyToRemove : toRemove) {
                            mol.removeProperty(propertyToRemove);
                        }
                    }
                }
                AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol);
            } catch (Exception x) {
                logger_cli.log(Level.SEVERE, "MSG_ERR_MOLREAD", new Object[] { getIds(record), x.toString() });
                return record;
            }
            // mol is guaranteed non-null from here on (null and failure cases returned above).
            final IAtomContainer processed = mol;
            for (IFingerprinter fp : fps) {
                // Each fingerprint flavour is best-effort: one that fails is logged and skipped.
                ICountFingerprint cfp = null;
                try {
                    cfp = fp.getCountFingerprint(processed);
                } catch (Exception x) {
                    logger.log(Level.FINER, x.getMessage());
                }
                IBitFingerprint bfp = null;
                try {
                    bfp = fp.getBitFingerprint(processed);
                } catch (Exception x) {
                    logger.log(Level.FINER, x.getMessage());
                }
                Map<String, Integer> fpraw = null;
                try {
                    if (fp_raw) {
                        fpraw = fp.getRawFingerprint(processed);
                    }
                } catch (Exception x) {
                    logger.log(Level.FINER, x.getMessage());
                }
                try {
                    if (cfp != null) {
                        if (fp_count) {
                            processed.setProperty(fp.getClass().getName() + ".count", cfp);
                        }
                        processed.setProperty(fp.getClass().getName(), cfp);
                    }
                    if (bfp != null) {
                        processed.setProperty(fp.getClass().getName() + ".hashed", bfp);
                    }
                    if (fpraw != null) {
                        processed.setProperty(fp.getClass().getName() + ".raw", fpraw);
                    }
                } catch (Exception x) {
                    logger_cli.log(Level.SEVERE, "MSG_FINGEPRINTGEN", new Object[] { x.getMessage(), getIds(record) });
                    processed.setProperty("ERROR." + fp.getClass().getName(), x.getMessage());
                } finally {
                    processed.addProperties(mol.getProperties());
                }
            }
            try {
                if (writesdf && sdf_title != null) {
                    // Use the first property whose lower-cased key matches sdf_title as the SDF title.
                    for (Entry<Object, Object> p : processed.getProperties().entrySet()) {
                        if (sdf_title.equals(p.getKey().toString().toLowerCase())) {
                            processed.setProperty(CDKConstants.TITLE, p.getValue());
                            break;
                        }
                    }
                }
                StructureStandardizer.renameTags(processed, tags, true);
                writer.write(processed);
            } catch (Exception x) {
                logger_cli.log(Level.SEVERE, "MSG_FINGEPRINTGEN", new Object[] { x.getMessage(), getIds(record) });
            }
            return record;
        }
    });
    batch.addPropertyChangeListener(new PropertyChangeListener() {

        @Override
        public void propertyChange(PropertyChangeEvent evt) {
            if (AbstractBatchProcessor.PROPERTY_BATCHSTATS.equals(evt.getPropertyName())) {
                logger_cli.log(Level.INFO, evt.getNewValue().toString());
            }
        }
    });
    IBatchStatistics stats = null;
    try {
        stats = batch.process(input);
    } catch (Exception x) {
        logger_cli.log(Level.WARNING, x.getMessage(), x);
    } finally {
        try {
            batch.close();
        } catch (Exception x) {
            logger_cli.warning(x.getMessage());
        }
        if (stats != null) {
            logger_cli.log(Level.INFO, stats.toString());
        }
    }
}
Also used : IAtomContainer(org.openscience.cdk.interfaces.IAtomContainer) PropertyChangeListener(java.beans.PropertyChangeListener) HashMap(java.util.HashMap) FileNotFoundException(java.io.FileNotFoundException) SDFWriter(org.openscience.cdk.io.SDFWriter) IFingerprinter(org.openscience.cdk.fingerprint.IFingerprinter) IChemObjectWriter(org.openscience.cdk.io.IChemObjectWriter) IStructureRecord(ambit2.base.interfaces.IStructureRecord) MoleculeReader(ambit2.core.processors.structure.MoleculeReader) Iterator(java.util.Iterator) ArrayList(java.util.ArrayList) List(java.util.List) Property(ambit2.base.data.Property) IBitFingerprint(org.openscience.cdk.fingerprint.IBitFingerprint) PropertyChangeEvent(java.beans.PropertyChangeEvent) FileOutputState(ambit2.core.io.FileOutputState) IBatchStatistics(net.idea.modbcum.i.batch.IBatchStatistics) IProcessor(net.idea.modbcum.i.processors.IProcessor) ICountFingerprint(org.openscience.cdk.fingerprint.ICountFingerprint) IBitFingerprint(org.openscience.cdk.fingerprint.IBitFingerprint) BatchDBProcessor(ambit2.db.processors.BatchDBProcessor) AmbitException(net.idea.modbcum.i.exceptions.AmbitException) SQLException(java.sql.SQLException) ConnectException(java.net.ConnectException) IOException(java.io.IOException) OperationNotSupportedException(javax.naming.OperationNotSupportedException) FileNotFoundException(java.io.FileNotFoundException) ICountFingerprint(org.openscience.cdk.fingerprint.ICountFingerprint) FilesWithHeaderWriter(ambit2.core.io.FilesWithHeaderWriter) MultiFingerprintsWriter(ambit2.core.io.fp.MultiFingerprintsWriter) File(java.io.File) FileInputState(ambit2.core.io.FileInputState) Map(java.util.Map) HashMap(java.util.HashMap)

Example 2 with FileOutputState

use of ambit2.core.io.FileOutputState in project ambit-mirror by ideaconsult.

the class AmbitPipeline method parseCommandStandardize.

/**
 * Reads structures from the input file, runs them through a {@code StdzBatchProcessor}
 * configured from the command-line parameters, and writes the result to the file named by
 * {@code options.output}.
 *
 * <p>Paging: when the page size parameter is positive, only records numbered
 * {@code page * pagesize + 1} through {@code (page + 1) * pagesize} (counted by records read)
 * are processed, and the batch is cancelled once the upper bound is reached. Otherwise all
 * records are processed.
 *
 * <p>A failure of the batch itself is logged rather than rethrown.
 *
 * @param subcommand not read by this method
 * @param now not read by this method
 * @throws FileNotFoundException if no output file was specified
 * @throws Exception if parsing the parameters or opening the input/output fails
 */
public void parseCommandStandardize(String subcommand, long now) throws Exception {
    int page = parsePageParam();
    int pagesize = parsePageSizeParam();
    Object tmpTag = parseSdfTitleParam();
    String smiles_header = parseInputTag_Param("smiles", IteratingDelimitedFileReader.defaultSMILESHeader);
    String inchi_header = parseInputTag_Param("inchi", "InChI");
    String inchikey_header = parseInputTag_Param("inchikey", "InChIKey");
    final String sdf_title = tmpTag == null ? null : tmpTag.toString().toLowerCase();
    final StructureStandardizer standardprocessor = new StructureStandardizer(logger_cli);
    standardprocessor.setGenerate2D(parseBooleanParam(":generate2D", false));
    standardprocessor.setGenerateTautomers(parseBooleanParam(":tautomers", false));

    // The SMIRKS processor is optional: a missing file or a load failure leaves it null.
    SMIRKSProcessor tmp = null;
    try {
        Object o = options.getParam(":smirks");
        if (o != null) {
            File smirksConfig = new File(o.toString());
            if (smirksConfig.exists()) {
                tmp = new SMIRKSProcessor(smirksConfig, logger_cli);
                tmp.setEnabled(true);
            } else {
                logger_cli.log(Level.WARNING, "SMIRKS transformation file not found");
            }
        }
    } catch (Exception x) {
        logger_cli.log(Level.SEVERE, x.getMessage());
        tmp = null;
    }
    final SMIRKSProcessor smirksProcessor = tmp;
    standardprocessor.setSplitFragments(parseBooleanParam(":splitfragments", false));
    standardprocessor.setImplicitHydrogens(parseBooleanParam(":implicith", false));
    standardprocessor.setNeutralise(parseBooleanParam(":neutralise", false));
    final String[] tags_to_keep = parsetags_to_keep();
    standardprocessor.setRankTag(parseStringParam(":tag_rank", "RANK"));
    standardprocessor.setInchiTag(parseStringParam(":tag_inchi", "InChI"));
    standardprocessor.setInchiKeyTag(parseStringParam(":tag_inchikey", "InChIKey"));
    standardprocessor.setSMILESTag(parseStringParam(":tag_smiles", "SMILES"));
    standardprocessor.setGenerateInChI(parseBooleanParam(":inchi", true));
    standardprocessor.setGenerateSMILES(parseBooleanParam(":smiles", true));
    standardprocessor.setGenerateSMILES_Canonical(parseBooleanParam(":smilescanonical", false));
    standardprocessor.setGenerateSMILES_Aromatic(parseBooleanParam(":smilesaromatic", false));
    standardprocessor.setGenerateStereofrom2D(parseBooleanParam(":generatestereofrom2d", false));
    standardprocessor.setClearIsotopes(parseBooleanParam(":setClearIsotopes", false));
    final boolean debugatomtypes = parseBooleanParam(":debugatomtypes", false);
    // Record numbers are 1-based; maxRecord is exclusive and non-positive when paging is off.
    final int startRecord = pagesize > 0 ? (page * pagesize + 1) : 1;
    final int maxRecord = pagesize > 0 ? ((page + 1) * pagesize + 1) : pagesize;
    final File file = getInputFile();
    FileInputState in = new FileInputState(file);
    in.setOptionalInChIHeader(inchi_header);
    in.setOptionalInChIKeyHeader(inchikey_header);
    in.setOptionalSMILESHeader(smiles_header);
    if (options.output == null) {
        throw new FileNotFoundException("Output file not specified. Please use -o {file}");
    }
    final File outfile = new File(options.output);
    logger_cli.log(Level.INFO, "MSG_INFO_READINGWRITING", new Object[] { file.getAbsoluteFile(), outfile.getAbsolutePath() });
    FileOutputState out = new FileOutputState(outfile);
    final IChemObjectWriter writer = out.getWriter();
    if (writer instanceof FilesWithHeaderWriter) {
        ((FilesWithHeaderWriter) writer).setAddSMILEScolumn(false);
    }
    final BatchDBProcessor<IStructureRecord> batch = new BatchDBProcessor<IStructureRecord>() {

        @Override
        public void onItemRead(IStructureRecord input, IBatchStatistics stats) {
            super.onItemRead(input, stats);
            // Stop reading as soon as the end of the requested page is reached.
            if ((maxRecord > 0) && stats.getRecords(RECORDS_STATS.RECORDS_READ) >= (maxRecord)) {
                cancel();
            }
        }

        @Override
        public boolean skip(IStructureRecord input, IBatchStatistics stats) {
            return (stats.getRecords(RECORDS_STATS.RECORDS_READ) < startRecord) || ((maxRecord > 0) && (stats.getRecords(RECORDS_STATS.RECORDS_READ) >= maxRecord));
        }

        @Override
        public void onItemSkipped(IStructureRecord input, IBatchStatistics stats) {
            super.onItemSkipped(input, stats);
            if (stats.isTimeToPrint(getSilentInterval() * 2)) {
                propertyChangeSupport.firePropertyChange(PROPERTY_BATCHSTATS, null, stats);
            }
        }

        @Override
        public void onItemProcessing(IStructureRecord input, Object output, IBatchStatistics stats) {
        }

        @Override
        public void onError(IStructureRecord input, Object output, IBatchStatistics stats, Exception x) {
            super.onError(input, output, stats, x);
            logger_cli.log(Level.SEVERE, x.getMessage());
        }

        @Override
        public long getSilentInterval() {
            return 30000L;
        }

        @Override
        public void close() throws Exception {
            try {
                writer.close();
            } catch (Exception x) {
                // A failed close may mean the output is incomplete: report it, but still
                // let the batch release its own resources below.
                logger_cli.log(Level.WARNING, "MSG_ERR", new Object[] { x.getMessage() });
            }
            super.close();
        }
    };
    batch.setProcessorChain(new ProcessorsChain<IStructureRecord, IBatchStatistics, IProcessor>());
    batch.getProcessorChain().add(new StdzBatchProcessor(standardprocessor, smirksProcessor, tags_to_keep, logger_cli, writer, sdf_title, debugatomtypes));
    batch.addPropertyChangeListener(new PropertyChangeListener() {

        @Override
        public void propertyChange(PropertyChangeEvent evt) {
            if (AbstractBatchProcessor.PROPERTY_BATCHSTATS.equals(evt.getPropertyName())) {
                logger_cli.log(Level.INFO, evt.getNewValue().toString());
            }
        }
    });
    IBatchStatistics stats = null;
    try {
        stats = batch.process(in);
    } catch (Exception x) {
        // Short message at WARNING, full stack trace only at FINE.
        StringWriter w = new StringWriter();
        x.printStackTrace(new PrintWriter(w));
        logger_cli.log(Level.WARNING, "MSG_ERR", new Object[] { x.getMessage() });
        logger_cli.log(Level.FINE, "MSG_ERR_DEBUG", new Object[] { x.getMessage(), w.toString() });
    } finally {
        try {
            batch.close();
        } catch (Exception x) {
            logger_cli.log(Level.WARNING, "MSG_ERR", new Object[] { x.getMessage() });
        }
        if (stats != null) {
            logger_cli.log(Level.INFO, "MSG_INFO", new Object[] { stats.toString() });
        }
    }
}
Also used : PropertyChangeListener(java.beans.PropertyChangeListener) FileNotFoundException(java.io.FileNotFoundException) IChemObjectWriter(org.openscience.cdk.io.IChemObjectWriter) IStructureRecord(ambit2.base.interfaces.IStructureRecord) StringWriter(java.io.StringWriter) PrintWriter(java.io.PrintWriter) PropertyChangeEvent(java.beans.PropertyChangeEvent) FileOutputState(ambit2.core.io.FileOutputState) IBatchStatistics(net.idea.modbcum.i.batch.IBatchStatistics) IProcessor(net.idea.modbcum.i.processors.IProcessor) FileNotFoundException(java.io.FileNotFoundException) BatchDBProcessor(ambit2.db.processors.BatchDBProcessor) StructureStandardizer(ambit2.tautomers.processor.StructureStandardizer) StdzBatchProcessor(ambit2.dbcli.processor.StdzBatchProcessor) FilesWithHeaderWriter(ambit2.core.io.FilesWithHeaderWriter) SMIRKSProcessor(ambit2.smarts.processors.SMIRKSProcessor) File(java.io.File) FileInputState(ambit2.core.io.FileInputState)

Aggregations

IStructureRecord (ambit2.base.interfaces.IStructureRecord)2 FileInputState (ambit2.core.io.FileInputState)2 FileOutputState (ambit2.core.io.FileOutputState)2 FilesWithHeaderWriter (ambit2.core.io.FilesWithHeaderWriter)2 BatchDBProcessor (ambit2.db.processors.BatchDBProcessor)2 PropertyChangeEvent (java.beans.PropertyChangeEvent)2 PropertyChangeListener (java.beans.PropertyChangeListener)2 File (java.io.File)2 FileNotFoundException (java.io.FileNotFoundException)2 IBatchStatistics (net.idea.modbcum.i.batch.IBatchStatistics)2 IProcessor (net.idea.modbcum.i.processors.IProcessor)2 IChemObjectWriter (org.openscience.cdk.io.IChemObjectWriter)2 Property (ambit2.base.data.Property)1 MultiFingerprintsWriter (ambit2.core.io.fp.MultiFingerprintsWriter)1 MoleculeReader (ambit2.core.processors.structure.MoleculeReader)1 StdzBatchProcessor (ambit2.dbcli.processor.StdzBatchProcessor)1 SMIRKSProcessor (ambit2.smarts.processors.SMIRKSProcessor)1 StructureStandardizer (ambit2.tautomers.processor.StructureStandardizer)1 IOException (java.io.IOException)1 PrintWriter (java.io.PrintWriter)1