Search in sources :

Example 1 with FP1024Writer

use of ambit2.db.processors.FP1024Writer in project ambit-mirror by ideaconsult.

In the class Context, method parseCommandPreprocessing.

/**
 * Runs the preprocessing command over the structures selected by a "missing data" query.
 *
 * <p>The tables to (re)generate are collected from the boolean command line options named
 * after the {@code _preprocessingoptions} values; when none is switched on,
 * {@code FPTable.inchi} is used. A {@code DbReader} batch is then assembled from:
 * <ol>
 * <li>a structure retrieval step that copies content, format and type into each record;</li>
 * <li>either the InChI branch (structure normalization followed by a chemical update), or the
 * generator/writer pairs for the selected fingerprint tables.</li>
 * </ol>
 * The batch runs on a connection with auto-commit switched off; a commit is issued after every
 * 5000 records read and once more when processing ends. {@code disableIndices} is called before
 * processing and {@code enableIndices} afterwards.
 *
 * <p>If the selected options do not map to any supported query, a warning is logged and the
 * method returns without opening a database connection.
 *
 * @throws Exception if the page size option cannot be parsed, or if the database connection
 *                   cannot be obtained, configured or opened; failures while the batch is
 *                   running are logged and not rethrown
 */
public void parseCommandPreprocessing() throws Exception {
    int pagesize = parsePageSizeParam();
    Set<FPTable> preprocessingOption = new TreeSet<FPTable>();
    for (_preprocessingoptions p : _preprocessingoptions.values()) {
        try {
            if ((Boolean) options.getParam(p.toString())) {
                for (FPTable t : p.getOption()) {
                    preprocessingOption.add(t);
                }
            }
        } catch (Exception x) {
            // Best effort: an option that cannot be read is reported and skipped.
            logger_cli.log(Level.WARNING, x.toString());
        }
    }
    DbReader<IStructureRecord> batch = new DbReader<IStructureRecord>() {

        private static final long serialVersionUID = 6777121852891369530L;

        @Override
        public void onItemRead(IStructureRecord input, IBatchStatistics stats) {
            super.onItemRead(input, stats);
            // Report progress and commit after every 5000 records read.
            if ((stats.getRecords(RECORDS_STATS.RECORDS_READ) % 5000) == 0) {
                try {
                    logger_cli.log(Level.INFO, stats.toString());
                    getConnection().commit();
                } catch (Exception x) {
                    logger_cli.log(Level.WARNING, x.getMessage());
                }
            }
        }

        @Override
        public void onError(IStructureRecord input, Object output, IBatchStatistics stats, Exception x) {
            super.onError(input, output, stats, x);
            logger_cli.log(Level.SEVERE, x.getMessage());
        }
    };
    batch.setProcessorChain(new ProcessorsChain<IStructureRecord, IBatchStatistics, IProcessor>());
    /* structure */
    RetrieveStructure queryP = new RetrieveStructure(true);
    queryP.setFieldname(true);
    queryP.setPageSize(1);
    queryP.setPage(0);
    MasterDetailsProcessor<IStructureRecord, IStructureRecord, IQueryCondition> strucReader = new MasterDetailsProcessor<IStructureRecord, IStructureRecord, IQueryCondition>(queryP) {

        private static final long serialVersionUID = -5350168222668294207L;

        @Override
        protected void configureQuery(IStructureRecord target, IParameterizedQuery<IStructureRecord, IStructureRecord, IQueryCondition> query) throws AmbitException {
            // Only the value is set; the superclass configuration is not invoked.
            query.setValue(target);
        }

        @Override
        protected IStructureRecord processDetail(IStructureRecord master, IStructureRecord detail) throws Exception {
            // Copy the retrieved structure into the record travelling through the chain.
            master.setContent(detail.getContent());
            master.setFormat(detail.getFormat());
            master.setType(detail.getType());
            return master;
        }
    };
    strucReader.setCloseConnection(false);
    batch.getProcessorChain().add(strucReader);
    // preprocessing itself
    // query
    IQueryRetrieval<IStructureRecord> query = null;
    AbstractUpdate updateQuery = null;
    if (preprocessingOption.isEmpty()) {
        preprocessingOption.add(FPTable.inchi);
    }
    if (preprocessingOption.contains(FPTable.inchi)) {
        query = new MissingInChIsQuery("UNKNOWN");
        updateQuery = new UpdateChemical();
        batch.getProcessorChain().add(new DefaultAmbitProcessor<IStructureRecord, IStructureRecord>() {

            private static final long serialVersionUID = -7628269103516836861L;

            protected transient StructureNormalizer normalizer = new StructureNormalizer();

            @Override
            public IStructureRecord process(IStructureRecord record) throws Exception {
                try {
                    normalizer.process(record);
                    return record;
                } catch (Exception x) {
                    // A record that cannot be normalized is passed on with its type set to NA.
                    record.setType(STRUC_TYPE.NA);
                    return record;
                }
            }
        });
        batch.getProcessorChain().add(new AbstractUpdateProcessor<Object, IChemical>(OP.CREATE, updateQuery) {

            private static final long serialVersionUID = 9019409150445247686L;

            @Override
            protected IChemical execute(Object group, IQueryUpdate<Object, IChemical> query) throws SQLException, OperationNotSupportedException, AmbitException {
                if (group instanceof IChemical) {
                    query.setObject((IChemical) group);
                }
                return super.execute(group, query);
            }
        });
    } else {
        // add generators
        if (preprocessingOption.contains(FPTable.smarts_accelerator)) {
            query = new MissingFingerprintsQuery(FPTable.smarts_accelerator);
            batch.getProcessorChain().add(new SMARTSPropertiesGenerator());
        }
        if (preprocessingOption.contains(FPTable.fp1024)) {
            query = new FingerprintsByStatus(FPTable.fp1024);
            batch.getProcessorChain().add(new BitSetGenerator(FPTable.fp1024));
        }
        if (preprocessingOption.contains(FPTable.sk1024)) {
            query = new FingerprintsByStatus(FPTable.sk1024);
            batch.getProcessorChain().add(new BitSetGenerator(FPTable.sk1024));
        }
        // add writers
        if (preprocessingOption.contains(FPTable.smarts_accelerator)) {
            batch.getProcessorChain().add(new SMARTSAcceleratorWriter());
        }
        if (preprocessingOption.contains(FPTable.fp1024)) {
            batch.getProcessorChain().add(new FP1024Writer(FPTable.fp1024));
        }
        if (preprocessingOption.contains(FPTable.sk1024)) {
            batch.getProcessorChain().add(new FP1024Writer(FPTable.sk1024));
        }
        if (preprocessingOption.contains(FPTable.cf1024)) {
            query = new FingerprintsByStatus(FPTable.cf1024);
            batch.getProcessorChain().add(new BitSetGenerator(FPTable.cf1024));
            batch.getProcessorChain().add(new FP1024Writer(FPTable.cf1024));
        }
    }
    if (query == null) {
        // None of the selected tables has a query above. Stop here instead of opening a
        // connection only to fail with a NullPointerException on query.setPageSize().
        logger_cli.log(Level.WARNING, "No query available for the selected preprocessing options " + preprocessingOption + "; nothing to process");
        return;
    }
    batch.setHandlePrescreen(false);
    DBConnectionConfigurable<Context> dbc = getConnection(options.getSQLConfig());
    Connection c = dbc.getConnection();
    c.setAutoCommit(false);
    // The batch owns the connection from here on and closes it in batch.close().
    batch.setCloseConnection(true);
    batch.setConnection(c);
    batch.open();
    IBatchStatistics stats = null;
    try {
        query.setPageSize(pagesize);
        logger_cli.info(query.getSQL());
        try {
            disableIndices(batch.getConnection());
        } catch (Exception x) {
            logger_cli.warning(x.getMessage());
        }
        logger_cli.log(Level.INFO, "MSG_INFO_QUERY", pagesize);
        stats = batch.process(query);
    } catch (Exception x) {
        logger_cli.log(Level.WARNING, x.getMessage(), x);
    } finally {
        // Each cleanup step is attempted independently so one failure does not skip the rest.
        try {
            batch.getConnection().commit();
        } catch (Exception x) {
            logger_cli.warning(x.getMessage());
        }
        try {
            enableIndices(batch.getConnection());
        } catch (Exception x) {
            logger_cli.warning(x.getMessage());
        }
        try {
            batch.close();
        } catch (Exception x) {
            logger_cli.warning(x.getMessage());
        }
        if (stats != null) {
            logger_cli.log(Level.INFO, stats.toString());
        }
    }
}
Also used : IQueryCondition(net.idea.modbcum.i.IQueryCondition) StructureNormalizer(ambit2.core.processors.StructureNormalizer) SQLException(java.sql.SQLException) CliOptions._preprocessingoptions(ambit2.dbcli.CliOptions._preprocessingoptions) SMARTSPropertiesGenerator(ambit2.smarts.processors.SMARTSPropertiesGenerator) IParameterizedQuery(net.idea.modbcum.i.IParameterizedQuery) UpdateChemical(ambit2.db.update.chemical.UpdateChemical) IStructureRecord(ambit2.base.interfaces.IStructureRecord) BitSetGenerator(ambit2.descriptors.processors.BitSetGenerator) MissingInChIsQuery(ambit2.db.search.structure.MissingInChIsQuery) TreeSet(java.util.TreeSet) MasterDetailsProcessor(net.idea.modbcum.p.MasterDetailsProcessor) MissingFingerprintsQuery(ambit2.db.search.structure.MissingFingerprintsQuery) RetrieveStructure(ambit2.db.readers.RetrieveStructure) DbReader(ambit2.db.DbReader) OperationNotSupportedException(javax.naming.OperationNotSupportedException) FPTable(ambit2.descriptors.processors.FPTable) SMARTSAcceleratorWriter(ambit2.db.update.qlabel.smarts.SMARTSAcceleratorWriter) FP1024Writer(ambit2.db.processors.FP1024Writer) IBatchStatistics(net.idea.modbcum.i.batch.IBatchStatistics) MySQLSingleConnection(net.idea.modbcum.c.MySQLSingleConnection) Connection(java.sql.Connection) IProcessor(net.idea.modbcum.i.processors.IProcessor) ICountFingerprint(org.openscience.cdk.fingerprint.ICountFingerprint) IBitFingerprint(org.openscience.cdk.fingerprint.IBitFingerprint) AmbitException(net.idea.modbcum.i.exceptions.AmbitException) SQLException(java.sql.SQLException) ConnectException(java.net.ConnectException) IOException(java.io.IOException) OperationNotSupportedException(javax.naming.OperationNotSupportedException) FileNotFoundException(java.io.FileNotFoundException) AbstractUpdate(net.idea.modbcum.q.update.AbstractUpdate) IChemical(ambit2.base.interfaces.IChemical) FingerprintsByStatus(ambit2.db.search.structure.FingerprintsByStatus) 
AmbitException(net.idea.modbcum.i.exceptions.AmbitException)

Example 2 with FP1024Writer

use of ambit2.db.processors.FP1024Writer in project ambit-mirror by ideaconsult.

In the class FP1024WriterTest, method testProcess.

/**
 * Generates fingerprints for every structure of the {@code dataset_nofp.xml} fixture and writes
 * them with {@link FP1024Writer}.
 *
 * <p>Before processing, the fixture is expected to hold 5 structures and no fingerprint rows
 * with status {@code valid}. Afterwards the fingerprint table is expected to hold 5 rows:
 * 4 with status {@code valid} and 1 with status {@code error}.
 *
 * <p>The result set, the reader and the writer are released in {@code finally} blocks so a
 * failing assertion or an unexpected exception does not leave them open.
 *
 * @throws Exception on database set-up, query or close failures
 */
@Test
public void testProcess() throws Exception {
    BitSetGenerator generator = new BitSetGenerator(getMode());
    setUpDatabaseFromResource("ambit2/db/processors/test/dataset_nofp.xml");
    IDatabaseConnection dbConnection = getConnection();
    String query = "SELECT idchemical,idstructure,uncompress(structure) as c,format FROM structure";
    ITable chemicals = dbConnection.createQueryTable("EXPECTED_CHEMICALS", query);
    ITable fp = dbConnection.createQueryTable("EXPECTED_FP", String.format("SELECT * FROM %s where status='valid'", generator.getFpmode().getTable()));
    Assert.assertEquals(5, chemicals.getRowCount());
    Assert.assertEquals(0, fp.getRowCount());
    RepositoryReader reader = new RepositoryReader();
    RetrieveStructure molReader = new RetrieveStructure();
    reader.setConnection(dbConnection.getConnection());
    FP1024Writer fpWriter = new FP1024Writer(generator.getFpmode());
    fpWriter.setConnection(dbConnection.getConnection());
    fpWriter.open();
    try {
        reader.open();
        try {
            QueryExecutor<RetrieveStructure> exec = new QueryExecutor<RetrieveStructure>();
            exec.setConnection(dbConnection.getConnection());
            while (reader.hasNext()) {
                IStructureRecord o = reader.next();
                String content = reader.getStructure(o.getIdstructure());
                if (content == null) {
                    continue;
                }
                molReader.setValue(o);
                ResultSet rs = exec.process(molReader);
                try {
                    while (rs.next()) {
                        IStructureRecord record = molReader.getObject(rs);
                        if (record == null) {
                            continue;
                        }
                        try {
                            record = generator.process(record);
                            fpWriter.write(record);
                        } catch (Exception x) {
                            // A failure on one record is logged; the remaining records are still processed.
                            logger.log(Level.SEVERE, x.getMessage(), x);
                        }
                    }
                } finally {
                    rs.close();
                }
                o.clear();
            }
        } finally {
            reader.close();
        }
        fp = dbConnection.createQueryTable("EXPECTED_FP", String.format("SELECT count(*) as c FROM %s where status = 'valid'", generator.getFpmode().getTable()));
        Assert.assertEquals(new BigInteger("4"), fp.getValue(0, "c"));
        fp = dbConnection.createQueryTable("EXPECTED_FP", String.format("SELECT * FROM %s", generator.getFpmode().getTable()));
        Assert.assertEquals(5, fp.getRowCount());
        fp = dbConnection.createQueryTable("EXPECTED_FP", String.format("SELECT count(*) as c FROM %s where status = 'error'", generator.getFpmode().getTable()));
        Assert.assertEquals(new BigInteger("1"), fp.getValue(0, "c"));
    } finally {
        // Closed last, after the verification queries, as in the success path of the original test.
        fpWriter.close();
    }
}
Also used : FP1024Writer(ambit2.db.processors.FP1024Writer) IChemObjectBuilder(org.openscience.cdk.interfaces.IChemObjectBuilder) BitSetGenerator(ambit2.descriptors.processors.BitSetGenerator) IStructureRecord(ambit2.base.interfaces.IStructureRecord) QueryExecutor(ambit2.db.search.QueryExecutor) ResultSet(java.sql.ResultSet) BigInteger(java.math.BigInteger) ITable(org.dbunit.dataset.ITable) IDatabaseConnection(org.dbunit.database.IDatabaseConnection) RetrieveStructure(ambit2.db.readers.RetrieveStructure) RepositoryReader(ambit2.db.RepositoryReader) Test(org.junit.Test)

Example 3 with FP1024Writer

use of ambit2.db.processors.FP1024Writer in project ambit-mirror by ideaconsult.

In the class CallableFingerprintsCalculator, method createProcessors.

/**
 * Builds the processor chain for the fingerprint type reported by {@code getFingerprintsType()}.
 *
 * <p>The chain always starts with a structure retrieval step (page 0, page size 1). A generator
 * and a matching writer are appended for the known fingerprint types; any other type leaves the
 * chain with the retrieval step only.
 *
 * @return the assembled chain, never {@code null}
 * @throws Exception declared by the overridden method
 */
@Override
protected ProcessorsChain<IStructureRecord, IBatchStatistics, IProcessor> createProcessors() throws Exception {
    ProcessorsChain<IStructureRecord, IBatchStatistics, IProcessor> chain = new ProcessorsChain<IStructureRecord, IBatchStatistics, IProcessor>();
    RetrieveStructure structureQuery = new RetrieveStructure(true);
    structureQuery.setPageSize(1);
    structureQuery.setPage(0);
    chain.add(new ProcessorStructureRetrieval(structureQuery));

    switch (getFingerprintsType()) {
        // Bit set fingerprints stored through FP1024Writer.
        case fp1024:
        case sk1024:
        case cf1024:
            chain.add(new BitSetGenerator(getFingerprintsType()));
            chain.add(new FP1024Writer(getFingerprintsType()));
            break;
        // Same generator, but written per structure.
        case fp1024_struc:
            chain.add(new BitSetGenerator(getFingerprintsType()));
            chain.add(new FPStructureWriter());
            break;
        case smarts_accelerator:
            chain.add(new SMARTSPropertiesGenerator());
            chain.add(new SMARTSAcceleratorWriter());
            break;
        // Both atom environment types use the matrix generator and writer.
        case atomenvironments:
        case aematrix:
            chain.add(new AtomEnvironmentMatrixGenerator());
            chain.add(new AtomEnvironmentMatrixWriter());
            break;
        case inchi:
            chain.add(new StructureNormalizer());
            chain.add(new InChIChemicalsWriter());
            break;
        default:
            // No generator or writer for any other type.
            break;
    }
    return chain;
}
Also used : FPStructureWriter(ambit2.db.processors.quality.FPStructureWriter) SMARTSAcceleratorWriter(ambit2.db.update.qlabel.smarts.SMARTSAcceleratorWriter) FP1024Writer(ambit2.db.processors.FP1024Writer) StructureNormalizer(ambit2.core.processors.StructureNormalizer) IBatchStatistics(net.idea.modbcum.i.batch.IBatchStatistics) AtomEnvironmentMatrixGenerator(ambit2.descriptors.processors.AtomEnvironmentMatrixGenerator) IProcessor(net.idea.modbcum.i.processors.IProcessor) SMARTSPropertiesGenerator(ambit2.smarts.processors.SMARTSPropertiesGenerator) InChIChemicalsWriter(ambit2.db.update.chemical.InChIChemicalsWriter) IStructureRecord(ambit2.base.interfaces.IStructureRecord) BitSetGenerator(ambit2.descriptors.processors.BitSetGenerator) AtomEnvironmentMatrixWriter(ambit2.db.update.fpae.AtomEnvironmentMatrixWriter) ProcessorsChain(net.idea.modbcum.i.processors.ProcessorsChain) ProcessorStructureRetrieval(ambit2.db.processors.ProcessorStructureRetrieval) RetrieveStructure(ambit2.db.readers.RetrieveStructure)

Aggregations

IStructureRecord (ambit2.base.interfaces.IStructureRecord)3 FP1024Writer (ambit2.db.processors.FP1024Writer)3 RetrieveStructure (ambit2.db.readers.RetrieveStructure)3 BitSetGenerator (ambit2.descriptors.processors.BitSetGenerator)3 StructureNormalizer (ambit2.core.processors.StructureNormalizer)2 SMARTSAcceleratorWriter (ambit2.db.update.qlabel.smarts.SMARTSAcceleratorWriter)2 SMARTSPropertiesGenerator (ambit2.smarts.processors.SMARTSPropertiesGenerator)2 IBatchStatistics (net.idea.modbcum.i.batch.IBatchStatistics)2 IProcessor (net.idea.modbcum.i.processors.IProcessor)2 IChemical (ambit2.base.interfaces.IChemical)1 DbReader (ambit2.db.DbReader)1 RepositoryReader (ambit2.db.RepositoryReader)1 ProcessorStructureRetrieval (ambit2.db.processors.ProcessorStructureRetrieval)1 FPStructureWriter (ambit2.db.processors.quality.FPStructureWriter)1 QueryExecutor (ambit2.db.search.QueryExecutor)1 FingerprintsByStatus (ambit2.db.search.structure.FingerprintsByStatus)1 MissingFingerprintsQuery (ambit2.db.search.structure.MissingFingerprintsQuery)1 MissingInChIsQuery (ambit2.db.search.structure.MissingInChIsQuery)1 InChIChemicalsWriter (ambit2.db.update.chemical.InChIChemicalsWriter)1 UpdateChemical (ambit2.db.update.chemical.UpdateChemical)1