Search in sources:

Example 1 with StructureNormalizer

Use of ambit2.core.processors.StructureNormalizer in project ambit-mirror by ideaconsult.

From the class RawIteratingWrapperTest, method testPubChemSubstance.

/**
 * Reads the Tox21 PubChem excerpt with {@link RawIteratingSDFReader}, runs every record
 * through {@link StructureNormalizer} and verifies that the identifier properties
 * (PUBCHEM_SID, PUBCHEM Name, DSSTox_GSID, CASRN, DSSTox_RID) carry non-null values.
 * Expects exactly three records, each of them exposing a PUBCHEM_SID property.
 *
 * @throws Exception if reading or normalizing a record fails
 */
@Test
public void testPubChemSubstance() throws Exception {
    InputStream in = RawIteratingWrapperTest.class.getClassLoader().getResourceAsStream("ambit2/core/pubchem/tox21_excerpt.sdf");
    // A missing fixture should fail with a readable message rather than an NPE inside InputStreamReader.
    Assert.assertNotNull("Test resource ambit2/core/pubchem/tox21_excerpt.sdf not found on the classpath", in);
    RawIteratingSDFReader reader = new RawIteratingSDFReader(new InputStreamReader(in));
    try {
        reader.setReference(LiteratureEntry.getInstance("tox21.sdf"));
        StructureNormalizer normalizer = new StructureNormalizer();
        int count = 0;
        int sid = 0;
        while (reader.hasNext()) {
            IStructureRecord record = (IStructureRecord) reader.next();
            IStructureRecord normalized = normalizer.process(record);
            for (Property p : normalized.getRecordProperties()) {
                if ("PUBCHEM_SID".equals(p.getName())) {
                    Assert.assertNotNull(normalized.getRecordProperty(p));
                    // Counted separately: every record is expected to carry a substance id.
                    sid++;
                } else if ("PUBCHEM Name".equals(p.getName())) {
                    Assert.assertNotNull(normalized.getRecordProperty(p));
                } else if ("DSSTox_GSID".equals(p.getName())) {
                    Assert.assertNotNull(normalized.getRecordProperty(p));
                } else if ("CASRN".equals(p.getName())) {
                    Assert.assertNotNull(normalized.getRecordProperty(p));
                } else if ("DSSTox_RID".equals(p.getName())) {
                    Assert.assertNotNull(normalized.getRecordProperty(p));
                }
            }
            count++;
        }
        Assert.assertEquals(3, sid);
        Assert.assertEquals(3, count);
    } finally {
        // Release the reader even when an assertion or the normalizer fails.
        reader.close();
    }
}
Also used : IStructureRecord(ambit2.base.interfaces.IStructureRecord) RawIteratingSDFReader(ambit2.core.io.RawIteratingSDFReader) InputStreamReader(java.io.InputStreamReader) StructureNormalizer(ambit2.core.processors.StructureNormalizer) InputStream(java.io.InputStream) Property(ambit2.base.data.Property) Test(org.junit.Test)

Example 2 with StructureNormalizer

Use of ambit2.core.processors.StructureNormalizer in project ambit-mirror by ideaconsult.

From the class InChIChemicalsTableWriterTest, method testProcess.

/**
 * Loads a dataset in which five chemicals have no InChI, normalizes every retrievable
 * structure with {@link StructureNormalizer}, writes it through {@link InChIChemicalsWriter}
 * and asserts that four chemicals end up with a non-null InChI.
 *
 * @throws Exception if the database fixture cannot be set up or queried
 */
@Test
public void testProcess() throws Exception {
    StructureNormalizer normalizer = new StructureNormalizer();
    // no inchis as well
    setUpDatabaseFromResource("ambit2/db/processors/test/dataset_nofp.xml");
    IDatabaseConnection dbConnection = getConnection();
    ITable chemicals = dbConnection.createQueryTable("EXPECTED_FP", "SELECT * FROM chemicals where inchi is null");
    Assert.assertEquals(5, chemicals.getRowCount());
    RepositoryReader reader = new RepositoryReader();
    RetrieveStructure molReader = new RetrieveStructure(true);
    reader.setConnection(dbConnection.getConnection());
    InChIChemicalsWriter inchiWriter = new InChIChemicalsWriter();
    inchiWriter.setConnection(dbConnection.getConnection());
    inchiWriter.open();
    try {
        int errors = 0;
        reader.open();
        try {
            QueryExecutor<RetrieveStructure> exec = new QueryExecutor<RetrieveStructure>();
            exec.setConnection(dbConnection.getConnection());
            while (reader.hasNext()) {
                IStructureRecord o = reader.next();
                String content = reader.getStructure(o.getIdstructure());
                if (content == null)
                    continue;
                molReader.setValue(o);
                ResultSet rs = exec.process(molReader);
                try {
                    while (rs.next()) {
                        IStructureRecord record = molReader.getObject(rs);
                        try {
                            record = normalizer.process(record);
                            inchiWriter.process(record);
                        } catch (Exception x) {
                            // Best effort: a record that cannot be normalized or written is skipped;
                            // the count assertion below decides whether the test passes.
                            errors++;
                        }
                    }
                } finally {
                    rs.close();
                }
                o.clear();
            }
        } finally {
            // Same position as in the success path of the original: reader is closed before the verification query.
            reader.close();
        }
        chemicals = dbConnection.createQueryTable("EXPECTED_FP", "SELECT count(*) as c FROM chemicals where inchi is not null");
        // Surface the number of swallowed failures so a wrong count is easier to diagnose.
        Assert.assertEquals("Unexpected number of chemicals with InChI (records that failed processing: " + errors + ")", new BigInteger("4"), chemicals.getValue(0, "c"));
    } finally {
        inchiWriter.close();
    }
}
Also used : StructureNormalizer(ambit2.core.processors.StructureNormalizer) InChIChemicalsWriter(ambit2.db.update.chemical.InChIChemicalsWriter) IStructureRecord(ambit2.base.interfaces.IStructureRecord) QueryExecutor(ambit2.db.search.QueryExecutor) ResultSet(java.sql.ResultSet) BigInteger(java.math.BigInteger) ITable(org.dbunit.dataset.ITable) IDatabaseConnection(org.dbunit.database.IDatabaseConnection) RetrieveStructure(ambit2.db.readers.RetrieveStructure) RepositoryReader(ambit2.db.RepositoryReader) Test(org.junit.Test)

Example 3 with StructureNormalizer

Use of ambit2.core.processors.StructureNormalizer in project ambit-mirror by ideaconsult.

From the class RawIteratingWrapperTest, method testDX.

/**
 * Reads the DX predictions SDF with {@link RawIteratingSDFReader}, normalizes the record and
 * checks that each of its properties has a value, a label and a name, and that properties
 * named {@code DX.*} are labelled with an OpenTox ECHA endpoint URI. Expects a single
 * record exposing 27 properties.
 *
 * @throws Exception if reading or normalizing the record fails
 */
@Test
public void testDX() throws Exception {
    InputStream in = RawIteratingWrapperTest.class.getClassLoader().getResourceAsStream("ambit2/core/data/dx/predictions.sdf");
    // A missing fixture should fail with a readable message rather than an NPE inside InputStreamReader.
    Assert.assertNotNull("Test resource ambit2/core/data/dx/predictions.sdf not found on the classpath", in);
    RawIteratingSDFReader reader = new RawIteratingSDFReader(new InputStreamReader(in));
    try {
        reader.setReference(LiteratureEntry.getInstance("predictions.sdf"));
        StructureNormalizer normalizer = new StructureNormalizer();
        int count = 0;
        while (reader.hasNext()) {
            IStructureRecord record = (IStructureRecord) reader.next();
            IStructureRecord normalized = normalizer.process(record);
            int rr = 0;
            for (Property p : normalized.getRecordProperties()) {
                rr++;
                Assert.assertNotNull(normalized.getRecordProperty(p));
                Assert.assertNotNull(p.getLabel());
                Assert.assertNotNull(p.getName());
                if (p.getName().startsWith("DX.")) {
                    Assert.assertTrue(p.getLabel().startsWith("http://www.opentox.org/echaEndpoints.owl#"));
                }
            }
            Assert.assertEquals(27, rr);
            // NOTE(review): a follow-up check that ran the record through DXParser was disabled
            // (commented out) in the original test and is not exercised here.
            count++;
        }
        Assert.assertEquals(1, count);
    } finally {
        // Release the reader even when an assertion or the normalizer fails.
        reader.close();
    }
}
Also used : IStructureRecord(ambit2.base.interfaces.IStructureRecord) RawIteratingSDFReader(ambit2.core.io.RawIteratingSDFReader) InputStreamReader(java.io.InputStreamReader) StructureNormalizer(ambit2.core.processors.StructureNormalizer) InputStream(java.io.InputStream) DXParser(ambit2.core.io.dx.DXParser) Property(ambit2.base.data.Property) Test(org.junit.Test)

Example 4 with StructureNormalizer

Use of ambit2.core.processors.StructureNormalizer in project ambit-mirror by ideaconsult.

From the class Context, method parseCommandPreprocessing.

/**
 * Runs the preprocessing command.
 * <p>
 * Collects the {@link FPTable} options enabled in the CLI options (defaulting to
 * {@code FPTable.inchi} when none is set), builds a {@link DbReader} batch whose processor
 * chain first retrieves the structure content and then applies the generators and writers
 * for the selected options, and executes the batch over the query matching those options.
 * The connection is used with auto-commit off; indices are disabled before the batch runs
 * and re-enabled afterwards, and the batch is committed and closed in all cases.
 * <p>
 * If none of the selected options is handled by this method no query is defined; a warning
 * is logged and the method returns without opening a connection.
 *
 * @throws Exception if the page size cannot be parsed or the connection cannot be opened
 */
public void parseCommandPreprocessing() throws Exception {
    int pagesize = parsePageSizeParam();
    Set<FPTable> preprocessingOption = new TreeSet<FPTable>();
    _preprocessingoptions[] po = _preprocessingoptions.values();
    for (_preprocessingoptions p : po) {
        try {
            if ((Boolean) options.getParam(p.toString())) {
                FPTable[] to = p.getOption();
                for (FPTable t : to) {
                    preprocessingOption.add(t);
                }
            }
        } catch (Exception x) {
            // An option that cannot be read as a Boolean is reported and treated as not set.
            logger_cli.log(Level.WARNING, x.toString());
        }
    }
    DbReader<IStructureRecord> batch = new DbReader<IStructureRecord>() {

        /**
         */
        private static final long serialVersionUID = 6777121852891369530L;

        @Override
        public void onItemRead(IStructureRecord input, IBatchStatistics stats) {
            super.onItemRead(input, stats);
            // Log progress and commit every 5000 records read.
            if ((stats.getRecords(RECORDS_STATS.RECORDS_READ) % 5000) == 0)
                try {
                    logger_cli.log(Level.INFO, stats.toString());
                    getConnection().commit();
                } catch (Exception x) {
                    logger_cli.log(Level.WARNING, x.getMessage());
                }
        }

        @Override
        public void onError(IStructureRecord input, Object output, IBatchStatistics stats, Exception x) {
            super.onError(input, output, stats, x);
            logger_cli.log(Level.SEVERE, x.getMessage());
        }
    };
    batch.setProcessorChain(new ProcessorsChain<IStructureRecord, IBatchStatistics, IProcessor>());
    /* structure */
    RetrieveStructure queryP = new RetrieveStructure(true);
    queryP.setFieldname(true);
    queryP.setPageSize(1);
    queryP.setPage(0);
    MasterDetailsProcessor<IStructureRecord, IStructureRecord, IQueryCondition> strucReader = new MasterDetailsProcessor<IStructureRecord, IStructureRecord, IQueryCondition>(queryP) {

        /**
         */
        private static final long serialVersionUID = -5350168222668294207L;

        @Override
        protected void configureQuery(IStructureRecord target, IParameterizedQuery<IStructureRecord, IStructureRecord, IQueryCondition> query) throws AmbitException {
            // Deliberately does not delegate to super.configureQuery.
            query.setValue(target);
        }

        @Override
        protected IStructureRecord processDetail(IStructureRecord master, IStructureRecord detail) throws Exception {
            // Copy the retrieved structure representation onto the record being processed.
            master.setContent(detail.getContent());
            master.setFormat(detail.getFormat());
            master.setType(detail.getType());
            return master;
        }
    };
    strucReader.setCloseConnection(false);
    batch.getProcessorChain().add(strucReader);
    // preprocessing itself
    // query
    IQueryRetrieval<IStructureRecord> query = null;
    AbstractUpdate updateQuery = null;
    if (preprocessingOption.isEmpty())
        preprocessingOption.add(FPTable.inchi);
    if (preprocessingOption.contains(FPTable.inchi)) {
        query = new MissingInChIsQuery("UNKNOWN");
        updateQuery = new UpdateChemical();
        batch.getProcessorChain().add(new DefaultAmbitProcessor<IStructureRecord, IStructureRecord>() {

            /**
             */
            private static final long serialVersionUID = -7628269103516836861L;

            protected transient StructureNormalizer normalizer = new StructureNormalizer();

            @Override
            public IStructureRecord process(IStructureRecord record) throws Exception {
                try {
                    normalizer.process(record);
                    return record;
                } catch (Exception x) {
                    // Normalization failed: mark the structure type as NA and keep the record in the chain.
                    record.setType(STRUC_TYPE.NA);
                    return record;
                }
            }
        });
        batch.getProcessorChain().add(new AbstractUpdateProcessor<Object, IChemical>(OP.CREATE, updateQuery) {

            /**
             */
            private static final long serialVersionUID = 9019409150445247686L;

            @Override
            protected IChemical execute(Object group, IQueryUpdate<Object, IChemical> query) throws SQLException, OperationNotSupportedException, AmbitException {
                if (group instanceof IChemical)
                    query.setObject((IChemical) group);
                return super.execute(group, query);
            }
        });
    } else {
        // NOTE(review): when several fingerprint options are selected, each assignment below
        // overwrites the previous query, so only the last one drives the batch - confirm intent.
        // add generators
        if (preprocessingOption.contains(FPTable.smarts_accelerator)) {
            query = new MissingFingerprintsQuery(FPTable.smarts_accelerator);
            batch.getProcessorChain().add(new SMARTSPropertiesGenerator());
        }
        if (preprocessingOption.contains(FPTable.fp1024)) {
            query = new FingerprintsByStatus(FPTable.fp1024);
            batch.getProcessorChain().add(new BitSetGenerator(FPTable.fp1024));
        }
        if (preprocessingOption.contains(FPTable.sk1024)) {
            query = new FingerprintsByStatus(FPTable.sk1024);
            batch.getProcessorChain().add(new BitSetGenerator(FPTable.sk1024));
        }
        // add writers
        if (preprocessingOption.contains(FPTable.smarts_accelerator)) {
            batch.getProcessorChain().add(new SMARTSAcceleratorWriter());
        }
        if (preprocessingOption.contains(FPTable.fp1024)) {
            batch.getProcessorChain().add(new FP1024Writer(FPTable.fp1024));
        }
        if (preprocessingOption.contains(FPTable.sk1024)) {
            batch.getProcessorChain().add(new FP1024Writer(FPTable.sk1024));
        }
        if (preprocessingOption.contains(FPTable.cf1024)) {
            query = new FingerprintsByStatus(FPTable.cf1024);
            batch.getProcessorChain().add(new BitSetGenerator(FPTable.cf1024));
            batch.getProcessorChain().add(new FP1024Writer(FPTable.cf1024));
        }
    }
    if (query == null) {
        // None of the selected options is handled above. Previously this fell through to a
        // NullPointerException on query.setPageSize after the connection had been opened.
        logger_cli.log(Level.WARNING, "No query is defined for the requested preprocessing options: " + preprocessingOption);
        return;
    }
    batch.setHandlePrescreen(false);
    DBConnectionConfigurable<Context> dbc = getConnection(options.getSQLConfig());
    Connection c = dbc.getConnection();
    c.setAutoCommit(false);
    batch.setCloseConnection(true);
    batch.setConnection(c);
    batch.open();
    IBatchStatistics stats = null;
    try {
        query.setPageSize(pagesize);
        logger_cli.info(query.getSQL());
        try {
            disableIndices(batch.getConnection());
        } catch (Exception x) {
            logger_cli.warning(x.getMessage());
        }
        logger_cli.log(Level.INFO, "MSG_INFO_QUERY", pagesize);
        stats = batch.process(query);
    } catch (Exception x) {
        logger_cli.log(Level.WARNING, x.getMessage(), x);
    } finally {
        // Each cleanup step is attempted independently so one failure does not skip the rest.
        try {
            batch.getConnection().commit();
        } catch (Exception x) {
            logger_cli.warning(x.getMessage());
        }
        try {
            enableIndices(batch.getConnection());
        } catch (Exception x) {
            logger_cli.warning(x.getMessage());
        }
        try {
            batch.close();
        } catch (Exception x) {
            logger_cli.warning(x.getMessage());
        }
        if (stats != null)
            logger_cli.log(Level.INFO, stats.toString());
    }
}
Also used : IQueryCondition(net.idea.modbcum.i.IQueryCondition) StructureNormalizer(ambit2.core.processors.StructureNormalizer) SQLException(java.sql.SQLException) CliOptions._preprocessingoptions(ambit2.dbcli.CliOptions._preprocessingoptions) SMARTSPropertiesGenerator(ambit2.smarts.processors.SMARTSPropertiesGenerator) IParameterizedQuery(net.idea.modbcum.i.IParameterizedQuery) UpdateChemical(ambit2.db.update.chemical.UpdateChemical) IStructureRecord(ambit2.base.interfaces.IStructureRecord) BitSetGenerator(ambit2.descriptors.processors.BitSetGenerator) MissingInChIsQuery(ambit2.db.search.structure.MissingInChIsQuery) TreeSet(java.util.TreeSet) MasterDetailsProcessor(net.idea.modbcum.p.MasterDetailsProcessor) MissingFingerprintsQuery(ambit2.db.search.structure.MissingFingerprintsQuery) RetrieveStructure(ambit2.db.readers.RetrieveStructure) DbReader(ambit2.db.DbReader) OperationNotSupportedException(javax.naming.OperationNotSupportedException) FPTable(ambit2.descriptors.processors.FPTable) SMARTSAcceleratorWriter(ambit2.db.update.qlabel.smarts.SMARTSAcceleratorWriter) FP1024Writer(ambit2.db.processors.FP1024Writer) IBatchStatistics(net.idea.modbcum.i.batch.IBatchStatistics) MySQLSingleConnection(net.idea.modbcum.c.MySQLSingleConnection) Connection(java.sql.Connection) IProcessor(net.idea.modbcum.i.processors.IProcessor) ICountFingerprint(org.openscience.cdk.fingerprint.ICountFingerprint) IBitFingerprint(org.openscience.cdk.fingerprint.IBitFingerprint) AmbitException(net.idea.modbcum.i.exceptions.AmbitException) SQLException(java.sql.SQLException) ConnectException(java.net.ConnectException) IOException(java.io.IOException) OperationNotSupportedException(javax.naming.OperationNotSupportedException) FileNotFoundException(java.io.FileNotFoundException) AbstractUpdate(net.idea.modbcum.q.update.AbstractUpdate) IChemical(ambit2.base.interfaces.IChemical) FingerprintsByStatus(ambit2.db.search.structure.FingerprintsByStatus) 
AmbitException(net.idea.modbcum.i.exceptions.AmbitException)

Example 5 with StructureNormalizer

Use of ambit2.core.processors.StructureNormalizer in project ambit-mirror by ideaconsult.

From the class CallableFingerprintsCalculator, method createProcessors.

/**
 * Assembles the processor chain for the configured fingerprint type.
 * <p>
 * The chain always starts with a structure retrieval step; the remaining steps are the
 * generator/writer pair selected by {@code getFingerprintsType()}. Types without a
 * dedicated branch get the retrieval step only.
 *
 * @return the assembled processor chain
 * @throws Exception declared by the overridden method
 */
@Override
protected ProcessorsChain<IStructureRecord, IBatchStatistics, IProcessor> createProcessors() throws Exception {
    ProcessorsChain<IStructureRecord, IBatchStatistics, IProcessor> chain = new ProcessorsChain<IStructureRecord, IBatchStatistics, IProcessor>();

    // Structure retrieval comes first, one structure per page.
    RetrieveStructure structureQuery = new RetrieveStructure(true);
    structureQuery.setPageSize(1);
    structureQuery.setPage(0);
    chain.add(new ProcessorStructureRetrieval(structureQuery));

    switch (getFingerprintsType()) {
        // Bit-set fingerprints stored through FP1024Writer.
        case fp1024:
        case sk1024:
        case cf1024:
            chain.add(new BitSetGenerator(getFingerprintsType()));
            chain.add(new FP1024Writer(getFingerprintsType()));
            break;
        case fp1024_struc:
            chain.add(new BitSetGenerator(getFingerprintsType()));
            chain.add(new FPStructureWriter());
            break;
        case smarts_accelerator:
            chain.add(new SMARTSPropertiesGenerator());
            chain.add(new SMARTSAcceleratorWriter());
            break;
        // Both atom-environment types use the matrix generator and writer.
        case atomenvironments:
        case aematrix:
            chain.add(new AtomEnvironmentMatrixGenerator());
            chain.add(new AtomEnvironmentMatrixWriter());
            break;
        case inchi:
            chain.add(new StructureNormalizer());
            chain.add(new InChIChemicalsWriter());
            break;
        default:
            break;
    }
    return chain;
}
Also used : FPStructureWriter(ambit2.db.processors.quality.FPStructureWriter) SMARTSAcceleratorWriter(ambit2.db.update.qlabel.smarts.SMARTSAcceleratorWriter) FP1024Writer(ambit2.db.processors.FP1024Writer) StructureNormalizer(ambit2.core.processors.StructureNormalizer) IBatchStatistics(net.idea.modbcum.i.batch.IBatchStatistics) AtomEnvironmentMatrixGenerator(ambit2.descriptors.processors.AtomEnvironmentMatrixGenerator) IProcessor(net.idea.modbcum.i.processors.IProcessor) SMARTSPropertiesGenerator(ambit2.smarts.processors.SMARTSPropertiesGenerator) InChIChemicalsWriter(ambit2.db.update.chemical.InChIChemicalsWriter) IStructureRecord(ambit2.base.interfaces.IStructureRecord) BitSetGenerator(ambit2.descriptors.processors.BitSetGenerator) AtomEnvironmentMatrixWriter(ambit2.db.update.fpae.AtomEnvironmentMatrixWriter) ProcessorsChain(net.idea.modbcum.i.processors.ProcessorsChain) ProcessorStructureRetrieval(ambit2.db.processors.ProcessorStructureRetrieval) RetrieveStructure(ambit2.db.readers.RetrieveStructure)

Aggregations

IStructureRecord (ambit2.base.interfaces.IStructureRecord)5 StructureNormalizer (ambit2.core.processors.StructureNormalizer)5 RetrieveStructure (ambit2.db.readers.RetrieveStructure)3 Property (ambit2.base.data.Property)2 RawIteratingSDFReader (ambit2.core.io.RawIteratingSDFReader)2 FP1024Writer (ambit2.db.processors.FP1024Writer)2 InChIChemicalsWriter (ambit2.db.update.chemical.InChIChemicalsWriter)2 SMARTSAcceleratorWriter (ambit2.db.update.qlabel.smarts.SMARTSAcceleratorWriter)2 BitSetGenerator (ambit2.descriptors.processors.BitSetGenerator)2 SMARTSPropertiesGenerator (ambit2.smarts.processors.SMARTSPropertiesGenerator)2 InputStream (java.io.InputStream)2 InputStreamReader (java.io.InputStreamReader)2 IBatchStatistics (net.idea.modbcum.i.batch.IBatchStatistics)2 IProcessor (net.idea.modbcum.i.processors.IProcessor)2 Test (org.junit.Test)2 IChemical (ambit2.base.interfaces.IChemical)1 DXParser (ambit2.core.io.dx.DXParser)1 DbReader (ambit2.db.DbReader)1 RepositoryReader (ambit2.db.RepositoryReader)1 ProcessorStructureRetrieval (ambit2.db.processors.ProcessorStructureRetrieval)1