Search in sources :

Example 21 with RawIteratingSDFReader

use of ambit2.core.io.RawIteratingSDFReader in project ambit-mirror by ideaconsult.

the class Context method parseCommandSplit.

/**
 * Splits the SDF file named by {@code options.input} into numbered chunk files inside the
 * directory named by {@code options.output}.
 *
 * <p>Chunk files are named {@code <chunk>_<input file name>}, with chunk numbers starting at 1.
 * The number of records per chunk is read from
 * {@code options.command[subcommand].params[":chunk"].value}; if that value is missing, cannot be
 * parsed, or is not positive, 10000 records per chunk are used.
 *
 * @param subcommand key of the sub-command node in {@code options.command} that holds the parameters
 * @param now        start timestamp in milliseconds, used only to log the elapsed time on completion
 * @return the number of the last chunk file that was opened
 * @throws Exception if {@code options.output} is not an existing directory, or if reading the
 *                   input or writing a chunk fails
 */
protected long parseCommandSplit(String subcommand, long now) throws Exception {
    final long defaultChunkSize = 10000;
    RawIteratingSDFReader reader = null;
    Writer writer = null;
    long chunksize = defaultChunkSize;
    JsonNode scmd = options.command.get(subcommand);
    try {
        JsonNode scommand = scmd.get("params");
        JsonNode chunkNode = scommand.get(":chunk");
        chunksize = Long.parseLong(chunkNode.get("value").textValue());
    } catch (Exception x) {
        // Best effort: any missing node or unparsable value leaves the default chunk size in place.
        logger_cli.log(Level.WARNING, x.getMessage(), x);
    }
    if (chunksize <= 0) {
        // A non-positive size would rotate before every record and leave the first chunk file empty.
        logger_cli.log(Level.WARNING, String.format("Invalid chunk size %d, using default %d", chunksize, defaultChunkSize));
        chunksize = defaultChunkSize;
    }
    int chunk = 1;
    long chunk_started = System.currentTimeMillis();
    try {
        File file = new File(options.input);
        File outdir = new File(options.output);
        logger_cli.log(Level.INFO, "MSG_INFO_COMMAND_SPLIT", new Object[] { file.getAbsoluteFile(), chunksize, outdir.getAbsolutePath() });
        if (!(outdir.exists() && outdir.isDirectory())) {
            throw new Exception(String.format("ERROR: %s is not an existing directory.", options.output));
        }
        reader = new RawIteratingSDFReader(new FileReader(file));
        File outfile = new File(outdir, String.format("%d_%s", chunk, file.getName()));
        chunk_started = System.currentTimeMillis();
        logger_cli.log(Level.INFO, "MSG_INFO_COMMAND_CHUNK", new Object[] { chunk, outfile.getAbsolutePath() });
        writer = new FileWriter(outfile);
        // long, because it is compared against the long chunk size
        long records = 0;
        while (reader.hasNext()) {
            if (records >= chunksize) {
                // Drop the reference before closing so the finally block never closes or
                // reports this chunk a second time if opening the next file fails.
                Writer full = writer;
                writer = null;
                closeChunkWriter(full, chunk, chunk_started);
                chunk++;
                outfile = new File(outdir, String.format("%d_%s", chunk, file.getName()));
                writer = new FileWriter(outfile);
                records = 0;
                chunk_started = System.currentTimeMillis();
                logger_cli.log(Level.INFO, "MSG_INFO_COMMAND_CHUNK", new Object[] { chunk, outfile.getAbsolutePath() });
            }
            IStructureRecord record = reader.nextRecord();
            writer.write(record.getContent());
            if ((records % 10000) == 0) {
                // progress marker on the console, plus a periodic flush of the current chunk
                System.out.print('.');
                writer.flush();
            }
            records++;
        }
        return chunk;
    } finally {
        logger_cli.log(Level.INFO, "MSG_INFO_COMPLETED", (System.currentTimeMillis() - now));
        try {
            if (reader != null)
                reader.close();
        } catch (Exception x) {
            // Closing the input is best effort, but the failure should not vanish silently.
            logger_cli.log(Level.WARNING, x.getMessage(), x);
        }
        // Only report a written chunk when a chunk file is actually open.
        if (writer != null) {
            closeChunkWriter(writer, chunk, chunk_started);
        }
        if (options.output != null) {
            logger_cli.log(Level.INFO, "MSG_INFO_RESULTSWRITTEN", options.output);
        }
    }
}

/**
 * Closes one chunk writer and logs the outcome: the "chunk written" message with the elapsed
 * time when the close succeeds, or a warning when it fails (buffered data may then be lost).
 */
private void closeChunkWriter(Writer chunkWriter, int chunk, long chunk_started) {
    try {
        chunkWriter.close();
        logger_cli.log(Level.INFO, "MSG_INFO_COMMAND_CHUNKWRITTEN", new Object[] { chunk, (System.currentTimeMillis() - chunk_started) });
    } catch (Exception x) {
        logger_cli.log(Level.WARNING, x.getMessage(), x);
    }
}
Also used : IStructureRecord(ambit2.base.interfaces.IStructureRecord) RawIteratingSDFReader(ambit2.core.io.RawIteratingSDFReader) FileWriter(java.io.FileWriter) JsonNode(com.fasterxml.jackson.databind.JsonNode) IteratingDelimitedFileReader(ambit2.core.io.IteratingDelimitedFileReader) FileReader(java.io.FileReader) File(java.io.File) SMARTSAcceleratorWriter(ambit2.db.update.qlabel.smarts.SMARTSAcceleratorWriter) FP1024Writer(ambit2.db.processors.FP1024Writer) FilesWithHeaderWriter(ambit2.core.io.FilesWithHeaderWriter) IChemObjectWriter(org.openscience.cdk.io.IChemObjectWriter) Writer(java.io.Writer) MultiFingerprintsWriter(ambit2.core.io.fp.MultiFingerprintsWriter) RepositoryWriter(ambit2.db.processors.RepositoryWriter) SDFWriter(org.openscience.cdk.io.SDFWriter) FileWriter(java.io.FileWriter) AmbitException(net.idea.modbcum.i.exceptions.AmbitException) SQLException(java.sql.SQLException) ConnectException(java.net.ConnectException) IOException(java.io.IOException) OperationNotSupportedException(javax.naming.OperationNotSupportedException) FileNotFoundException(java.io.FileNotFoundException) ICountFingerprint(org.openscience.cdk.fingerprint.ICountFingerprint) IBitFingerprint(org.openscience.cdk.fingerprint.IBitFingerprint)

Example 22 with RawIteratingSDFReader

use of ambit2.core.io.RawIteratingSDFReader in project ambit-mirror by ideaconsult.

the class RepositoryWriterTest method testWrite.

/**
 * Starts from the {@code empty-datasets.xml} fixture, passes {@code input.sdf} to
 * {@code write(...)} and checks the row counts of the affected tables afterwards.
 */
@Test
public void testWrite() throws Exception {
    setUpDatabaseFromResource("ambit2/db/processors/test/empty-datasets.xml");
    IDatabaseConnection connection = getConnection();
    // Precondition: each of these queries returns no rows before the import.
    final String[] emptyBefore = {
        "SELECT * FROM chemicals",
        "SELECT * FROM structure",
        "SELECT * FROM src_dataset",
        "SELECT * FROM struc_dataset",
        "SELECT * FROM properties",
        "SELECT * FROM property_values"
    };
    for (String sql : emptyBefore) {
        Assert.assertEquals(0, connection.createQueryTable("EXPECTED", sql).getRowCount());
    }

    InputStream sdf = this.getClass().getClassLoader().getResourceAsStream("ambit2/db/processors/test/input.sdf");
    Assert.assertNotNull(sdf);
    RawIteratingSDFReader sdfReader = new RawIteratingSDFReader(new InputStreamReader(sdf));
    sdfReader.setReference(LiteratureEntry.getInstance("input.sdf"));
    write(sdfReader, connection.getConnection());
    connection.close();

    // Verify the result through a fresh connection.
    connection = getConnection();
    Assert.assertEquals(5, connection.createQueryTable("EXPECTED", "SELECT * FROM chemicals").getRowCount());
    Assert.assertEquals(3, connection.createQueryTable("EXPECTED", "SELECT * FROM chemicals where smiles is not null and inchi is not null and formula is not null").getRowCount());
    Assert.assertEquals(7, connection.createQueryTable("EXPECTED", "SELECT * FROM structure").getRowCount());
    ITable dataset = connection.createQueryTable("EXPECTED", "SELECT id_srcdataset,idtemplate FROM src_dataset where name='TEST INPUT'");
    Assert.assertEquals(1, dataset.getRowCount());
    // checks that the insert_dataset_template trigger filled in the template id
    Assert.assertNotNull(dataset.getValue(0, "idtemplate"));
    Assert.assertEquals(7, connection.createQueryTable("EXPECTED", "SELECT * FROM struc_dataset").getRowCount());
    Assert.assertEquals(72, connection.createQueryTable("EXPECTED", "SELECT * FROM properties").getRowCount());
    // checks that insert_property_tuple linked every property to the dataset template
    Assert.assertEquals(72, connection.createQueryTable("EXPECTED", "SELECT * FROM template_def join src_dataset using(idtemplate) where name='TEST INPUT'").getRowCount());
    Assert.assertEquals(224, connection.createQueryTable("EXPECTED", "SELECT * FROM property_values").getRowCount());
    connection.close();
    /*
     * Note kept from the original test: SQL for removing redundant properties.
     *
     * insert ignore into property_values
     * select id,idproperty,idstructure,idvalue,idtype,user_name,status from
     * property_values where idstructure>3 on duplicate key update
     * idstructure=3 delete from property_values where idstructure>3
     *
     * insert ignore into struc_dataset select idstructure,id_srcdataset
     * from struc_dataset where idstructure>3 on duplicate key update
     * idstructure=3 delete from struc_dataset where idstructure>3
     */
}
Also used : RawIteratingSDFReader(ambit2.core.io.RawIteratingSDFReader) InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) ITable(org.dbunit.dataset.ITable) IDatabaseConnection(org.dbunit.database.IDatabaseConnection) Test(org.junit.Test)

Example 23 with RawIteratingSDFReader

use of ambit2.core.io.RawIteratingSDFReader in project ambit-mirror by ideaconsult.

the class RepositoryWriterTest method testMarkush.

/**
 * Starts from the {@code empty-datasets.xml} fixture, passes the Markush SDF resource
 * {@code 68915-31-1.sdf} to {@code write(...)} and checks the resulting row counts.
 *
 * @TODO reenable with CDK > 1.5.11
 */
@Test
public void testMarkush() throws Exception {
    setUpDatabaseFromResource("ambit2/db/processors/test/empty-datasets.xml");
    IDatabaseConnection connection = getConnection();
    // Precondition: each of these queries returns no rows before the import.
    final String[] emptyBefore = {
        "SELECT * FROM chemicals",
        "SELECT * FROM structure",
        "SELECT * FROM src_dataset",
        "SELECT * FROM struc_dataset",
        "SELECT * FROM properties",
        "SELECT * FROM property_values"
    };
    for (String sql : emptyBefore) {
        Assert.assertEquals(0, connection.createQueryTable("EXPECTED", sql).getRowCount());
    }

    InputStream sdf = this.getClass().getClassLoader().getResourceAsStream("ambit2/db/processors/markush/68915-31-1.sdf");
    Assert.assertNotNull(sdf);
    RawIteratingSDFReader sdfReader = new RawIteratingSDFReader(new InputStreamReader(sdf));
    sdfReader.setReference(LiteratureEntry.getInstance("markush"));
    write(sdfReader, connection.getConnection());
    connection.close();

    // with cdk 1.5.12 it will write cdk:CtabSgroups as property!
    connection = getConnection();
    Assert.assertEquals(1, connection.createQueryTable("EXPECTED", "SELECT * FROM chemicals").getRowCount());
    Assert.assertEquals(1, connection.createQueryTable("EXPECTED", "SELECT * FROM structure").getRowCount());
    Assert.assertEquals(1, connection.createQueryTable("EXPECTED", "SELECT * FROM structure where type_structure='MARKUSH'").getRowCount());
    Assert.assertEquals(1, connection.createQueryTable("EXPECTED", "SELECT * FROM src_dataset where name='TEST INPUT'").getRowCount());
    Assert.assertEquals(1, connection.createQueryTable("EXPECTED", "SELECT * FROM struc_dataset").getRowCount());
    Assert.assertEquals(19, connection.createQueryTable("EXPECTED", "SELECT * FROM properties").getRowCount());
    Assert.assertEquals(19, connection.createQueryTable("EXPECTED", "SELECT * FROM property_values").getRowCount());
    Assert.assertEquals(19, connection.createQueryTable("EXPECTED", "SELECT * FROM src_dataset join template_def using(idtemplate) where name='TEST INPUT'").getRowCount());
    connection.close();
}
Also used : RawIteratingSDFReader(ambit2.core.io.RawIteratingSDFReader) InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) ITable(org.dbunit.dataset.ITable) IDatabaseConnection(org.dbunit.database.IDatabaseConnection) Test(org.junit.Test)

Example 24 with RawIteratingSDFReader

use of ambit2.core.io.RawIteratingSDFReader in project ambit-mirror by ideaconsult.

the class RepositoryWriterTest method testImportPropertiesByKey.

/**
 * Starts from the {@code dataset-properties.xml} fixture, passes {@code cid/712.sdf} to
 * {@code write(...)} together with a {@code PubchemCID} key, and checks the row counts
 * before and after.
 */
@Test
public void testImportPropertiesByKey() throws Exception {
    setUpDatabaseFromResource("ambit2/db/processors/test/dataset-properties.xml");
    IDatabaseConnection connection = getConnection();
    // Row counts of the fixture before anything is imported.
    Assert.assertEquals(5, connection.createQueryTable("EXPECTED", "SELECT * FROM chemicals").getRowCount());
    Assert.assertEquals(5, connection.createQueryTable("EXPECTED", "SELECT * FROM structure").getRowCount());
    Assert.assertEquals(1, connection.createQueryTable("EXPECTED", "SELECT * FROM src_dataset").getRowCount());
    Assert.assertEquals(2, connection.createQueryTable("EXPECTED", "SELECT * FROM struc_dataset").getRowCount());
    Assert.assertEquals(4, connection.createQueryTable("EXPECTED", "SELECT * FROM properties").getRowCount());
    Assert.assertEquals(4, connection.createQueryTable("EXPECTED", "SELECT * FROM property_values").getRowCount());

    InputStream sdf = this.getClass().getClassLoader().getResourceAsStream("ambit2/db/processors/cid/712.sdf");
    Assert.assertNotNull(sdf);
    // Alternative reader kept from the original test for reference:
    // IIteratingChemObjectReader reader =
    // FileInputState.getReader(in,".sdf");
    Assert.assertEquals(0, connection.createQueryTable("EXPECTED", "SELECT * FROM tuples").getRowCount());
    RawIteratingSDFReader sdfReader = new RawIteratingSDFReader(new InputStreamReader(sdf));
    // reader.setReference("predictions.sdf");
    write(sdfReader, connection.getConnection(), new PubchemCID(), true);
    // importProperties(reader,c.getConnection(),new PubchemCID());
    connection.close();

    // Verify the result through a fresh connection.
    connection = getConnection();
    Assert.assertEquals(5, connection.createQueryTable("EXPECTED", "SELECT * FROM chemicals").getRowCount());
    Assert.assertEquals(5, connection.createQueryTable("EXPECTED", "SELECT * FROM structure").getRowCount());
    Assert.assertEquals(1, connection.createQueryTable("EXPECTED", "SELECT * FROM src_dataset where name='TEST INPUT'").getRowCount());
    Assert.assertEquals(1, connection.createQueryTable("EXPECTED", "SELECT * FROM struc_dataset join src_dataset using(id_srcdataset) where name='TEST INPUT'").getRowCount());
    Assert.assertEquals(42, connection.createQueryTable("EXPECTED", "SELECT * FROM properties").getRowCount());
    Assert.assertEquals(42, connection.createQueryTable("EXPECTED", "SELECT * FROM property_values").getRowCount());
    Assert.assertEquals(2, connection.createQueryTable("EXPECTED", "SELECT * FROM property_values where idstructure=100215").getRowCount());
    Assert.assertEquals(38, connection.createQueryTable("EXPECTED", "SELECT * FROM src_dataset join template_def using(idtemplate) where name='TEST INPUT'").getRowCount());
    connection.close();
    /*
     * Note kept from the original test: SQL for removing redundant properties.
     *
     * insert ignore into property_values
     * select id,idproperty,idstructure,idvalue,idtype,user_name,status from
     * property_values where idstructure>3 on duplicate key update
     * idstructure=3 delete from property_values where idstructure>3
     *
     * insert ignore into struc_dataset select idstructure,id_srcdataset
     * from struc_dataset where idstructure>3 on duplicate key update
     * idstructure=3 delete from struc_dataset where idstructure>3
     */
}
Also used : IStructureRecord(ambit2.base.interfaces.IStructureRecord) RawIteratingSDFReader(ambit2.core.io.RawIteratingSDFReader) InputStreamReader(java.io.InputStreamReader) PubchemCID(ambit2.core.processors.structure.key.PubchemCID) InputStream(java.io.InputStream) ITable(org.dbunit.dataset.ITable) IDatabaseConnection(org.dbunit.database.IDatabaseConnection) Test(org.junit.Test)

Example 25 with RawIteratingSDFReader

use of ambit2.core.io.RawIteratingSDFReader in project ambit-mirror by ideaconsult.

the class RepositoryWriterTest method testMultiStrucSameSmiles.

/**
 * Starts from the {@code empty-datasets.xml} fixture, passes {@code struc_cas.sdf} to
 * {@code write(...)} and checks the resulting row counts.
 */
@Test
public void testMultiStrucSameSmiles() throws Exception {
    setUpDatabaseFromResource("ambit2/db/processors/test/empty-datasets.xml");
    IDatabaseConnection connection = getConnection();
    // Precondition: each of these queries returns no rows before the import.
    final String[] emptyBefore = {
        "SELECT * FROM chemicals",
        "SELECT * FROM structure",
        "SELECT * FROM src_dataset",
        "SELECT * FROM struc_dataset",
        "SELECT * FROM properties",
        "SELECT * FROM property_values"
    };
    for (String sql : emptyBefore) {
        Assert.assertEquals(0, connection.createQueryTable("EXPECTED", sql).getRowCount());
    }

    InputStream sdf = this.getClass().getClassLoader().getResourceAsStream("ambit2/db/processors/test/struc_cas.sdf");
    Assert.assertNotNull(sdf);
    RawIteratingSDFReader sdfReader = new RawIteratingSDFReader(new InputStreamReader(sdf));
    sdfReader.setReference(LiteratureEntry.getInstance("Multi strucsame smiles"));
    write(sdfReader, connection.getConnection());
    connection.close();

    // Verify the result through a fresh connection.
    connection = getConnection();
    Assert.assertEquals(3, connection.createQueryTable("EXPECTED", "SELECT * FROM chemicals").getRowCount());
    Assert.assertEquals(3, connection.createQueryTable("EXPECTED", "SELECT * FROM structure").getRowCount());
    Assert.assertEquals(1, connection.createQueryTable("EXPECTED", "SELECT * FROM src_dataset where name='TEST INPUT'").getRowCount());
    Assert.assertEquals(3, connection.createQueryTable("EXPECTED", "SELECT * FROM struc_dataset").getRowCount());
    Assert.assertEquals(12, connection.createQueryTable("EXPECTED", "SELECT * FROM properties").getRowCount());
    Assert.assertEquals(36, connection.createQueryTable("EXPECTED", "SELECT * FROM property_values").getRowCount());
    Assert.assertEquals(12, connection.createQueryTable("EXPECTED", "SELECT * FROM src_dataset join template_def using(idtemplate) where name='TEST INPUT'").getRowCount());
    connection.close();
    /*
     * Note kept from the original test: SQL for removing redundant properties.
     *
     * insert ignore into property_values
     * select id,idproperty,idstructure,idvalue,idtype,user_name,status from
     * property_values where idstructure>3 on duplicate key update
     * idstructure=3 delete from property_values where idstructure>3
     *
     * insert ignore into struc_dataset select idstructure,id_srcdataset
     * from struc_dataset where idstructure>3 on duplicate key update
     * idstructure=3 delete from struc_dataset where idstructure>3
     */
}
Also used : RawIteratingSDFReader(ambit2.core.io.RawIteratingSDFReader) InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) ITable(org.dbunit.dataset.ITable) IDatabaseConnection(org.dbunit.database.IDatabaseConnection) Test(org.junit.Test)

Aggregations

RawIteratingSDFReader (ambit2.core.io.RawIteratingSDFReader)30 InputStreamReader (java.io.InputStreamReader)26 Test (org.junit.Test)21 IStructureRecord (ambit2.base.interfaces.IStructureRecord)19 InputStream (java.io.InputStream)18 IDatabaseConnection (org.dbunit.database.IDatabaseConnection)13 ITable (org.dbunit.dataset.ITable)12 MoleculeReader (ambit2.core.processors.structure.MoleculeReader)7 FileReader (java.io.FileReader)6 IAtomContainer (org.openscience.cdk.interfaces.IAtomContainer)6 File (java.io.File)5 IIteratingChemObjectReader (org.openscience.cdk.io.iterator.IIteratingChemObjectReader)5 IOException (java.io.IOException)4 AmbitException (net.idea.modbcum.i.exceptions.AmbitException)4 AtomConfigurator (ambit2.core.processors.structure.AtomConfigurator)3 BitSet (java.util.BitSet)3 GZIPInputStream (java.util.zip.GZIPInputStream)3 Property (ambit2.base.data.Property)2 StructureRecord (ambit2.base.data.StructureRecord)2 IteratingDelimitedFileReader (ambit2.core.io.IteratingDelimitedFileReader)2