use of ambit2.core.io.RawIteratingSDFReader in project ambit-mirror by ideaconsult.
the class Context method parseCommandSplit.
/**
 * Splits the SDF file named by {@code options.input} into consecutive chunk
 * files inside the existing directory {@code options.output}.
 * <p>
 * Each chunk file is named {@code <chunk number>_<input file name>} and holds
 * at most {@code chunksize} records. The chunk size is read from the
 * {@code params / :chunk / value} node of the given subcommand; if that node
 * is missing, cannot be parsed, or is smaller than 1, the default of 10000
 * records is used and a warning is logged.
 *
 * @param subcommand
 *            key of the subcommand node in {@code options.command}
 * @param now
 *            start timestamp in milliseconds, used only to log the elapsed
 *            time on completion
 * @return the number of the last chunk that was started (numbering starts at 1)
 * @throws Exception
 *             if {@code options.output} is not an existing directory, or if
 *             reading the input or writing a chunk fails
 */
protected long parseCommandSplit(String subcommand, long now) throws Exception {
    final long defaultChunkSize = 10000;
    RawIteratingSDFReader reader = null;
    Writer writer = null;
    long chunksize = defaultChunkSize;
    JsonNode scmd = options.command.get(subcommand);
    try {
        JsonNode scommand = scmd.get("params");
        JsonNode chunkNode = scommand.get(":chunk");
        chunksize = Long.parseLong(chunkNode.get("value").textValue());
    } catch (Exception x) {
        // Missing or malformed chunk parameter: keep the default and carry on.
        logger_cli.log(Level.WARNING, x.getMessage(), x);
    }
    if (chunksize < 1) {
        // A non-positive size would start a new file before every single record.
        logger_cli.log(Level.WARNING,
                String.format("Invalid chunk size %d, using default %d", chunksize, defaultChunkSize));
        chunksize = defaultChunkSize;
    }
    int chunk = 1;
    long chunk_started = System.currentTimeMillis();
    try {
        File file = new File(options.input);
        File outdir = new File(options.output);
        logger_cli.log(Level.INFO, "MSG_INFO_COMMAND_SPLIT",
                new Object[] { file.getAbsoluteFile(), chunksize, outdir.getAbsolutePath() });
        if (!outdir.exists() || !outdir.isDirectory()) {
            throw new Exception(String.format("ERROR: %s is not an existing directory.", options.output));
        }
        reader = new RawIteratingSDFReader(new FileReader(file));
        File outfile = new File(outdir, String.format("%d_%s", chunk, file.getName()));
        chunk_started = System.currentTimeMillis();
        logger_cli.log(Level.INFO, "MSG_INFO_COMMAND_CHUNK", new Object[] { chunk, outfile.getAbsolutePath() });
        writer = new FileWriter(outfile);
        int records = 0;
        while (reader.hasNext()) {
            if (records >= chunksize) {
                // Current chunk is full: finish it before opening the next one.
                try {
                    writer.close();
                    logger_cli.log(Level.INFO, "MSG_INFO_COMMAND_CHUNKWRITTEN",
                            new Object[] { chunk, (System.currentTimeMillis() - chunk_started) });
                } catch (Exception x) {
                    // close() flushes buffered data, so a failure here means the
                    // chunk may be incomplete; report it instead of hiding it.
                    logger_cli.log(Level.WARNING, x.getMessage(), x);
                }
                // Drop the reference so the finally block neither closes this
                // writer again nor reports a chunk that was never opened.
                writer = null;
                chunk++;
                outfile = new File(outdir, String.format("%d_%s", chunk, file.getName()));
                writer = new FileWriter(outfile);
                records = 0;
                chunk_started = System.currentTimeMillis();
                logger_cli.log(Level.INFO, "MSG_INFO_COMMAND_CHUNK",
                        new Object[] { chunk, outfile.getAbsolutePath() });
            }
            IStructureRecord record = reader.nextRecord();
            writer.write(record.getContent());
            if ((records % 10000) == 0) {
                // Progress indicator on the console plus a periodic flush.
                System.out.print('.');
                writer.flush();
            }
            records++;
        }
        return chunk;
    } finally {
        logger_cli.log(Level.INFO, "MSG_INFO_COMPLETED", (System.currentTimeMillis() - now));
        try {
            if (reader != null) {
                reader.close();
            }
        } catch (Exception ignored) {
            // Input was only read; a failure to close it does not affect the output.
        }
        if (writer != null) {
            // Only report the last chunk as written if one is actually open.
            try {
                writer.close();
                logger_cli.log(Level.INFO, "MSG_INFO_COMMAND_CHUNKWRITTEN",
                        new Object[] { chunk, (System.currentTimeMillis() - chunk_started) });
            } catch (Exception x) {
                logger_cli.log(Level.WARNING, x.getMessage(), x);
            }
        }
        if (options.output != null) {
            logger_cli.log(Level.INFO, "MSG_INFO_RESULTSWRITTEN", options.output);
        }
    }
}
use of ambit2.core.io.RawIteratingSDFReader in project ambit-mirror by ideaconsult.
the class RepositoryWriterTest method testWrite.
/**
 * Imports {@code input.sdf} into an empty database and verifies the row
 * counts of the chemicals, structure, dataset, property and property value
 * tables after the import.
 */
@Test
public void testWrite() throws Exception {
    setUpDatabaseFromResource("ambit2/db/processors/test/empty-datasets.xml");
    IDatabaseConnection c = getConnection();
    try {
        // Precondition: every table touched by the import starts out empty.
        ITable chemicals = c.createQueryTable("EXPECTED", "SELECT * FROM chemicals");
        Assert.assertEquals(0, chemicals.getRowCount());
        ITable strucs = c.createQueryTable("EXPECTED", "SELECT * FROM structure");
        Assert.assertEquals(0, strucs.getRowCount());
        ITable srcdataset = c.createQueryTable("EXPECTED", "SELECT * FROM src_dataset");
        Assert.assertEquals(0, srcdataset.getRowCount());
        ITable struc_src = c.createQueryTable("EXPECTED", "SELECT * FROM struc_dataset");
        Assert.assertEquals(0, struc_src.getRowCount());
        ITable property = c.createQueryTable("EXPECTED", "SELECT * FROM properties");
        Assert.assertEquals(0, property.getRowCount());
        ITable property_values = c.createQueryTable("EXPECTED", "SELECT * FROM property_values");
        Assert.assertEquals(0, property_values.getRowCount());
        InputStream in = this.getClass().getClassLoader().getResourceAsStream("ambit2/db/processors/test/input.sdf");
        Assert.assertNotNull(in);
        RawIteratingSDFReader reader = new RawIteratingSDFReader(new InputStreamReader(in));
        reader.setReference(LiteratureEntry.getInstance("input.sdf"));
        write(reader, c.getConnection());
    } finally {
        // Release the connection even when an assertion or the import fails.
        c.close();
    }
    c = getConnection();
    try {
        ITable chemicals = c.createQueryTable("EXPECTED", "SELECT * FROM chemicals");
        Assert.assertEquals(5, chemicals.getRowCount());
        chemicals = c.createQueryTable("EXPECTED", "SELECT * FROM chemicals where smiles is not null and inchi is not null and formula is not null");
        Assert.assertEquals(3, chemicals.getRowCount());
        ITable strucs = c.createQueryTable("EXPECTED", "SELECT * FROM structure");
        Assert.assertEquals(7, strucs.getRowCount());
        ITable srcdataset = c.createQueryTable("EXPECTED", "SELECT id_srcdataset,idtemplate FROM src_dataset where name='TEST INPUT'");
        Assert.assertEquals(1, srcdataset.getRowCount());
        // verifies if trigger insert_dataset_template works ok
        Assert.assertNotNull(srcdataset.getValue(0, "idtemplate"));
        ITable struc_src = c.createQueryTable("EXPECTED", "SELECT * FROM struc_dataset");
        Assert.assertEquals(7, struc_src.getRowCount());
        ITable property = c.createQueryTable("EXPECTED", "SELECT * FROM properties");
        Assert.assertEquals(72, property.getRowCount());
        // verifies if insert_property_tuple works ok
        property = c.createQueryTable("EXPECTED", "SELECT * FROM template_def join src_dataset using(idtemplate) where name='TEST INPUT'");
        Assert.assertEquals(72, property.getRowCount());
        ITable property_values = c.createQueryTable("EXPECTED", "SELECT * FROM property_values");
        Assert.assertEquals(224, property_values.getRowCount());
    } finally {
        c.close();
    }
    /*
     * Removing redundant properties insert ignore into property_values
     * select id,idproperty,idstructure,idvalue,idtype,user_name,status from
     * property_values where idstructure>3 on duplicate key update
     * idstructure=3 delete from property_values where idstructure>3
     *
     * insert ignore into struc_dataset select idstructure,id_srcdataset
     * from struc_dataset where idstructure>3 on duplicate key update
     * idstructure=3 delete from struc_dataset where idstructure>3
     */
}
use of ambit2.core.io.RawIteratingSDFReader in project ambit-mirror by ideaconsult.
the class RepositoryWriterTest method testMarkush.
/**
 * Imports the Markush structure file {@code 68915-31-1.sdf} into an empty
 * database and verifies that a single structure of type {@code MARKUSH} with
 * 19 properties is stored.
 *
 * @TODO reenable with CDK > 1.5.11
 */
@Test
public void testMarkush() throws Exception {
    setUpDatabaseFromResource("ambit2/db/processors/test/empty-datasets.xml");
    IDatabaseConnection c = getConnection();
    try {
        // Precondition: every table touched by the import starts out empty.
        ITable chemicals = c.createQueryTable("EXPECTED", "SELECT * FROM chemicals");
        Assert.assertEquals(0, chemicals.getRowCount());
        ITable strucs = c.createQueryTable("EXPECTED", "SELECT * FROM structure");
        Assert.assertEquals(0, strucs.getRowCount());
        ITable srcdataset = c.createQueryTable("EXPECTED", "SELECT * FROM src_dataset");
        Assert.assertEquals(0, srcdataset.getRowCount());
        ITable struc_src = c.createQueryTable("EXPECTED", "SELECT * FROM struc_dataset");
        Assert.assertEquals(0, struc_src.getRowCount());
        ITable property = c.createQueryTable("EXPECTED", "SELECT * FROM properties");
        Assert.assertEquals(0, property.getRowCount());
        ITable property_values = c.createQueryTable("EXPECTED", "SELECT * FROM property_values");
        Assert.assertEquals(0, property_values.getRowCount());
        InputStream in = this.getClass().getClassLoader().getResourceAsStream("ambit2/db/processors/markush/68915-31-1.sdf");
        Assert.assertNotNull(in);
        RawIteratingSDFReader reader = new RawIteratingSDFReader(new InputStreamReader(in));
        reader.setReference(LiteratureEntry.getInstance("markush"));
        write(reader, c.getConnection());
    } finally {
        // Release the connection even when an assertion or the import fails.
        c.close();
    }
    // with cdk 1.5.12 it will write cdk:CtabSgroups as property!
    c = getConnection();
    try {
        ITable chemicals = c.createQueryTable("EXPECTED", "SELECT * FROM chemicals");
        Assert.assertEquals(1, chemicals.getRowCount());
        ITable strucs = c.createQueryTable("EXPECTED", "SELECT * FROM structure");
        Assert.assertEquals(1, strucs.getRowCount());
        strucs = c.createQueryTable("EXPECTED", "SELECT * FROM structure where type_structure='MARKUSH'");
        Assert.assertEquals(1, strucs.getRowCount());
        ITable srcdataset = c.createQueryTable("EXPECTED", "SELECT * FROM src_dataset where name='TEST INPUT'");
        Assert.assertEquals(1, srcdataset.getRowCount());
        ITable struc_src = c.createQueryTable("EXPECTED", "SELECT * FROM struc_dataset");
        Assert.assertEquals(1, struc_src.getRowCount());
        ITable property = c.createQueryTable("EXPECTED", "SELECT * FROM properties");
        Assert.assertEquals(19, property.getRowCount());
        ITable property_values = c.createQueryTable("EXPECTED", "SELECT * FROM property_values");
        Assert.assertEquals(19, property_values.getRowCount());
        srcdataset = c.createQueryTable("EXPECTED", "SELECT * FROM src_dataset join template_def using(idtemplate) where name='TEST INPUT'");
        Assert.assertEquals(19, srcdataset.getRowCount());
    } finally {
        c.close();
    }
}
use of ambit2.core.io.RawIteratingSDFReader in project ambit-mirror by ideaconsult.
the class RepositoryWriterTest method testImportPropertiesByKey.
/**
 * Imports {@code cid/712.sdf} into a database pre-populated from
 * {@code dataset-properties.xml}, matching structures with a
 * {@code PubchemCID} key, and verifies the resulting row counts.
 */
@Test
public void testImportPropertiesByKey() throws Exception {
    setUpDatabaseFromResource("ambit2/db/processors/test/dataset-properties.xml");
    IDatabaseConnection c = getConnection();
    try {
        // Precondition: row counts of the pre-populated fixture.
        ITable chemicals = c.createQueryTable("EXPECTED", "SELECT * FROM chemicals");
        Assert.assertEquals(5, chemicals.getRowCount());
        ITable strucs = c.createQueryTable("EXPECTED", "SELECT * FROM structure");
        Assert.assertEquals(5, strucs.getRowCount());
        ITable srcdataset = c.createQueryTable("EXPECTED", "SELECT * FROM src_dataset");
        Assert.assertEquals(1, srcdataset.getRowCount());
        ITable struc_src = c.createQueryTable("EXPECTED", "SELECT * FROM struc_dataset");
        Assert.assertEquals(2, struc_src.getRowCount());
        ITable property = c.createQueryTable("EXPECTED", "SELECT * FROM properties");
        Assert.assertEquals(4, property.getRowCount());
        ITable property_values = c.createQueryTable("EXPECTED", "SELECT * FROM property_values");
        Assert.assertEquals(4, property_values.getRowCount());
        InputStream in = this.getClass().getClassLoader().getResourceAsStream("ambit2/db/processors/cid/712.sdf");
        Assert.assertNotNull(in);
        // IIteratingChemObjectReader reader =
        // FileInputState.getReader(in,".sdf");
        ITable tuples = c.createQueryTable("EXPECTED", "SELECT * FROM tuples");
        Assert.assertEquals(0, tuples.getRowCount());
        IRawReader<IStructureRecord> reader = new RawIteratingSDFReader(new InputStreamReader(in));
        // reader.setReference("predictions.sdf");
        write((RawIteratingSDFReader) reader, c.getConnection(), new PubchemCID(), true);
        // importProperties(reader,c.getConnection(),new PubchemCID());
    } finally {
        // Release the connection even when an assertion or the import fails.
        c.close();
    }
    c = getConnection();
    try {
        ITable chemicals = c.createQueryTable("EXPECTED", "SELECT * FROM chemicals");
        Assert.assertEquals(5, chemicals.getRowCount());
        ITable strucs = c.createQueryTable("EXPECTED", "SELECT * FROM structure");
        Assert.assertEquals(5, strucs.getRowCount());
        ITable srcdataset = c.createQueryTable("EXPECTED", "SELECT * FROM src_dataset where name='TEST INPUT'");
        Assert.assertEquals(1, srcdataset.getRowCount());
        ITable struc_src = c.createQueryTable("EXPECTED", "SELECT * FROM struc_dataset join src_dataset using(id_srcdataset) where name='TEST INPUT'");
        Assert.assertEquals(1, struc_src.getRowCount());
        ITable property = c.createQueryTable("EXPECTED", "SELECT * FROM properties");
        Assert.assertEquals(42, property.getRowCount());
        ITable property_values = c.createQueryTable("EXPECTED", "SELECT * FROM property_values");
        Assert.assertEquals(42, property_values.getRowCount());
        property_values = c.createQueryTable("EXPECTED", "SELECT * FROM property_values where idstructure=100215");
        Assert.assertEquals(2, property_values.getRowCount());
        srcdataset = c.createQueryTable("EXPECTED", "SELECT * FROM src_dataset join template_def using(idtemplate) where name='TEST INPUT'");
        Assert.assertEquals(38, srcdataset.getRowCount());
    } finally {
        c.close();
    }
    /*
     * Removing redundant properties insert ignore into property_values
     * select id,idproperty,idstructure,idvalue,idtype,user_name,status from
     * property_values where idstructure>3 on duplicate key update
     * idstructure=3 delete from property_values where idstructure>3
     *
     * insert ignore into struc_dataset select idstructure,id_srcdataset
     * from struc_dataset where idstructure>3 on duplicate key update
     * idstructure=3 delete from struc_dataset where idstructure>3
     */
}
use of ambit2.core.io.RawIteratingSDFReader in project ambit-mirror by ideaconsult.
the class RepositoryWriterTest method testMultiStrucSameSmiles.
/**
 * Imports {@code struc_cas.sdf} into an empty database and verifies that
 * three chemicals and three structures are stored together with their
 * properties and property values.
 */
@Test
public void testMultiStrucSameSmiles() throws Exception {
    setUpDatabaseFromResource("ambit2/db/processors/test/empty-datasets.xml");
    IDatabaseConnection c = getConnection();
    try {
        // Precondition: every table touched by the import starts out empty.
        ITable chemicals = c.createQueryTable("EXPECTED", "SELECT * FROM chemicals");
        Assert.assertEquals(0, chemicals.getRowCount());
        ITable strucs = c.createQueryTable("EXPECTED", "SELECT * FROM structure");
        Assert.assertEquals(0, strucs.getRowCount());
        ITable srcdataset = c.createQueryTable("EXPECTED", "SELECT * FROM src_dataset");
        Assert.assertEquals(0, srcdataset.getRowCount());
        ITable struc_src = c.createQueryTable("EXPECTED", "SELECT * FROM struc_dataset");
        Assert.assertEquals(0, struc_src.getRowCount());
        ITable property = c.createQueryTable("EXPECTED", "SELECT * FROM properties");
        Assert.assertEquals(0, property.getRowCount());
        ITable property_values = c.createQueryTable("EXPECTED", "SELECT * FROM property_values");
        Assert.assertEquals(0, property_values.getRowCount());
        InputStream in = this.getClass().getClassLoader().getResourceAsStream("ambit2/db/processors/test/struc_cas.sdf");
        Assert.assertNotNull(in);
        RawIteratingSDFReader reader = new RawIteratingSDFReader(new InputStreamReader(in));
        reader.setReference(LiteratureEntry.getInstance("Multi strucsame smiles"));
        write(reader, c.getConnection());
    } finally {
        // Release the connection even when an assertion or the import fails.
        c.close();
    }
    c = getConnection();
    try {
        ITable chemicals = c.createQueryTable("EXPECTED", "SELECT * FROM chemicals");
        Assert.assertEquals(3, chemicals.getRowCount());
        ITable strucs = c.createQueryTable("EXPECTED", "SELECT * FROM structure");
        Assert.assertEquals(3, strucs.getRowCount());
        ITable srcdataset = c.createQueryTable("EXPECTED", "SELECT * FROM src_dataset where name='TEST INPUT'");
        Assert.assertEquals(1, srcdataset.getRowCount());
        ITable struc_src = c.createQueryTable("EXPECTED", "SELECT * FROM struc_dataset");
        Assert.assertEquals(3, struc_src.getRowCount());
        ITable property = c.createQueryTable("EXPECTED", "SELECT * FROM properties");
        Assert.assertEquals(12, property.getRowCount());
        ITable property_values = c.createQueryTable("EXPECTED", "SELECT * FROM property_values");
        Assert.assertEquals(36, property_values.getRowCount());
        srcdataset = c.createQueryTable("EXPECTED", "SELECT * FROM src_dataset join template_def using(idtemplate) where name='TEST INPUT'");
        Assert.assertEquals(12, srcdataset.getRowCount());
    } finally {
        c.close();
    }
    /*
     * Removing redundant properties insert ignore into property_values
     * select id,idproperty,idstructure,idvalue,idtype,user_name,status from
     * property_values where idstructure>3 on duplicate key update
     * idstructure=3 delete from property_values where idstructure>3
     *
     * insert ignore into struc_dataset select idstructure,id_srcdataset
     * from struc_dataset where idstructure>3 on duplicate key update
     * idstructure=3 delete from struc_dataset where idstructure>3
     */
}
Aggregations