use of ambit2.core.processors.StructureNormalizer in project ambit-mirror by ideaconsult.
the class RawIteratingWrapperTest method testPubChemSubstance.
/**
 * Reads the Tox21/PubChem SDF excerpt from the test classpath, runs every record through
 * {@link StructureNormalizer} and checks that the identifier properties present on the
 * normalized record ({@code PUBCHEM_SID}, {@code PUBCHEM Name}, {@code DSSTox_GSID},
 * {@code CASRN}, {@code DSSTox_RID}) all carry a value.
 *
 * <p>The fixture is expected to yield three records and three {@code PUBCHEM_SID}
 * properties in total.
 *
 * @throws Exception if the fixture cannot be read or a record cannot be normalized
 */
@Test
public void testPubChemSubstance() throws Exception {
    String resource = "ambit2/core/pubchem/tox21_excerpt.sdf";
    InputStream in = RawIteratingWrapperTest.class.getClassLoader().getResourceAsStream(resource);
    // A missing fixture should fail with a readable message, not with a
    // NullPointerException thrown from the InputStreamReader constructor.
    Assert.assertNotNull("Test resource not found: " + resource, in);
    RawIteratingSDFReader reader = new RawIteratingSDFReader(new InputStreamReader(in));
    int count = 0;
    int sid = 0;
    try {
        reader.setReference(LiteratureEntry.getInstance("tox21.sdf"));
        StructureNormalizer normalizer = new StructureNormalizer();
        while (reader.hasNext()) {
            IStructureRecord record = (IStructureRecord) reader.next();
            IStructureRecord normalized = normalizer.process(record);
            for (Property p : normalized.getRecordProperties()) {
                String name = p.getName();
                boolean isSid = "PUBCHEM_SID".equals(name);
                boolean isTracked = isSid
                        || "PUBCHEM Name".equals(name)
                        || "DSSTox_GSID".equals(name)
                        || "CASRN".equals(name)
                        || "DSSTox_RID".equals(name);
                if (isTracked) {
                    Assert.assertNotNull(normalized.getRecordProperty(p));
                }
                if (isSid) {
                    sid++;
                }
            }
            count++;
        }
    } finally {
        // Release the reader even when an assertion or exception aborts the loop.
        reader.close();
    }
    Assert.assertEquals(3, sid);
    Assert.assertEquals(3, count);
}
use of ambit2.core.processors.StructureNormalizer in project ambit-mirror by ideaconsult.
the class InChIChemicalsTableWriterTest method testProcess.
/**
 * Loads the {@code dataset_nofp.xml} fixture, normalizes every stored structure with
 * {@link StructureNormalizer}, writes the result through {@link InChIChemicalsWriter} and
 * verifies that four chemicals end up with a non-null InChI.
 *
 * <p>Failures while normalizing or writing a single record do not abort the run; they are
 * counted and reported in the message of the final assertion.
 *
 * @throws Exception if the database fixture cannot be set up or queried
 */
@Test
public void testProcess() throws Exception {
    StructureNormalizer normalizer = new StructureNormalizer();
    // Fixture name says "no fingerprints"; its chemicals have no InChIs either,
    // which the first assertion below confirms.
    setUpDatabaseFromResource("ambit2/db/processors/test/dataset_nofp.xml");
    IDatabaseConnection dbConnection = getConnection();
    ITable chemicals = dbConnection.createQueryTable("EXPECTED_FP", "SELECT * FROM chemicals where inchi is null");
    Assert.assertEquals(5, chemicals.getRowCount());

    RepositoryReader reader = new RepositoryReader();
    RetrieveStructure molReader = new RetrieveStructure(true);
    reader.setConnection(dbConnection.getConnection());
    InChIChemicalsWriter inchiWriter = new InChIChemicalsWriter();
    inchiWriter.setConnection(dbConnection.getConnection());

    int records = 0;
    int errors = 0;
    Exception firstFailure = null;

    inchiWriter.open();
    try {
        reader.open();
        try {
            QueryExecutor<RetrieveStructure> exec = new QueryExecutor<RetrieveStructure>();
            exec.setConnection(dbConnection.getConnection());
            while (reader.hasNext()) {
                IStructureRecord o = reader.next();
                String content = reader.getStructure(o.getIdstructure());
                if (content == null) {
                    continue;
                }
                molReader.setValue(o);
                ResultSet rs = exec.process(molReader);
                try {
                    while (rs.next()) {
                        IStructureRecord record = molReader.getObject(rs);
                        try {
                            record = normalizer.process(record);
                            inchiWriter.process(record);
                        } catch (Exception x) {
                            // Best effort per record: remember the failure so it can be
                            // reported if the final count does not match.
                            errors++;
                            if (firstFailure == null) {
                                firstFailure = x;
                            }
                        }
                    }
                } finally {
                    rs.close();
                }
                o.clear();
                records++;
            }
        } finally {
            reader.close();
        }
        chemicals = dbConnection.createQueryTable("EXPECTED_FP", "SELECT count(*) as c FROM chemicals where inchi is not null");
        String diagnostics = "Chemicals with InChI after processing " + records + " structure(s), "
                + errors + " record failure(s)"
                + (firstFailure == null ? "" : ", first failure: " + firstFailure);
        Assert.assertEquals(diagnostics, new BigInteger("4"), chemicals.getValue(0, "c"));
    } finally {
        // Close the writer even when the loop or the final assertion fails.
        inchiWriter.close();
    }
}
use of ambit2.core.processors.StructureNormalizer in project ambit-mirror by ideaconsult.
the class RawIteratingWrapperTest method testDX.
/**
 * Reads the DX predictions SDF from the test classpath, normalizes the record with
 * {@link StructureNormalizer} and checks its record properties.
 *
 * <p>Every property must have a value, a label and a name; properties whose name starts
 * with {@code DX.} must carry a label in the
 * {@code http://www.opentox.org/echaEndpoints.owl#} namespace. The fixture is expected to
 * hold a single record with 27 record properties.
 *
 * @throws Exception if the fixture cannot be read or the record cannot be normalized
 */
@Test
public void testDX() throws Exception {
    String resource = "ambit2/core/data/dx/predictions.sdf";
    InputStream in = RawIteratingWrapperTest.class.getClassLoader().getResourceAsStream(resource);
    // A missing fixture should fail with a readable message, not with a
    // NullPointerException thrown from the InputStreamReader constructor.
    Assert.assertNotNull("Test resource not found: " + resource, in);
    RawIteratingSDFReader reader = new RawIteratingSDFReader(new InputStreamReader(in));
    int count = 0;
    try {
        reader.setReference(LiteratureEntry.getInstance("predictions.sdf"));
        StructureNormalizer normalizer = new StructureNormalizer();
        while (reader.hasNext()) {
            IStructureRecord record = (IStructureRecord) reader.next();
            IStructureRecord normalized = normalizer.process(record);
            int propertyCount = 0;
            for (Property p : normalized.getRecordProperties()) {
                propertyCount++;
                Assert.assertNotNull(normalized.getRecordProperty(p));
                Assert.assertNotNull(p.getLabel());
                Assert.assertNotNull(p.getName());
                if (p.getName().startsWith("DX.")) {
                    Assert.assertTrue(p.getLabel().startsWith("http://www.opentox.org/echaEndpoints.owl#"));
                }
            }
            Assert.assertEquals(27, propertyCount);
            // NOTE: a follow-up DXParser pass over the normalized record used to be sketched
            // here but was disabled; this test covers the normalizer output only.
            count++;
        }
    } finally {
        // Release the reader even when an assertion or exception aborts the loop.
        reader.close();
    }
    Assert.assertEquals(1, count);
}
use of ambit2.core.processors.StructureNormalizer in project ambit-mirror by ideaconsult.
the class Context method parseCommandPreprocessing.
/**
 * Runs the "preprocessing" command: selects structures through a query chosen from the
 * requested {@link FPTable} options and pushes them through a processor chain that
 * generates and stores the corresponding derived data.
 *
 * <p>Flow, as implemented below:
 * <ol>
 * <li>Collect the requested tables from the boolean options named after each
 * {@code _preprocessingoptions} value; when none is set, {@link FPTable#inchi} is used.</li>
 * <li>Build a {@link DbReader} batch whose chain first loads the structure content, then
 * either normalizes and updates the chemical (InChI mode) or adds the generators and
 * writers of the selected fingerprint tables.</li>
 * <li>Open a non-autocommit connection, call {@code disableIndices}, run the batch, then
 * commit, call {@code enableIndices} and close the batch.</li>
 * </ol>
 *
 * <p>If the selected options do not map to any query, a warning is logged and the method
 * returns without opening a database connection.
 *
 * @throws Exception if the page size cannot be parsed, the connection cannot be obtained
 *         or configured, or the batch cannot be opened; failures while the batch runs are
 *         logged and not rethrown
 */
public void parseCommandPreprocessing() throws Exception {
    int pagesize = parsePageSizeParam();
    Set<FPTable> preprocessingOption = new TreeSet<FPTable>();
    // A missing or non-boolean flag is logged and skipped so the remaining flags still apply.
    for (_preprocessingoptions p : _preprocessingoptions.values()) {
        try {
            if ((Boolean) options.getParam(p.toString())) {
                for (FPTable t : p.getOption()) {
                    preprocessingOption.add(t);
                }
            }
        } catch (Exception x) {
            logger_cli.log(Level.WARNING, x.toString());
        }
    }

    DbReader<IStructureRecord> batch = new DbReader<IStructureRecord>() {
        private static final long serialVersionUID = 6777121852891369530L;

        @Override
        public void onItemRead(IStructureRecord input, IBatchStatistics stats) {
            super.onItemRead(input, stats);
            // Report progress and commit every 5000 records read.
            if ((stats.getRecords(RECORDS_STATS.RECORDS_READ) % 5000) == 0) {
                try {
                    logger_cli.log(Level.INFO, stats.toString());
                    getConnection().commit();
                } catch (Exception x) {
                    logger_cli.log(Level.WARNING, x.getMessage());
                }
            }
        }

        @Override
        public void onError(IStructureRecord input, Object output, IBatchStatistics stats, Exception x) {
            super.onError(input, output, stats, x);
            logger_cli.log(Level.SEVERE, x.getMessage());
        }
    };
    batch.setProcessorChain(new ProcessorsChain<IStructureRecord, IBatchStatistics, IProcessor>());

    /* structure */
    RetrieveStructure queryP = new RetrieveStructure(true);
    queryP.setFieldname(true);
    queryP.setPageSize(1);
    queryP.setPage(0);
    MasterDetailsProcessor<IStructureRecord, IStructureRecord, IQueryCondition> strucReader = new MasterDetailsProcessor<IStructureRecord, IStructureRecord, IQueryCondition>(queryP) {
        private static final long serialVersionUID = -5350168222668294207L;

        @Override
        protected void configureQuery(IStructureRecord target, IParameterizedQuery<IStructureRecord, IStructureRecord, IQueryCondition> query) throws AmbitException {
            // Deliberately does not delegate to super.configureQuery.
            query.setValue(target);
        }

        @Override
        protected IStructureRecord processDetail(IStructureRecord master, IStructureRecord detail) throws Exception {
            // Copy the retrieved structure payload onto the record flowing through the chain.
            master.setContent(detail.getContent());
            master.setFormat(detail.getFormat());
            master.setType(detail.getType());
            return master;
        }
    };
    strucReader.setCloseConnection(false);
    batch.getProcessorChain().add(strucReader);

    // Choose the driving query and the processors for the requested preprocessing.
    IQueryRetrieval<IStructureRecord> query = null;
    if (preprocessingOption.isEmpty()) {
        preprocessingOption.add(FPTable.inchi);
    }
    if (preprocessingOption.contains(FPTable.inchi)) {
        query = new MissingInChIsQuery("UNKNOWN");
        AbstractUpdate updateQuery = new UpdateChemical();
        batch.getProcessorChain().add(new DefaultAmbitProcessor<IStructureRecord, IStructureRecord>() {
            private static final long serialVersionUID = -7628269103516836861L;
            protected transient StructureNormalizer normalizer = new StructureNormalizer();

            @Override
            public IStructureRecord process(IStructureRecord record) throws Exception {
                try {
                    normalizer.process(record);
                    return record;
                } catch (Exception x) {
                    // Best effort: a structure that cannot be normalized is marked as
                    // STRUC_TYPE.NA and still passed on to the next processor.
                    record.setType(STRUC_TYPE.NA);
                    return record;
                }
            }
        });
        batch.getProcessorChain().add(new AbstractUpdateProcessor<Object, IChemical>(OP.CREATE, updateQuery) {
            private static final long serialVersionUID = 9019409150445247686L;

            @Override
            protected IChemical execute(Object group, IQueryUpdate<Object, IChemical> query) throws SQLException, OperationNotSupportedException, AmbitException {
                if (group instanceof IChemical) {
                    query.setObject((IChemical) group);
                }
                return super.execute(group, query);
            }
        });
    } else {
        // Generators first. NOTE(review): each selected table overwrites `query`, so the
        // last matching assignment below drives the batch - confirm this is intended
        // when several tables are requested together.
        if (preprocessingOption.contains(FPTable.smarts_accelerator)) {
            query = new MissingFingerprintsQuery(FPTable.smarts_accelerator);
            batch.getProcessorChain().add(new SMARTSPropertiesGenerator());
        }
        if (preprocessingOption.contains(FPTable.fp1024)) {
            query = new FingerprintsByStatus(FPTable.fp1024);
            batch.getProcessorChain().add(new BitSetGenerator(FPTable.fp1024));
        }
        if (preprocessingOption.contains(FPTable.sk1024)) {
            query = new FingerprintsByStatus(FPTable.sk1024);
            batch.getProcessorChain().add(new BitSetGenerator(FPTable.sk1024));
        }
        // Then the writers, in the same order as the generators.
        if (preprocessingOption.contains(FPTable.smarts_accelerator)) {
            batch.getProcessorChain().add(new SMARTSAcceleratorWriter());
        }
        if (preprocessingOption.contains(FPTable.fp1024)) {
            batch.getProcessorChain().add(new FP1024Writer(FPTable.fp1024));
        }
        if (preprocessingOption.contains(FPTable.sk1024)) {
            batch.getProcessorChain().add(new FP1024Writer(FPTable.sk1024));
        }
        if (preprocessingOption.contains(FPTable.cf1024)) {
            query = new FingerprintsByStatus(FPTable.cf1024);
            batch.getProcessorChain().add(new BitSetGenerator(FPTable.cf1024));
            batch.getProcessorChain().add(new FP1024Writer(FPTable.cf1024));
        }
    }

    // None of the branches above may have matched the requested tables. Stop here with a
    // clear message instead of opening a connection and failing on a null query later.
    if (query == null) {
        logger_cli.log(Level.WARNING, "No preprocessing query available for options " + preprocessingOption + "; nothing to do");
        return;
    }

    batch.setHandlePrescreen(false);
    DBConnectionConfigurable<Context> dbc = getConnection(options.getSQLConfig());
    Connection c = dbc.getConnection();
    c.setAutoCommit(false);
    batch.setCloseConnection(true);
    batch.setConnection(c);
    batch.open();
    IBatchStatistics stats = null;
    try {
        query.setPageSize(pagesize);
        logger_cli.info(query.getSQL());
        try {
            disableIndices(batch.getConnection());
        } catch (Exception x) {
            logger_cli.warning(x.getMessage());
        }
        logger_cli.log(Level.INFO, "MSG_INFO_QUERY", pagesize);
        stats = batch.process(query);
    } catch (Exception x) {
        logger_cli.log(Level.WARNING, x.getMessage(), x);
    } finally {
        // Each cleanup step is isolated so that one failure does not skip the others.
        try {
            batch.getConnection().commit();
        } catch (Exception x) {
            logger_cli.warning(x.getMessage());
        }
        try {
            enableIndices(batch.getConnection());
        } catch (Exception x) {
            logger_cli.warning(x.getMessage());
        }
        try {
            batch.close();
        } catch (Exception x) {
            logger_cli.warning(x.getMessage());
        }
        if (stats != null) {
            logger_cli.log(Level.INFO, stats.toString());
        }
    }
}
use of ambit2.core.processors.StructureNormalizer in project ambit-mirror by ideaconsult.
the class CallableFingerprintsCalculator method createProcessors.
/**
 * Builds the processor chain for the configured fingerprint type.
 *
 * <p>The chain always starts with a structure retrieval step (page 0, page size 1) and is
 * followed by the generator/writer pair that matches {@code getFingerprintsType()}. Types
 * without a matching case get the retrieval step only.
 *
 * @return the assembled processor chain
 * @throws Exception declared by the overridden method
 */
@Override
protected ProcessorsChain<IStructureRecord, IBatchStatistics, IProcessor> createProcessors() throws Exception {
    ProcessorsChain<IStructureRecord, IBatchStatistics, IProcessor> chain = new ProcessorsChain<IStructureRecord, IBatchStatistics, IProcessor>();

    RetrieveStructure structureQuery = new RetrieveStructure(true);
    structureQuery.setPageSize(1);
    structureQuery.setPage(0);
    chain.add(new ProcessorStructureRetrieval(structureQuery));

    switch (getFingerprintsType()) {
        // Bit-set fingerprints stored through the FP1024 writer.
        case fp1024:
        case sk1024:
        case cf1024:
            chain.add(new BitSetGenerator(getFingerprintsType()));
            chain.add(new FP1024Writer(getFingerprintsType()));
            break;
        // Same generator, but written through the structure-level writer.
        case fp1024_struc:
            chain.add(new BitSetGenerator(getFingerprintsType()));
            chain.add(new FPStructureWriter());
            break;
        case smarts_accelerator:
            chain.add(new SMARTSPropertiesGenerator());
            chain.add(new SMARTSAcceleratorWriter());
            break;
        // Both atom-environment types use the matrix generator and writer.
        case atomenvironments:
        case aematrix:
            chain.add(new AtomEnvironmentMatrixGenerator());
            chain.add(new AtomEnvironmentMatrixWriter());
            break;
        case inchi:
            chain.add(new StructureNormalizer());
            chain.add(new InChIChemicalsWriter());
            break;
        default:
            break;
    }
    return chain;
}
Aggregations