use of ambit2.core.io.RawIteratingFolderReader in project ambit-mirror by ideaconsult.
the class I5ParserTest method testi5dFolder.
/**
 * Reads all {@code *i5d} files found in the {@code RefSub_030913110311} test resource folder
 * through a {@link RawIteratingFolderReader} and checks the number of records returned and the
 * number of those records whose format is {@code "INC"} and whose content is non-null.
 *
 * @throws Exception if the resource folder cannot be resolved or reading fails
 */
@Test
public void testi5dFolder() throws Exception {
    URL url = getClass().getClassLoader().getResource("ambit2/core/data/i5z/RefSub_030913110311");
    Assert.assertNotNull("Test resource folder ambit2/core/data/i5z/RefSub_030913110311 not found", url);
    // toURI() decodes percent-escapes (e.g. %20 for a space) that url.getFile() would leave in the path.
    File dir = new File(url.toURI());
    File[] files = dir.listFiles(new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return pathname.getName().endsWith("i5d");
        }
    });
    // listFiles() returns null when the path is not a readable directory.
    Assert.assertNotNull("Not a readable directory: " + dir, files);
    Assert.assertEquals(20, files.length);

    int count = 0;
    int foundInChI = 0;
    RawIteratingFolderReader reader = new RawIteratingFolderReader(files);
    try {
        while (reader.hasNext()) {
            IStructureRecord record = reader.nextRecord();
            count++;
            if (record.getContent() != null && "INC".equals(record.getFormat())) {
                foundInChI++;
                System.out.println(record.getContent());
            }
            // Dump the record properties for manual inspection; nothing is asserted on them here.
            for (Property p : record.getRecordProperties()) {
                System.out.println(p.getName() + " = " + record.getRecordProperty(p));
            }
            // Disabled checks, kept for reference:
            // Assert.assertNotNull(record.getSmiles());
            // Assert.assertNotNull(record.getInchi());
            // Assert.assertNotNull(record.getProperty(Property.getI5UUIDInstance()));
        }
    } finally {
        // Release the reader even when an assertion or a read error aborts the loop.
        reader.close();
    }
    Assert.assertEquals(10, count);
    Assert.assertEquals(10, foundInChI);
}
use of ambit2.core.io.RawIteratingFolderReader in project ambit-mirror by ideaconsult.
the class I5ParserTest method testi5z.
/**
 * Opens the {@code RefSub_030913110311.i5z} test archive via {@code FileInputState.getReader},
 * iterates over all records and checks the record count as well as the number of record
 * properties equal to the CAS number {@code 59-87-0} and to the name
 * {@code 5-nitro-2-furaldehyde semicarbazone}.
 *
 * @throws Exception if the resource cannot be resolved or reading fails
 */
@Test
public void testi5z() throws Exception {
    URL url = getClass().getClassLoader().getResource("ambit2/core/data/i5z/RefSub_030913110311.i5z");
    Assert.assertNotNull("Test resource ambit2/core/data/i5z/RefSub_030913110311.i5z not found", url);
    // toURI() decodes percent-escapes (e.g. %20 for a space) that url.getFile() would leave in the path.
    IIteratingChemObjectReader ireader = FileInputState.getReader(new File(url.toURI()));
    int count = 0;
    int foundCas = 0;
    int foundName = 0;
    try {
        // Only errors reported together with an exception are surfaced; the other callbacks
        // are intentionally no-ops in this test.
        ireader.setErrorHandler(new IChemObjectReaderErrorHandler() {
            @Override
            public void handleError(String message, int row, int colStart, int colEnd, Exception exception) {
            }

            @Override
            public void handleError(String message, int row, int colStart, int colEnd) {
            }

            @Override
            public void handleError(String message, Exception exception) {
                exception.printStackTrace();
            }

            @Override
            public void handleError(String message) {
            }
        });
        RawIteratingFolderReader reader = (RawIteratingFolderReader) ireader;
        while (reader.hasNext()) {
            IStructureRecord record = reader.nextRecord();
            count++;
            for (Property p : record.getRecordProperties()) {
                Object value = record.getRecordProperty(p);
                // Literal-first equals: a property without a value must not abort the test with an NPE.
                if ("59-87-0".equals(value)) {
                    foundCas++;
                }
                if ("5-nitro-2-furaldehyde semicarbazone".equals(value)) {
                    foundName++;
                }
                System.out.println(p.getName() + " = " + value);
            }
            // Disabled checks, kept for reference:
            // Assert.assertNotNull(record.getSmiles());
            // Assert.assertNotNull(record.getInchi());
            // Assert.assertNotNull(record.getProperty(Property.getI5UUIDInstance()));
        }
    } finally {
        // Release the reader even when an assertion or a read error aborts the loop.
        ireader.close();
    }
    Assert.assertEquals(10, count);
    Assert.assertEquals(1, foundCas);
    Assert.assertEquals(1, foundName);
}
use of ambit2.core.io.RawIteratingFolderReader in project ambit-mirror by ideaconsult.
the class RawIteratingFolderReaderTest method test.
/**
 * Feeds six MDL test files (SDF and MOL) to a {@link RawIteratingFolderReader}, logs every
 * item returned and checks that six items are read in total.
 *
 * @throws Exception if reading fails
 */
@Test
public void test() throws Exception {
    File[] files = new File[] {
        new File("src/test/resources/ambit2/core/data/mdl/12027-77-9.sdf"),
        new File("src/test/resources/ambit2/core/data/mdl/12040-13-0.sdf"),
        new File("src/test/resources/ambit2/core/data/mdl/12042-37-4.sdf"),
        new File("src/test/resources/ambit2/core/data/mdl/12401-47-7.mol"),
        new File("src/test/resources/ambit2/core/data/mdl/12401-47-7.sdf"),
        new File("src/test/resources/ambit2/core/data/mdl/polymer.mol")
    };
    int count = 0;
    RawIteratingFolderReader reader = new RawIteratingFolderReader(files);
    try {
        while (reader.hasNext()) {
            logger.info(reader.next().toString());
            count++;
        }
    } finally {
        // Release the reader even if reading or logging throws.
        reader.close();
    }
    Assert.assertEquals(6, count);
}
use of ambit2.core.io.RawIteratingFolderReader in project ambit-mirror by ideaconsult.
the class RepositoryWriterTest method testWriteMultipleFiles_i5d.
/**
 * Starts from the {@code empty-datasets.xml} database fixture, passes the non-hidden files in
 * {@code src/test/resources/ambit2/db/processors/i5d} to {@code write(...)} together with an
 * {@code EINECSKey}, and then verifies the row counts of the affected tables.
 *
 * @throws Exception if the database set-up, the import or any query fails
 */
@Test
public void testWriteMultipleFiles_i5d() throws Exception {
    setUpDatabaseFromResource("ambit2/db/processors/test/empty-datasets.xml");
    IDatabaseConnection c = getConnection();
    try {
        // Precondition: every table touched by the import starts out empty.
        ITable chemicals = c.createQueryTable("EXPECTED", "SELECT * FROM chemicals");
        Assert.assertEquals(0, chemicals.getRowCount());
        ITable strucs = c.createQueryTable("EXPECTED", "SELECT * FROM structure");
        Assert.assertEquals(0, strucs.getRowCount());
        ITable srcdataset = c.createQueryTable("EXPECTED", "SELECT * FROM src_dataset");
        Assert.assertEquals(0, srcdataset.getRowCount());
        ITable struc_src = c.createQueryTable("EXPECTED", "SELECT * FROM struc_dataset");
        Assert.assertEquals(0, struc_src.getRowCount());
        ITable property = c.createQueryTable("EXPECTED", "SELECT * FROM properties");
        Assert.assertEquals(0, property.getRowCount());
        ITable property_values = c.createQueryTable("EXPECTED", "SELECT * FROM property_values");
        Assert.assertEquals(0, property_values.getRowCount());

        File dir = new File("src/test/resources/ambit2/db/processors/i5d");
        FilenameFilter filter = new FilenameFilter() {
            @Override
            public boolean accept(File parent, String name) {
                // Skip hidden entries such as VCS metadata folders.
                return !name.startsWith(".");
            }
        };
        File[] files = dir.listFiles(filter);
        // listFiles() returns null when the path is not a readable directory.
        Assert.assertNotNull("Not a readable directory: " + dir, files);
        Assert.assertEquals(3, files.length);

        RawIteratingFolderReader reader = new RawIteratingFolderReader(files);
        try {
            write(reader, c.getConnection(), new EINECSKey());
        } finally {
            reader.close();
        }
    } finally {
        // Always release the connection, also when a precondition or the import fails.
        c.close();
    }

    // Verify the import through a fresh connection.
    c = getConnection();
    try {
        ITable chemicals = c.createQueryTable("EXPECTED", "SELECT * FROM chemicals");
        Assert.assertEquals(3, chemicals.getRowCount());
        ITable strucs = c.createQueryTable("EXPECTED", "SELECT * FROM structure");
        Assert.assertEquals(3, strucs.getRowCount());
        ITable srcdataset = c.createQueryTable("EXPECTED", "SELECT * FROM src_dataset where name='TEST INPUT'");
        Assert.assertEquals(1, srcdataset.getRowCount());
        ITable struc_src = c.createQueryTable("EXPECTED", "SELECT * FROM struc_dataset");
        Assert.assertEquals(3, struc_src.getRowCount());
        ITable property = c.createQueryTable("EXPECTED", "SELECT * FROM properties join catalog_references using(idreference) where name='Names' and title in ('IUCLID5 SYNONYM#2','IUCLID5')");
        Assert.assertEquals(2, property.getRowCount());
        property = c.createQueryTable("EXPECTED", "SELECT * FROM properties join catalog_references using(idreference) order by name");
        Assert.assertEquals(7, property.getRowCount());
        ITable property_values = c.createQueryTable("EXPECTED", "SELECT * FROM property_values");
        Assert.assertEquals(19, property_values.getRowCount());
        srcdataset = c.createQueryTable("EXPECTED", "SELECT * FROM src_dataset join template_def using(idtemplate) where name='TEST INPUT'");
        Assert.assertEquals(7, srcdataset.getRowCount());

        // One CasRN, EC and I5UUID string value is expected per imported structure.
        ITable p_cas = c.createQueryTable("EXPECTED", "SELECT idchemical,idstructure,value FROM property_values join property_string using(idvalue_string) join properties using(idproperty) where name='CasRN'");
        Assert.assertEquals(3, p_cas.getRowCount());
        ITable p_ec = c.createQueryTable("EXPECTED", "SELECT idchemical,idstructure,value FROM property_values join property_string using(idvalue_string) join properties using(idproperty) where name='EC'");
        Assert.assertEquals(3, p_ec.getRowCount());
        ITable p_uuid = c.createQueryTable("EXPECTED", "SELECT idchemical,idstructure,value FROM property_values join property_string using(idvalue_string) join properties using(idproperty) where name='I5UUID'");
        Assert.assertEquals(3, p_uuid.getRowCount());
    } finally {
        c.close();
    }
}
use of ambit2.core.io.RawIteratingFolderReader in project ambit-mirror by ideaconsult.
the class RepositoryWriterTest method testWriteMultipleFiles.
/**
 * Starts from the {@code empty-datasets.xml} database fixture, passes the non-hidden files in
 * {@code src/test/resources/ambit2/db/processors/sdf} to {@code write(...)}, and then verifies
 * the row counts of the affected tables.
 *
 * @throws Exception if the database set-up, the import or any query fails
 */
@Test
public void testWriteMultipleFiles() throws Exception {
    setUpDatabaseFromResource("ambit2/db/processors/test/empty-datasets.xml");
    IDatabaseConnection c = getConnection();
    try {
        // Precondition: every table touched by the import starts out empty.
        ITable chemicals = c.createQueryTable("EXPECTED", "SELECT * FROM chemicals");
        Assert.assertEquals(0, chemicals.getRowCount());
        ITable strucs = c.createQueryTable("EXPECTED", "SELECT * FROM structure");
        Assert.assertEquals(0, strucs.getRowCount());
        ITable srcdataset = c.createQueryTable("EXPECTED", "SELECT * FROM src_dataset");
        Assert.assertEquals(0, srcdataset.getRowCount());
        ITable struc_src = c.createQueryTable("EXPECTED", "SELECT * FROM struc_dataset");
        Assert.assertEquals(0, struc_src.getRowCount());
        ITable property = c.createQueryTable("EXPECTED", "SELECT * FROM properties");
        Assert.assertEquals(0, property.getRowCount());
        ITable property_values = c.createQueryTable("EXPECTED", "SELECT * FROM property_values");
        Assert.assertEquals(0, property_values.getRowCount());

        File dir = new File("src/test/resources/ambit2/db/processors/sdf");
        FilenameFilter filter = new FilenameFilter() {
            @Override
            public boolean accept(File parent, String name) {
                // Skip hidden entries such as VCS metadata folders.
                return !name.startsWith(".");
            }
        };
        File[] files = dir.listFiles(filter);
        // listFiles() returns null when the path is not a readable directory.
        Assert.assertNotNull("Not a readable directory: " + dir, files);
        Assert.assertEquals(12, files.length);

        RawIteratingFolderReader reader = new RawIteratingFolderReader(files);
        try {
            write(reader, c.getConnection());
        } finally {
            reader.close();
        }
    } finally {
        // Always release the connection, also when a precondition or the import fails.
        c.close();
    }

    // Verify the import through a fresh connection.
    c = getConnection();
    try {
        ITable chemicals = c.createQueryTable("EXPECTED", "SELECT * FROM chemicals");
        Assert.assertEquals(9, chemicals.getRowCount());
        // There are two empty files without the $$$$ sign, which are skipped.
        ITable strucs = c.createQueryTable("EXPECTED", "SELECT * FROM structure");
        Assert.assertEquals(17, strucs.getRowCount());
        ITable srcdataset = c.createQueryTable("EXPECTED", "SELECT * FROM src_dataset where name='TEST INPUT'");
        Assert.assertEquals(1, srcdataset.getRowCount());
        ITable struc_src = c.createQueryTable("EXPECTED", "SELECT * FROM struc_dataset");
        Assert.assertEquals(17, struc_src.getRowCount());
        ITable property = c.createQueryTable("EXPECTED", "SELECT * FROM properties");
        Assert.assertEquals(214, property.getRowCount());
        ITable property_values = c.createQueryTable("EXPECTED", "SELECT * FROM property_values");
        Assert.assertEquals(370, property_values.getRowCount());
        srcdataset = c.createQueryTable("EXPECTED", "SELECT * FROM src_dataset join template_def using(idtemplate) where name='TEST INPUT'");
        Assert.assertEquals(214, srcdataset.getRowCount());
        ITable p_cas = c.createQueryTable("EXPECTED", "SELECT idchemical,idstructure,name,value FROM structure join values_string using(idstructure) where name=\"CasRN\"");
        Assert.assertEquals(12, p_cas.getRowCount());
    } finally {
        c.close();
    }
    /*
     * Removing redundant properties:
     *
     * insert ignore into property_values
     * select id,idproperty,idstructure,idvalue,idtype,user_name,status from
     * property_values where idstructure>3 on duplicate key update
     * idstructure=3 delete from property_values where idstructure>3
     *
     * insert ignore into struc_dataset select idstructure,id_srcdataset
     * from struc_dataset where idstructure>3 on duplicate key update
     * idstructure=3 delete from struc_dataset where idstructure>3
     */
}
Aggregations