Search in sources:

Example 1 with RawIteratingSDFReader

use of ambit2.core.io.RawIteratingSDFReader in project ambit-mirror by ideaconsult.

the class RawIteratingFolderReader method getItemReader.

/**
 * Creates a raw reader for the file at position {@code index} of {@code files},
 * chosen by the (case-insensitive) file name extension.
 * <ul>
 * <li>SDF and MOL files: the reader gets a reference built from the file name
 * and a {@code file:///} URL of its absolute path.</li>
 * <li>I5D files: the reader comes from {@code FileInputState.getI5DReader}; a
 * reference is set only if that reader is an {@code ICiteable}.</li>
 * <li>ZIP files: a {@code ZipReader} over the file.</li>
 * <li>GZ files: treated as gzipped SDF; no reference is set on this reader.</li>
 * </ul>
 *
 * @param index position of the file in {@code files}
 * @return a reader over the selected file
 * @throws Exception if the extension is not one of the supported ones, or if
 *         the file cannot be opened
 */
protected IRawReader<IStructureRecord> getItemReader(int index) throws Exception {
    // Locale.ROOT keeps the extension match independent of the default locale
    // (with a Turkish default locale "ZIP" would lower-case to "z\u0131p").
    String name = files[index].getName().toLowerCase(java.util.Locale.ROOT);
    if (name.endsWith(FileInputState._FILE_TYPE.SDF_INDEX.getExtension())) {
        RawIteratingSDFReader r = new RawIteratingSDFReader(new FileReader(files[index]));
        r.setReference(LiteratureEntry.getInstance(files[index].getName(), "file:///" + files[index].getAbsolutePath()));
        return (IRawReader<IStructureRecord>) r;
    } else if (name.endsWith(FileInputState._FILE_TYPE.MOL_INDEX.getExtension())) {
        RawIteratingMOLReader r = new RawIteratingMOLReader(new FileReader(files[index]));
        r.setReference(LiteratureEntry.getInstance(files[index].getName(), "file:///" + files[index].getAbsolutePath()));
        return (IRawReader<IStructureRecord>) r;
    } else if (name.endsWith(FileInputState._FILE_TYPE.I5D_INDEX.getExtension())) {
        IIteratingChemObjectReader r = FileInputState.getI5DReader(files[index]);
        if (r instanceof ICiteable) {
            ((ICiteable) r).setReference(LiteratureEntry.getI5UUIDReference());
        }
        return (IRawReader<IStructureRecord>) r;
    } else if (name.endsWith(FileInputState._FILE_TYPE.ZIP_INDEX.getExtension())) {
        return new ZipReader(files[index]);
    } else if (name.endsWith(FileInputState._FILE_TYPE.GZ_INDEX.getExtension())) {
        // assuming gzipped sdf only...
        FileInputStream fileStream = new FileInputStream(files[index]);
        try {
            return new RawIteratingSDFReader(new InputStreamReader(new GZIPInputStream(fileStream)));
        } catch (Exception x) {
            // The GZIP header is read eagerly; do not leak the file handle
            // when the file turns out not to be valid gzip.
            try {
                fileStream.close();
            } catch (Exception ignored) {
                // the original failure is the one worth reporting
            }
            throw x;
        }
    } else {
        throw new Exception("Unsupported format " + name);
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) IIteratingChemObjectReader(org.openscience.cdk.io.iterator.IIteratingChemObjectReader) FileInputStream(java.io.FileInputStream) GZIPInputStream(java.util.zip.GZIPInputStream) IStructureRecord(ambit2.base.interfaces.IStructureRecord) ICiteable(ambit2.base.interfaces.ICiteable) FileReader(java.io.FileReader)

Example 2 with RawIteratingSDFReader

use of ambit2.core.io.RawIteratingSDFReader in project ambit-mirror by ideaconsult.

the class FingerPrintGeneratorTest method test.

/**
 * Reads every record of {@code ambit2/core/data/fp/fptest.mol}, generates its
 * fingerprint twice and verifies that both runs give the same bit set and that
 * the bit set, converted to 16 {@link BigInteger} values, matches
 * {@code expected}.
 *
 * @throws Exception if the resource cannot be read or processed
 */
@Test
public void test() throws Exception {
    /* Previously expected values, kept for reference:
				new BigInteger("1126174803623952"), 
				new BigInteger("1152961362175993860"), 
				new BigInteger("2305843011903291728"),
				new BigInteger("25769820160"), 
				new BigInteger("1099511627776"),
				new BigInteger("33554496"), 
				new BigInteger("8864814662912"),
				new BigInteger("4803285265416200"), 
				new BigInteger("9223372050830336000"),
				new BigInteger("2454675102173515776"),
				new BigInteger("1152921504611041536"),
				new BigInteger("155684661740175875"), 
				new BigInteger("6917529131798265856"), 
				new BigInteger("310748377023266816"),
				new BigInteger("67110400"), 
				new BigInteger("2305843009213710336")
				*/
    BigInteger[] expected = { new BigInteger("576462128137896992"), new BigInteger("10376293541473171584"), new BigInteger("2305843009214747777"), new BigInteger("72127962782121984"), new BigInteger("52776558133248"), new BigInteger("0"), new BigInteger("338649581355264"), new BigInteger("4366794756"), new BigInteger("17730770043904"), new BigInteger("2314887591865356288"), new BigInteger("2305844109263241488"), new BigInteger("2400709834665132544"), new BigInteger("13835058125075449856"), new BigInteger("10741891072"), new BigInteger("9007208196998144"), new BigInteger("18014398511644672") };
    MoleculeReader molreader = new MoleculeReader();
    FingerprintGenerator gen = new FingerprintGenerator(new Fingerprinter());
    InputStream in = FingerPrintGeneratorTest.class.getClassLoader().getResourceAsStream("ambit2/core/data/fp/fptest.mol");
    // Fail with a clear assertion instead of a NullPointerException if the resource is missing.
    Assert.assertNotNull(in);
    RawIteratingSDFReader reader = new RawIteratingSDFReader(new InputStreamReader(in));
    try {
        while (reader.hasNext()) {
            IStructureRecord record = reader.nextRecord();
            BitSet bs1 = gen.process(molreader.process(record));
            BigInteger[] h16 = new BigInteger[16];
            MoleculeTools.bitset2bigint16(bs1, 64, h16);
            // Second run over the same record: the fingerprint must be reproducible.
            BitSet bs2 = gen.process(molreader.process(record));
            Assert.assertEquals(bs1, bs2);
            MoleculeTools.bitset2bigint16(bs2, 64, h16);
            for (int i = 0; i < 16; i++) {
                Assert.assertEquals(expected[i], h16[i]);
            }
        }
    } finally {
        // Release the underlying stream even when an assertion fails.
        reader.close();
    }
}
Also used : RawIteratingSDFReader(ambit2.core.io.RawIteratingSDFReader) InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) BitSet(java.util.BitSet) FingerprintGenerator(ambit2.core.processors.structure.FingerprintGenerator) IStructureRecord(ambit2.base.interfaces.IStructureRecord) MoleculeReader(ambit2.core.processors.structure.MoleculeReader) MACCSFingerprinter(org.openscience.cdk.fingerprint.MACCSFingerprinter) ExtendedFingerprinter(org.openscience.cdk.fingerprint.ExtendedFingerprinter) SubstructureFingerprinter(org.openscience.cdk.fingerprint.SubstructureFingerprinter) PubchemFingerprinter(org.openscience.cdk.fingerprint.PubchemFingerprinter) Fingerprinter(org.openscience.cdk.fingerprint.Fingerprinter) HybridizationFingerprinter(org.openscience.cdk.fingerprint.HybridizationFingerprinter) EStateFingerprinter(org.openscience.cdk.fingerprint.EStateFingerprinter) Random(java.util.Random) BigInteger(java.math.BigInteger) Test(org.junit.Test)

Example 3 with RawIteratingSDFReader

use of ambit2.core.io.RawIteratingSDFReader in project ambit-mirror by ideaconsult.

the class RawIteratingWrapperTest method testPubChemSubstance.

/**
 * Reads {@code ambit2/core/pubchem/tox21_excerpt.sdf}, normalizes every record
 * and verifies that the properties named {@code PUBCHEM_SID},
 * {@code PUBCHEM Name}, {@code DSSTox_GSID}, {@code CASRN} and
 * {@code DSSTox_RID} have non-null values whenever they are present.
 * Expects three records in total and three {@code PUBCHEM_SID} occurrences.
 *
 * @throws Exception if the resource cannot be read or normalized
 */
@Test
public void testPubChemSubstance() throws Exception {
    InputStream in = RawIteratingWrapperTest.class.getClassLoader().getResourceAsStream("ambit2/core/pubchem/tox21_excerpt.sdf");
    // Check the resource itself; a null check on a freshly constructed reader can never fail.
    Assert.assertNotNull(in);
    RawIteratingSDFReader reader = new RawIteratingSDFReader(new InputStreamReader(in));
    try {
        reader.setReference(LiteratureEntry.getInstance("tox21.sdf"));
        StructureNormalizer normalizer = new StructureNormalizer();
        int count = 0;
        int sid = 0;
        while (reader.hasNext()) {
            IStructureRecord record = (IStructureRecord) reader.next();
            IStructureRecord normalized = normalizer.process(record);
            for (Property p : normalized.getRecordProperties()) {
                if ("PUBCHEM_SID".equals(p.getName())) {
                    Assert.assertNotNull(normalized.getRecordProperty(p));
                    sid++;
                } else if ("PUBCHEM Name".equals(p.getName())) {
                    Assert.assertNotNull(normalized.getRecordProperty(p));
                } else if ("DSSTox_GSID".equals(p.getName())) {
                    Assert.assertNotNull(normalized.getRecordProperty(p));
                } else if ("CASRN".equals(p.getName())) {
                    Assert.assertNotNull(normalized.getRecordProperty(p));
                } else if ("DSSTox_RID".equals(p.getName())) {
                    Assert.assertNotNull(normalized.getRecordProperty(p));
                }
            }
            count++;
        }
        Assert.assertEquals(3, sid);
        Assert.assertEquals(3, count);
    } finally {
        // Close the reader even when one of the assertions above fails.
        reader.close();
    }
}
Also used : IStructureRecord(ambit2.base.interfaces.IStructureRecord) RawIteratingSDFReader(ambit2.core.io.RawIteratingSDFReader) InputStreamReader(java.io.InputStreamReader) StructureNormalizer(ambit2.core.processors.StructureNormalizer) InputStream(java.io.InputStream) Property(ambit2.base.data.Property) Test(org.junit.Test)

Example 4 with RawIteratingSDFReader

use of ambit2.core.io.RawIteratingSDFReader in project ambit-mirror by ideaconsult.

the class SplitSDFTest method split.

/**
 * Splits an SDF file into one MOL file per record, written next to the input
 * file.
 * <p>
 * The output file name and the new first line of the record are taken from the
 * first of these tags found in the record: CAS number ({@code CAS_<cas>.mol}),
 * EC number ({@code EC_<ec>.mol}), standard InChIKey ({@code <key>.mol}). If
 * only the substance name is present it replaces the first line, and the file
 * name is the input name with {@code .sdf} replaced by
 * {@code _<recordNo>.mol}; the same default name is used when no tag is found.
 * Only the content up to and including the {@code M  END} line is written.
 * <p>
 * Any exception raised while iterating is printed to standard error and ends
 * the loop; it is not propagated.
 *
 * @param file the SDF file to split
 * @return the number of records read before the end of the file or a failure
 * @throws Exception if the file cannot be opened or closed
 */
public int split(File file) throws Exception {
    final String ECTAG = "> <EC>";
    final String CASTAG = "> <CasRN>";
    final String NAMETAG = "> <Substance Name>";
    final String inchitag = "> <http://www.opentox.org/api/1.1#InChIKey_std>";
    final String molEnd = "M  END\n";
    FileReader reader = new FileReader(file);
    RawIteratingSDFReader iterator = new RawIteratingSDFReader(reader);
    int recordNo = 0;
    try {
        while (iterator.hasNext()) {
            IStructureRecord record = iterator.nextRecord();
            recordNo++;
            String molFile = file.getName().replace(".sdf", String.format("_%d.mol", recordNo));
            String name = getProperty(record.getContent(), NAMETAG);
            String inchi = getProperty(record.getContent(), inchitag);
            if (inchi != null) {
                System.out.println(inchi);
            }
            String cas = getProperty(record.getContent(), CASTAG);
            String ec = getProperty(record.getContent(), ECTAG);
            String title = null;
            if (cas != null) {
                title = cas.trim();
                molFile = String.format("CAS_%s.mol", title);
            } else if (ec != null) {
                title = ec.trim();
                molFile = String.format("EC_%s.mol", title);
            } else if (inchi != null) {
                title = inchi.trim();
                molFile = String.format("%s.mol", title);
            } else if (name != null) {
                title = name.trim();
            } else {
                System.out.println(record.getContent());
            }
            if (title != null) {
                // replace first line
                int nameindex = record.getContent().indexOf("\n");
                if (nameindex >= 0) {
                    record.setContent(String.format("%s\n%s", title, record.getContent().substring(nameindex + 1)));
                }
            }
            int mend = record.getContent().indexOf(molEnd);
            if (mend > 0) {
                // File(parent, child) treats a null parent as "no parent", so a
                // file given without a directory is written to the working
                // directory rather than to "/<name>".
                FileWriter writer = new FileWriter(new File(file.getParentFile(), molFile));
                try {
                    writer.write(record.getContent().substring(0, mend + molEnd.length()));
                    writer.flush();
                } finally {
                    // Do not leak the file handle if writing fails.
                    writer.close();
                }
            }
        // if (recordNo > 3) break;
        }
    } catch (Exception x) {
        // Best effort: report the failure and return the count reached so far.
        x.printStackTrace();
    } finally {
        reader.close();
    }
    return recordNo;
}
Also used : IStructureRecord(ambit2.base.interfaces.IStructureRecord) RawIteratingSDFReader(ambit2.core.io.RawIteratingSDFReader) FileWriter(java.io.FileWriter) FileReader(java.io.FileReader)

Example 5 with RawIteratingSDFReader

use of ambit2.core.io.RawIteratingSDFReader in project ambit-mirror by ideaconsult.

the class RepositoryWriterTest method testImportDX.

/**
 * Imports two SDF files with predictions into a database initialised from
 * {@code dataset-properties.xml} and checks the table row counts before the
 * import, after the first file and after the second file.
 * <p>
 * Each phase uses its own connection, which is closed before the next one is
 * obtained.
 *
 * @throws Exception if the database or the resources cannot be accessed
 */
@Test
public void testImportDX() throws Exception {
    setUpDatabaseFromResource("ambit2/db/processors/test/dataset-properties.xml");
    IDatabaseConnection c = getConnection();
    // Initial state as loaded from the dataset resource.
    ITable chemicals = c.createQueryTable("EXPECTED", "SELECT * FROM chemicals");
    Assert.assertEquals(5, chemicals.getRowCount());
    ITable strucs = c.createQueryTable("EXPECTED", "SELECT * FROM structure");
    Assert.assertEquals(5, strucs.getRowCount());
    ITable srcdataset = c.createQueryTable("EXPECTED", "SELECT * FROM src_dataset");
    Assert.assertEquals(1, srcdataset.getRowCount());
    ITable struc_src = c.createQueryTable("EXPECTED", "SELECT * FROM struc_dataset");
    Assert.assertEquals(2, struc_src.getRowCount());
    ITable property = c.createQueryTable("EXPECTED", "SELECT * FROM properties");
    Assert.assertEquals(4, property.getRowCount());
    ITable property_values = c.createQueryTable("EXPECTED", "SELECT * FROM property_values");
    Assert.assertEquals(4, property_values.getRowCount());
    ITable template_def = c.createQueryTable("EXPECTED", "SELECT * FROM template_def");
    Assert.assertEquals(3, template_def.getRowCount());
    // First import.
    InputStream in = this.getClass().getClassLoader().getResourceAsStream("ambit2/core/data/dx/predictions.sdf");
    Assert.assertNotNull(in);
    RawIteratingSDFReader reader = new RawIteratingSDFReader(new InputStreamReader(in));
    // reader.setReference("predictions.sdf");
    write(reader, c.getConnection(), new CASKey());
    reader.close();
    c.close();
    c = getConnection();
    chemicals = c.createQueryTable("EXPECTED", "SELECT * FROM chemicals");
    Assert.assertEquals(6, chemicals.getRowCount());
    strucs = c.createQueryTable("EXPECTED", "SELECT * FROM structure");
    Assert.assertEquals(6, strucs.getRowCount());
    // srcdataset =
    // c.createQueryTable("EXPECTED","SELECT * FROM src_dataset where name='Imported properties'");
    // Assert.assertEquals(1,srcdataset.getRowCount());
    // struc_src =
    // c.createQueryTable("EXPECTED","SELECT * FROM struc_dataset join src_dataset using(id_srcdataset) where name='Imported properties'");
    // Assert.assertEquals(1,struc_src.getRowCount());
    property = c.createQueryTable("EXPECTED", "SELECT * FROM properties");
    Assert.assertEquals(31, property.getRowCount());
    property_values = c.createQueryTable("EXPECTED", "SELECT * FROM property_values");
    Assert.assertEquals(31, property_values.getRowCount());
    property_values = c.createQueryTable("EXPECTED", "SELECT * FROM property_values where idstructure=100215");
    Assert.assertEquals(2, property_values.getRowCount());
    // ITable p_tuples =
    // c.createQueryTable("EXPECTED","SELECT * FROM property_tuples join tuples using(idtuple) join src_dataset using(id_srcdataset) where name='Imported properties'");
    // Assert.assertEquals(66,p_tuples.getRowCount());
    c.close();
    // Second import.
    c = getConnection();
    in = this.getClass().getClassLoader().getResourceAsStream("ambit2/core/data/dx/predictions1.sdf");
    Assert.assertNotNull(in);
    reader = new RawIteratingSDFReader(new InputStreamReader(in));
    // reader.setReference(LiteratureEntry.getDXReference());
    write(reader, c.getConnection(), new CASKey());
    reader.close();
    // Close the import connection before replacing it, as after the first
    // import; otherwise it is never released.
    c.close();
    c = getConnection();
    property = c.createQueryTable("EXPECTED", "SELECT * FROM properties");
    Assert.assertEquals(31, property.getRowCount());
    property_values = c.createQueryTable("EXPECTED", "SELECT * FROM property_values");
    Assert.assertEquals(47, property_values.getRowCount());
    template_def = c.createQueryTable("EXPECTED", "SELECT * FROM template_def");
    Assert.assertEquals(30, template_def.getRowCount());
    c.close();
}
Also used : RawIteratingSDFReader(ambit2.core.io.RawIteratingSDFReader) InputStreamReader(java.io.InputStreamReader) CASKey(ambit2.core.processors.structure.key.CASKey) InputStream(java.io.InputStream) ITable(org.dbunit.dataset.ITable) IDatabaseConnection(org.dbunit.database.IDatabaseConnection) Test(org.junit.Test)

Aggregations

RawIteratingSDFReader (ambit2.core.io.RawIteratingSDFReader): 30; InputStreamReader (java.io.InputStreamReader): 26; Test (org.junit.Test): 21; IStructureRecord (ambit2.base.interfaces.IStructureRecord): 19; InputStream (java.io.InputStream): 18; IDatabaseConnection (org.dbunit.database.IDatabaseConnection): 13; ITable (org.dbunit.dataset.ITable): 12; MoleculeReader (ambit2.core.processors.structure.MoleculeReader): 7; FileReader (java.io.FileReader): 6; IAtomContainer (org.openscience.cdk.interfaces.IAtomContainer): 6; File (java.io.File): 5; IIteratingChemObjectReader (org.openscience.cdk.io.iterator.IIteratingChemObjectReader): 5; IOException (java.io.IOException): 4; AmbitException (net.idea.modbcum.i.exceptions.AmbitException): 4; AtomConfigurator (ambit2.core.processors.structure.AtomConfigurator): 3; BitSet (java.util.BitSet): 3; GZIPInputStream (java.util.zip.GZIPInputStream): 3; Property (ambit2.base.data.Property): 2; StructureRecord (ambit2.base.data.StructureRecord): 2; IteratingDelimitedFileReader (ambit2.core.io.IteratingDelimitedFileReader): 2