Search in sources :

Example 1 with I5ZReader

use of net.idea.i5.io.I5ZReader in project ambit-mirror by ideaconsult.

the class I5SubstanceWriterTest method testWriteMultipleFiles_i5d.

/**
 * Imports the {@code ambit2/db/substance/testNM.i5z} test archive into a database
 * initialised from {@code empty-datasets.xml} and verifies the row counts written
 * to the substance, chemicals, structure, dataset and study tables.
 *
 * @throws Exception if database setup, the import, or any query fails
 */
@Test
public void testWriteMultipleFiles_i5d() throws Exception {
    setUpDatabaseFromResource("ambit2/db/processors/test/empty-datasets.xml");
    IDatabaseConnection c = getConnection();
    File i5z = null;
    int records = 0;
    try {
        // Precondition: every table touched by the import starts out empty.
        ITable chemicals = c.createQueryTable("EXPECTED", "SELECT * FROM chemicals");
        Assert.assertEquals(0, chemicals.getRowCount());
        ITable strucs = c.createQueryTable("EXPECTED", "SELECT * FROM structure");
        Assert.assertEquals(0, strucs.getRowCount());
        ITable srcdataset = c.createQueryTable("EXPECTED", "SELECT * FROM src_dataset");
        Assert.assertEquals(0, srcdataset.getRowCount());
        ITable struc_src = c.createQueryTable("EXPECTED", "SELECT * FROM struc_dataset");
        Assert.assertEquals(0, struc_src.getRowCount());
        ITable property = c.createQueryTable("EXPECTED", "SELECT * FROM properties");
        Assert.assertEquals(0, property.getRowCount());
        ITable property_values = c.createQueryTable("EXPECTED", "SELECT * FROM property_values");
        Assert.assertEquals(0, property_values.getRowCount());

        /*
         * Now reading only substances and reference substances. Document types:
         * EndpointStudyRecord: 877, AttachmentDocument: 5, LegalEntity: 1,
         * ReferenceSubstance: 6, Substance: 1, EndpointRecord: 14
         *
         * Alternative inputs previously used here (loaded through
         * I5AmbitProcessor.class.getClassLoader() from net/idea/i5/_5/substance/i5z/):
         *   IUC4-efdb21bb-e79f-3286-a988-b6f6944d3734.i5z
         *   IUC4-e2b69497-1c50-3d0b-a2b2-41d0a4d74c54.i5z
         *   IUC4-f5dd46ce-6fc9-316f-a468-c4f9acfcfc3c.i5z
         */
        InputStream in = this.getClass().getClassLoader().getResourceAsStream("ambit2/db/substance/testNM.i5z");
        Assert.assertNotNull(in);

        // I5ZReader is constructed from a File, so copy the classpath resource to a temp file.
        i5z = File.createTempFile("test_", ".i5z");
        try {
            DownloadTool.download(in, i5z);
        } finally {
            in.close();
        }
        Assert.assertTrue(i5z.exists());

        I5ZReader reader = null;
        try {
            reader = new I5ZReader(i5z);
            QASettings qa = new QASettings(false);
            qa.setAll();
            reader.setQASettings(qa);
            PropertyKey key = new ReferenceSubstanceUUID();
            records = write(null, reader, c.getConnection(), key, true, false, false);
        } finally {
            // Guard against a failed constructor instead of relying on a swallowed NPE.
            if (reader != null) {
                try {
                    reader.close();
                } catch (Exception ignored) {
                    // best-effort cleanup; a close failure must not mask the import result
                }
            }
        }
    } finally {
        // Release the connection and the temp file even when an assertion above fails.
        try {
            c.close();
        } catch (Exception ignored) {
            // best-effort cleanup
        }
        if (i5z != null) {
            try {
                i5z.delete();
            } catch (Exception ignored) {
                // best-effort cleanup
            }
        }
    }
    Assert.assertEquals(8, records);

    // Verify the import result on a fresh connection.
    c = getConnection();
    try {
        ITable substance = c.createQueryTable("EXPECTED", "SELECT * FROM substance");
        Assert.assertEquals(1, substance.getRowCount());
        Assert.assertNotNull(substance.getValue(0, "uuid"));
        ITable chemicals = c.createQueryTable("EXPECTED", "SELECT * FROM chemicals");
        Assert.assertEquals(1, chemicals.getRowCount());
        // there are two empty files without the $$$$ sign, which are skipped
        ITable strucs = c.createQueryTable("EXPECTED", "SELECT * FROM structure");
        Assert.assertEquals(1, strucs.getRowCount());
        ITable srcdataset = c.createQueryTable("EXPECTED", "SELECT * FROM src_dataset where name='IUCLID5 .i5z file'");
        Assert.assertEquals(1, srcdataset.getRowCount());
        ITable struc_src = c.createQueryTable("EXPECTED", "SELECT * FROM struc_dataset");
        Assert.assertEquals(1, struc_src.getRowCount());
        ITable property = c.createQueryTable("EXPECTED", "SELECT * FROM substance_protocolapplication where topcategory='P-CHEM' and endpointcategory='ASPECT_RATIO_SHAPE_SECTION' and interpretation_result='spherical'");
        Assert.assertEquals(1, property.getRowCount());
        property = c.createQueryTable("EXPECTED", "SELECT * FROM substance_experiment where topcategory='P-CHEM' and endpointcategory='ASPECT_RATIO_SHAPE_SECTION'");
        Assert.assertEquals(4, property.getRowCount());
    } finally {
        c.close();
    }
}
Also used : ReferenceSubstanceUUID(ambit2.core.processors.structure.key.ReferenceSubstanceUUID) InputStream(java.io.InputStream) QASettings(net.idea.i5.io.QASettings) ITable(org.dbunit.dataset.ITable) IDatabaseConnection(org.dbunit.database.IDatabaseConnection) I5ZReader(net.idea.i5.io.I5ZReader) File(java.io.File) PropertyKey(ambit2.core.processors.structure.key.PropertyKey) Test(org.junit.Test)

Example 2 with I5ZReader

use of net.idea.i5.io.I5ZReader in project ambit-mirror by ideaconsult.

the class Context method importI5Z.

/**
 * Imports the archive referenced by {@code inputFile} into the database, reading
 * it with an {@link I6ZReader} when {@code i6} is set and an {@link I5ZReader}
 * otherwise.
 *
 * @param keytomatch key used to match structures; a new {@link CASKey} is used when {@code null}
 * @param i6         {@code true} to read the file as .i6z, {@code false} as .i5z
 * @param validator  record validator passed through to {@code write}
 * @return the value returned by {@code write}
 * @throws Exception if the connection cannot be obtained, the import fails,
 *                   or the reader cannot be closed
 */
protected int importI5Z(IStructureKey keytomatch, boolean i6, StructureRecordValidator validator) throws Exception {
    // validator uses parsertype
    setParserType(i6 ? _parsertype.i6z : _parsertype.i5z);
    logger_cli.log(Level.INFO, "MSG_IMPORT", new Object[] { String.format("i%sz", i6 ? "6" : "5"), inputFile.getAbsolutePath() });
    IZReader reader = null;
    Connection c = null;
    try {
        DBConnectionConfigurable<Context> dbc = getConnection(getConfigFile());
        c = dbc.getConnection();
        c.setAutoCommit(true);

        I5Options options = new I5Options();
        options.setMaxReferenceStructures(maxRefSubstances);
        options.setExceptionOnMaxReferenceStructures(false);
        options.setAllowMultipleSubstances(false);
        if (i6) {
            reader = new I6ZReader<>(inputFile, options);
        } else {
            reader = new I5ZReader<>(inputFile, options);
        }

        QASettings qa = new QASettings(false);
        qa.setAll();
        reader.setQASettings(qa);

        matchByKey = keytomatch == null ? new CASKey() : keytomatch;
        return write(reader, c, matchByKey, true, clearMeasurements, clearComposition, validator, null, true, false);
    } finally {
        // Close the reader first, but make sure a failure there cannot skip
        // closing the connection (previously the connection leaked in that case).
        try {
            if (reader != null) {
                reader.close();
            }
        } finally {
            if (c != null) {
                try {
                    c.close();
                } catch (Exception ignored) {
                    // best-effort cleanup; nothing useful can be done if close fails
                }
            }
        }
    }
}
Also used : I5Options(net.idea.i5.io.I5Options) IZReader(net.idea.i5.io.IZReader) CASKey(ambit2.core.processors.structure.key.CASKey) Connection(java.sql.Connection) MySQLSingleConnection(net.idea.modbcum.c.MySQLSingleConnection) QASettings(net.idea.i5.io.QASettings) I5ZReader(net.idea.i5.io.I5ZReader) I6ZReader(net.idea.i6.io.I6ZReader) AmbitException(net.idea.modbcum.i.exceptions.AmbitException) CommunicationsException(com.mysql.jdbc.CommunicationsException) FileNotFoundException(java.io.FileNotFoundException) SQLException(java.sql.SQLException) ConnectException(java.net.ConnectException) IOException(java.io.IOException)

Example 3 with I5ZReader

use of net.idea.i5.io.I5ZReader in project ambit-mirror by ideaconsult.

the class CallableSubstanceImporter method createBatch.

/**
 * Creates the batch processor for a substance import. The processor's iterator
 * is a record reader chosen from the name of the file held by the input state
 * (.i5z, .i6z, CSV, .rdf, .ttl, XLS/XLSX or .json); when {@code writer} is a
 * {@link DBSubstanceWriter} it is configured to suit the selected format.
 *
 * @param target input state holding the file to import
 * @return the batch processor
 * @throws ResourceException with {@code CLIENT_ERROR_BAD_REQUEST} if {@code target} is {@code null}
 */
@Override
protected AbstractBatchProcessor createBatch(FileInputState target) throws Exception {
    if (target == null)
        throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST);
    final BatchDBProcessor<String> batch = new BatchDBProcessor<String>() {

        /**
         */
        private static final long serialVersionUID = 5712170806359764006L;

        /**
         * Opens the reader matching the file's extension.
         *
         * @throws AmbitException if the format is unsupported, an Excel file is given
         *                        without a JSON configuration, or the reader cannot be created
         */
        @Override
        public Iterator<String> getIterator(IInputState target) throws AmbitException {
            try {
                IRawReader<IStructureRecord> reader = null;
                File file = ((FileInputState) target).getFile();
                // Locale.ROOT keeps extension matching independent of the default locale
                // (e.g. in a Turkish locale "I5Z".toLowerCase() does not yield "i5z").
                String fileName = file.getName().toLowerCase(java.util.Locale.ROOT);
                // Non-null only when the configured writer is a DBSubstanceWriter.
                final DBSubstanceWriter substanceWriter = (writer instanceof DBSubstanceWriter) ? (DBSubstanceWriter) writer : null;

                if (fileName.endsWith(FileInputState._FILE_TYPE.I5Z_INDEX.getExtension())) {
                    if (substanceWriter != null) {
                        substanceWriter.setSplitRecord(true);
                        substanceWriter.setI5mode(true);
                    }
                    reader = new I5ZReader(file);
                    ((I5ZReader) reader).setQASettings(getQASettings());
                } else if (fileName.endsWith(FileInputState._FILE_TYPE.I6Z_INDEX.getExtension())) {
                    if (substanceWriter != null) {
                        substanceWriter.setSplitRecord(true);
                        substanceWriter.setI5mode(true);
                    }
                    reader = new I6ZReader(file);
                    ((I6ZReader) reader).setQASettings(getQASettings());
                } else if (fileName.endsWith(FileInputState._FILE_TYPE.CSV_INDEX.getExtension())) {
                    if (substanceWriter != null) {
                        substanceWriter.setSplitRecord(false);
                    }
                    LiteratureEntry reference = new LiteratureEntry(originalname, originalname);
                    // NOTE(review): FileReader uses the platform default charset, unlike the
                    // explicit "UTF-8" used for the RDF/TTL/JSON readers below - confirm intended.
                    reader = new CSV12SubstanceReader(new CSV12Reader(new FileReader(file), reference, "FCSV-"));
                } else if (fileName.endsWith(".rdf")) {
                    if (substanceWriter != null) {
                        substanceWriter.setSplitRecord(false);
                        substanceWriter.setImportBundles(true);
                    }
                    reader = new NanoWikiRDFReader(new InputStreamReader(new FileInputStream(file), "UTF-8"), null, "RDF/XML");
                } else if (fileName.endsWith(".ttl")) {
                    if (substanceWriter != null) {
                        substanceWriter.setSplitRecord(false);
                        substanceWriter.setImportBundles(true);
                    }
                    reader = new ENanoMapperRDFReader(new InputStreamReader(new FileInputStream(file), "UTF-8"), "ENM3");
                } else if (FileInputState._FILE_TYPE.XLSX_INDEX.hasExtension(fileName) || FileInputState._FILE_TYPE.XLS_INDEX.hasExtension(fileName)) {
                    if (configFile == null)
                        throw new AmbitException("XLS/XLSX file import requires a JSON configuration file");
                    final StructureRecordValidator validator = new StructureRecordValidator(file.getName(), true, "XLSX");
                    reader = new GenericExcelParser(new FileInputStream(file), configFile, FileInputState._FILE_TYPE.XLSX_INDEX.hasExtension(fileName)) {

                        // Runs each structure record through the validator before handing it out.
                        public Object next() {
                            Object record = super.next();
                            try {
                                if (record instanceof IStructureRecord)
                                    record = validator.process((IStructureRecord) record);
                            } catch (Exception ignored) {
                                // best effort: if validation fails the record is returned as parsed
                            }
                            return record;
                        }
                    };
                    if (substanceWriter != null) {
                        substanceWriter.setSplitRecord(false);
                        /*
                         * Disabled for this format:
                         * substanceWriter.setClearComposition(false);
                         * substanceWriter.setClearMeasurements(false);
                         */
                    }
                } else if (fileName.endsWith(".json")) {
                    if (substanceWriter != null) {
                        substanceWriter.setSplitRecord(false);
                    }
                    reader = new SubstanceStudyParser(new InputStreamReader(new FileInputStream(file), "UTF-8")) {

                        // Looks up the I5_ROOT_OBJECTS constant named "<category>_SECTION" to create
                        // the effect record; falls back to the parent implementation on any failure.
                        protected EffectRecord createEffectRecord(Protocol protocol) {
                            try {
                                I5_ROOT_OBJECTS category = I5_ROOT_OBJECTS.valueOf(protocol.getCategory() + "_SECTION");
                                return category.createEffectRecord();
                            } catch (Exception x) {
                                return super.createEffectRecord(protocol);
                            }
                        }
                    };
                    if (substanceWriter != null) {
                        substanceWriter.setClearComposition(false);
                        substanceWriter.setClearMeasurements(false);
                    }
                } else {
                    throw new AmbitException("Unsupported format " + file);
                }

                // Install a handler that ignores every reported reader error.
                reader.setErrorHandler(new IChemObjectReaderErrorHandler() {

                    @Override
                    public void handleError(String message, int row, int colStart, int colEnd, Exception exception) {
                    }

                    @Override
                    public void handleError(String message, int row, int colStart, int colEnd) {
                    }

                    @Override
                    public void handleError(String message, Exception exception) {
                    }

                    @Override
                    public void handleError(String message) {
                    }
                });
                return reader;
            } catch (AmbitException x) {
                throw x;
            } catch (Exception x) {
                throw new AmbitException(x);
            }
        }
    };
    return batch;
}
Also used : DBSubstanceWriter(ambit2.db.substance.processor.DBSubstanceWriter) GenericExcelParser(net.enanomapper.parser.GenericExcelParser) LiteratureEntry(ambit2.base.data.LiteratureEntry) StructureRecordValidator(ambit2.base.data.study.StructureRecordValidator) I5_ROOT_OBJECTS(net.idea.i5.io.I5_ROOT_OBJECTS) CSV12Reader(net.idea.loom.nm.csv.CSV12Reader) I5ZReader(net.idea.i5.io.I5ZReader) SubstanceStudyParser(ambit2.core.io.json.SubstanceStudyParser) I6ZReader(net.idea.i6.io.I6ZReader) CSV12SubstanceReader(net.idea.loom.nm.csv.CSV12SubstanceReader) IStructureRecord(ambit2.base.interfaces.IStructureRecord) IChemObjectReaderErrorHandler(org.openscience.cdk.io.IChemObjectReaderErrorHandler) ENanoMapperRDFReader(net.idea.loom.nm.nanowiki.ENanoMapperRDFReader) IInputState(ambit2.core.io.IInputState) ResourceException(org.restlet.resource.ResourceException) FileReader(java.io.FileReader) Protocol(ambit2.base.data.study.Protocol) InputStreamReader(java.io.InputStreamReader) BatchDBProcessor(ambit2.db.processors.BatchDBProcessor) FileInputStream(java.io.FileInputStream) AmbitException(net.idea.modbcum.i.exceptions.AmbitException) ResourceException(org.restlet.resource.ResourceException) EffectRecord(ambit2.base.data.study.EffectRecord) File(java.io.File) FileInputState(ambit2.core.io.FileInputState) NanoWikiRDFReader(net.idea.loom.nm.nanowiki.NanoWikiRDFReader) AmbitException(net.idea.modbcum.i.exceptions.AmbitException)

Aggregations

I5ZReader (net.idea.i5.io.I5ZReader)3 File (java.io.File)2 QASettings (net.idea.i5.io.QASettings)2 I6ZReader (net.idea.i6.io.I6ZReader)2 AmbitException (net.idea.modbcum.i.exceptions.AmbitException)2 LiteratureEntry (ambit2.base.data.LiteratureEntry)1 EffectRecord (ambit2.base.data.study.EffectRecord)1 Protocol (ambit2.base.data.study.Protocol)1 StructureRecordValidator (ambit2.base.data.study.StructureRecordValidator)1 IStructureRecord (ambit2.base.interfaces.IStructureRecord)1 FileInputState (ambit2.core.io.FileInputState)1 IInputState (ambit2.core.io.IInputState)1 SubstanceStudyParser (ambit2.core.io.json.SubstanceStudyParser)1 CASKey (ambit2.core.processors.structure.key.CASKey)1 PropertyKey (ambit2.core.processors.structure.key.PropertyKey)1 ReferenceSubstanceUUID (ambit2.core.processors.structure.key.ReferenceSubstanceUUID)1 BatchDBProcessor (ambit2.db.processors.BatchDBProcessor)1 DBSubstanceWriter (ambit2.db.substance.processor.DBSubstanceWriter)1 CommunicationsException (com.mysql.jdbc.CommunicationsException)1 FileInputStream (java.io.FileInputStream)1