Search in sources:

Example 1 with CSV12Reader

use of net.idea.loom.nm.csv.CSV12Reader in project ambit-mirror by ideaconsult.

the class ProteinCoronaPaperReaderTest method test.

/**
 * Reads the Protein Corona CSV through {@link CSV12SubstanceReader} and checks
 * that every record is a {@link SubstanceRecord} and that at least 120 records
 * are produced.
 * <p>
 * The data file {@code MergedSheets.csv} is looked up in the
 * {@code java.io.tmpdir} directory and downloaded from GitHub only when it is
 * not already there.
 *
 * @throws Exception on download, parsing or I/O failure
 */
@Test
public void test() throws Exception {
    RawIteratingWrapper reader = null;
    try {
        LiteratureEntry entry = new LiteratureEntry("Protein Corona", "http://dx.doi.org/10.1021/nn406018q");
        entry.setType(_type.Dataset);
        File baseDir = new File(System.getProperty("java.io.tmpdir"));
        File datafile = new File(baseDir, "MergedSheets.csv");
        if (!datafile.exists()) {
            URL url = new URL("https://raw.githubusercontent.com/ideaconsult/Protein_Corona/master/MergedSheets.csv");
            DownloadTool.download(url, datafile);
        }
        CSV12Reader chemObjectReader = new CSV12Reader(new FileReader(datafile), entry, "PRCR-");
        reader = new CSV12SubstanceReader(chemObjectReader);
        int r = 0;
        while (reader.hasNext()) {
            IStructureRecord mol = reader.nextRecord();
            Assert.assertTrue(mol instanceof SubstanceRecord);
            System.out.println(((SubstanceRecord) mol).getPublicName());
            System.out.println(((SubstanceRecord) mol).getMeasurements());
            r++;
        }
        Assert.assertTrue(r >= 120);
    } finally {
        // reader is still null when the download or the reader construction
        // failed; without this guard a NullPointerException thrown here would
        // replace (and hide) the exception that actually caused the failure.
        if (reader != null)
            reader.close();
    }
}
Also used : IStructureRecord(ambit2.base.interfaces.IStructureRecord) RawIteratingWrapper(ambit2.core.io.RawIteratingWrapper) LiteratureEntry(ambit2.base.data.LiteratureEntry) ILiteratureEntry(ambit2.base.data.ILiteratureEntry) CSV12Reader(net.idea.loom.nm.csv.CSV12Reader) SubstanceRecord(ambit2.base.data.SubstanceRecord) FileReader(java.io.FileReader) File(java.io.File) URL(java.net.URL) CSV12SubstanceReader(net.idea.loom.nm.csv.CSV12SubstanceReader) DbUnitTest(ambit2.db.processors.test.DbUnitTest) Test(org.junit.Test)

Example 2 with CSV12Reader

use of net.idea.loom.nm.csv.CSV12Reader in project ambit-mirror by ideaconsult.

the class ProteinCoronaPaperReaderTest method testRDFExport.

/**
 * Reads the Protein Corona CSV, feeds every {@link SubstanceRecord} to a
 * {@link SubstanceRDFReporter} and writes the resulting model as Turtle to
 * {@code protein_export.ttl} in the {@code java.io.tmpdir} directory.
 * <p>
 * The data file {@code MergedSheets.csv} is downloaded from GitHub only when
 * it is not already present in the temporary directory.
 *
 * @throws Exception on download, parsing or I/O failure
 */
@Test
public void testRDFExport() throws Exception {
    CSV12SubstanceReader reader = null;
    // The reporter needs a request to resolve URIs against; a synthetic one
    // with a fixed root reference is used here.
    Request hack = new Request();
    hack.setRootRef(new Reference("http://localhost/ambit2"));
    File baseDir = new File(System.getProperty("java.io.tmpdir"));
    File datafile = new File(baseDir, "MergedSheets.csv");
    if (!datafile.exists()) {
        URL url = new URL("https://raw.githubusercontent.com/ideaconsult/Protein_Corona/master/MergedSheets.csv");
        DownloadTool.download(url, datafile);
    }
    SubstanceRDFReporter r = new SubstanceRDFReporter(hack, MediaType.TEXT_RDF_N3);
    Model model = ModelFactory.createDefaultModel();
    r.header(model, null);
    r.setOutput(model);
    LiteratureEntry entry = new LiteratureEntry("Protein Corona", "http://dx.doi.org/10.1021/nn406018q");
    entry.setType(_type.Dataset);
    try {
        CSV12Reader chemObjectReader = new CSV12Reader(new FileReader(datafile), entry, "PRCR-");
        reader = new CSV12SubstanceReader(chemObjectReader);
        while (reader.hasNext()) {
            IStructureRecord record = reader.nextRecord();
            Assert.assertTrue(record instanceof SubstanceRecord);
            r.processItem((SubstanceRecord) record);
        }
        r.footer(model, null);
        File output = new File(System.getProperty("java.io.tmpdir") + "/" + "protein_export.ttl");
        System.out.println("Exported to " + output.getAbsolutePath());
        OutputStream writer = new FileOutputStream(output);
        try {
            RDFDataMgr.write(writer, model, RDFFormat.TURTLE);
        } finally {
            // The stream was opened here, so it is closed here; previously it
            // was never closed and the file handle leaked.
            writer.close();
        }
    } finally {
        if (reader != null)
            reader.close();
    }
}
Also used : Reference(org.restlet.data.Reference) LiteratureEntry(ambit2.base.data.LiteratureEntry) ILiteratureEntry(ambit2.base.data.ILiteratureEntry) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) Request(org.restlet.Request) CSV12Reader(net.idea.loom.nm.csv.CSV12Reader) SubstanceRecord(ambit2.base.data.SubstanceRecord) SubstanceRDFReporter(ambit2.rest.substance.SubstanceRDFReporter) CSV12SubstanceReader(net.idea.loom.nm.csv.CSV12SubstanceReader) URL(java.net.URL) IStructureRecord(ambit2.base.interfaces.IStructureRecord) FileOutputStream(java.io.FileOutputStream) Model(com.hp.hpl.jena.rdf.model.Model) FileReader(java.io.FileReader) File(java.io.File) DbUnitTest(ambit2.db.processors.test.DbUnitTest) Test(org.junit.Test)

Example 3 with CSV12Reader

use of net.idea.loom.nm.csv.CSV12Reader in project ambit-mirror by ideaconsult.

the class ProteinCoronaPaperReaderTest method testWriteProteinCoronaData.

/**
 * Loads the Protein Corona CSV and writes its records to the test database
 * through {@code write(...)}, keyed with a {@link ReferenceSubstanceUUID}.
 * <p>
 * The database is first initialised from {@code empty-datasets.xml}. The data
 * file {@code MergedSheets.csv} is downloaded from GitHub only when it is not
 * already present in the {@code java.io.tmpdir} directory.
 *
 * @throws Exception on database setup, download, parsing or write failure
 */
@Test
public void testWriteProteinCoronaData() throws Exception {
    setUpDatabaseFromResource("ambit2/db/processors/test/empty-datasets.xml");
    IDatabaseConnection c = getConnection();
    IRawReader<IStructureRecord> parser = null;
    try {
        LiteratureEntry entry = new LiteratureEntry("Protein Corona", "http://dx.doi.org/10.1021/nn406018q");
        entry.setType(_type.Dataset);
        File baseDir = new File(System.getProperty("java.io.tmpdir"));
        File datafile = new File(baseDir, "MergedSheets.csv");
        if (!datafile.exists()) {
            URL url = new URL("https://raw.githubusercontent.com/ideaconsult/Protein_Corona/master/MergedSheets.csv");
            DownloadTool.download(url, datafile);
        }
        CSV12Reader chemObjectReader = new CSV12Reader(new FileReader(datafile), entry, "PRCR-");
        parser = new CSV12SubstanceReader(chemObjectReader);
        write(parser, c.getConnection(), new ReferenceSubstanceUUID(), false);
    } finally {
        try {
            // parser is still null when the download or reader construction
            // failed; the guard keeps a NullPointerException from hiding the
            // original exception.
            if (parser != null)
                parser.close();
        } finally {
            // Always release the database connection, even if closing the
            // parser throws.
            c.close();
        }
    }
}
Also used : IStructureRecord(ambit2.base.interfaces.IStructureRecord) ReferenceSubstanceUUID(ambit2.core.processors.structure.key.ReferenceSubstanceUUID) LiteratureEntry(ambit2.base.data.LiteratureEntry) ILiteratureEntry(ambit2.base.data.ILiteratureEntry) CSV12Reader(net.idea.loom.nm.csv.CSV12Reader) FileReader(java.io.FileReader) IDatabaseConnection(org.dbunit.database.IDatabaseConnection) File(java.io.File) URL(java.net.URL) CSV12SubstanceReader(net.idea.loom.nm.csv.CSV12SubstanceReader) DbUnitTest(ambit2.db.processors.test.DbUnitTest) Test(org.junit.Test)

Example 4 with CSV12Reader

use of net.idea.loom.nm.csv.CSV12Reader in project ambit-mirror by ideaconsult.

the class CallableSubstanceImporter method createBatch.

/**
 * Creates the batch processor that imports substances from an uploaded file.
 * <p>
 * The returned processor picks a reader from the (lower-cased) file name and
 * configures the {@code writer}, when it is a {@link DBSubstanceWriter}, to
 * match that format:
 * <ul>
 * <li>I5Z / I6Z archives: {@link I5ZReader} / {@link I6ZReader}, split
 * records and I5 mode switched on;</li>
 * <li>CSV: {@link CSV12SubstanceReader} with the {@code "FCSV-"} prefix;</li>
 * <li>{@code .rdf}: {@link NanoWikiRDFReader}, bundle import switched on;</li>
 * <li>{@code .ttl}: {@link ENanoMapperRDFReader}, bundle import switched on;</li>
 * <li>XLS / XLSX: {@link GenericExcelParser}, which requires
 * {@code configFile}; each record is passed through a
 * {@link StructureRecordValidator};</li>
 * <li>{@code .json}: {@link SubstanceStudyParser}, with composition and
 * measurement clearing switched off.</li>
 * </ul>
 *
 * @param target the uploaded file to import; must not be {@code null}
 * @return a batch processor whose iterator reads records from the file
 * @throws ResourceException with status 400 (bad request) if {@code target}
 *         is {@code null}
 * @throws Exception declared by the overridden method
 */
@Override
protected AbstractBatchProcessor createBatch(FileInputState target) throws Exception {
    if (target == null)
        throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST);
    final BatchDBProcessor<String> batch = new BatchDBProcessor<String>() {

        /**
         * Serialization version identifier.
         */
        private static final long serialVersionUID = 5712170806359764006L;

        /**
         * Opens the reader matching the file type of {@code target}.
         *
         * @throws AmbitException if the format is not supported, if an Excel
         *         file is imported without a configuration file, or wrapping
         *         any other failure while opening the reader
         */
        @Override
        public Iterator<String> getIterator(IInputState target) throws AmbitException {
            try {
                IRawReader<IStructureRecord> reader = null;
                File file = ((FileInputState) target).getFile();
                // Lower-cased file name; only its suffix is inspected below.
                String ext = file.getName().toLowerCase();
                if (ext.endsWith(FileInputState._FILE_TYPE.I5Z_INDEX.getExtension())) {
                    if (writer instanceof DBSubstanceWriter) {
                        ((DBSubstanceWriter) writer).setSplitRecord(true);
                        ((DBSubstanceWriter) writer).setI5mode(true);
                    }
                    reader = new I5ZReader(file);
                    ((I5ZReader) reader).setQASettings(getQASettings());
                } else if (ext.endsWith(FileInputState._FILE_TYPE.I6Z_INDEX.getExtension())) {
                    if (writer instanceof DBSubstanceWriter) {
                        ((DBSubstanceWriter) writer).setSplitRecord(true);
                        ((DBSubstanceWriter) writer).setI5mode(true);
                    }
                    reader = new I6ZReader(file);
                    ((I6ZReader) reader).setQASettings(getQASettings());
                } else if (ext.endsWith(FileInputState._FILE_TYPE.CSV_INDEX.getExtension())) {
                    if (writer instanceof DBSubstanceWriter)
                        ((DBSubstanceWriter) writer).setSplitRecord(false);
                    // The original upload name serves as both arguments of the
                    // literature entry.
                    LiteratureEntry reference = new LiteratureEntry(originalname, originalname);
                    reader = new CSV12SubstanceReader(new CSV12Reader(new FileReader(file), reference, "FCSV-"));
                } else if (ext.endsWith(".rdf")) {
                    if (writer instanceof DBSubstanceWriter) {
                        ((DBSubstanceWriter) writer).setSplitRecord(false);
                        ((DBSubstanceWriter) writer).setImportBundles(true);
                    }
                    reader = new NanoWikiRDFReader(new InputStreamReader(new FileInputStream(file), "UTF-8"), null, "RDF/XML");
                } else if (ext.endsWith(".ttl")) {
                    if (writer instanceof DBSubstanceWriter) {
                        ((DBSubstanceWriter) writer).setSplitRecord(false);
                        ((DBSubstanceWriter) writer).setImportBundles(true);
                    }
                    reader = new ENanoMapperRDFReader(new InputStreamReader(new FileInputStream(file), "UTF-8"), "ENM3");
                } else if (FileInputState._FILE_TYPE.XLSX_INDEX.hasExtension(ext) || FileInputState._FILE_TYPE.XLS_INDEX.hasExtension(ext)) {
                    if (configFile == null)
                        throw new AmbitException("XLS/XLSX file import requires a JSON configuration file");
                    final StructureRecordValidator validator = new StructureRecordValidator(file.getName(), true, "XLSX");
                    reader = new GenericExcelParser(new FileInputStream(file), configFile, FileInputState._FILE_TYPE.XLSX_INDEX.hasExtension(ext)) {

                        @Override
                        public Object next() {
                            Object record = super.next();
                            try {
                                if (record instanceof IStructureRecord)
                                    record = validator.process((IStructureRecord) record);
                            } catch (Exception ignored) {
                                // Validation is best effort: when it fails the
                                // record is returned as read by the parser.
                            }
                            return record;
                        }
                    };
                    if (writer instanceof DBSubstanceWriter) {
                        ((DBSubstanceWriter) writer).setSplitRecord(false);
                        /*
                         * ((DBSubstanceWriter) writer).setClearComposition(false);
                         * ((DBSubstanceWriter) writer).setClearMeasurements(false);
                         */
                    }
                } else if (ext.endsWith(".json")) {
                    if (writer instanceof DBSubstanceWriter)
                        ((DBSubstanceWriter) writer).setSplitRecord(false);
                    reader = new SubstanceStudyParser(new InputStreamReader(new FileInputStream(file), "UTF-8")) {

                        @Override
                        protected EffectRecord createEffectRecord(Protocol protocol) {
                            try {
                                // Look up the root object named
                                // "<category>_SECTION"; fall back to the
                                // default effect record when there is none.
                                I5_ROOT_OBJECTS category = I5_ROOT_OBJECTS.valueOf(protocol.getCategory() + "_SECTION");
                                return category.createEffectRecord();
                            } catch (Exception x) {
                                return super.createEffectRecord(protocol);
                            }
                        }
                    };
                    if (writer instanceof DBSubstanceWriter) {
                        ((DBSubstanceWriter) writer).setClearComposition(false);
                        ((DBSubstanceWriter) writer).setClearMeasurements(false);
                    }
                } else {
                    throw new AmbitException("Unsupported format " + file);
                }
                // Reader errors are ignored: every callback of this handler is
                // a no-op.
                reader.setErrorHandler(new IChemObjectReaderErrorHandler() {

                    @Override
                    public void handleError(String message, int row, int colStart, int colEnd, Exception exception) {
                    }

                    @Override
                    public void handleError(String message, int row, int colStart, int colEnd) {
                    }

                    @Override
                    public void handleError(String message, Exception exception) {
                    }

                    @Override
                    public void handleError(String message) {
                    }
                });
                return reader;
            } catch (AmbitException x) {
                // Already the declared exception type; rethrow unwrapped.
                throw x;
            } catch (Exception x) {
                throw new AmbitException(x);
            }
        }
    };
    return batch;
}
Also used : DBSubstanceWriter(ambit2.db.substance.processor.DBSubstanceWriter) GenericExcelParser(net.enanomapper.parser.GenericExcelParser) LiteratureEntry(ambit2.base.data.LiteratureEntry) StructureRecordValidator(ambit2.base.data.study.StructureRecordValidator) I5_ROOT_OBJECTS(net.idea.i5.io.I5_ROOT_OBJECTS) CSV12Reader(net.idea.loom.nm.csv.CSV12Reader) I5ZReader(net.idea.i5.io.I5ZReader) SubstanceStudyParser(ambit2.core.io.json.SubstanceStudyParser) I6ZReader(net.idea.i6.io.I6ZReader) CSV12SubstanceReader(net.idea.loom.nm.csv.CSV12SubstanceReader) IStructureRecord(ambit2.base.interfaces.IStructureRecord) IChemObjectReaderErrorHandler(org.openscience.cdk.io.IChemObjectReaderErrorHandler) ENanoMapperRDFReader(net.idea.loom.nm.nanowiki.ENanoMapperRDFReader) IInputState(ambit2.core.io.IInputState) ResourceException(org.restlet.resource.ResourceException) FileReader(java.io.FileReader) Protocol(ambit2.base.data.study.Protocol) InputStreamReader(java.io.InputStreamReader) BatchDBProcessor(ambit2.db.processors.BatchDBProcessor) FileInputStream(java.io.FileInputStream) AmbitException(net.idea.modbcum.i.exceptions.AmbitException) ResourceException(org.restlet.resource.ResourceException) EffectRecord(ambit2.base.data.study.EffectRecord) File(java.io.File) FileInputState(ambit2.core.io.FileInputState) NanoWikiRDFReader(net.idea.loom.nm.nanowiki.NanoWikiRDFReader) AmbitException(net.idea.modbcum.i.exceptions.AmbitException)

Aggregations

LiteratureEntry (ambit2.base.data.LiteratureEntry)4 IStructureRecord (ambit2.base.interfaces.IStructureRecord)4 File (java.io.File)4 FileReader (java.io.FileReader)4 CSV12Reader (net.idea.loom.nm.csv.CSV12Reader)4 CSV12SubstanceReader (net.idea.loom.nm.csv.CSV12SubstanceReader)4 ILiteratureEntry (ambit2.base.data.ILiteratureEntry)3 DbUnitTest (ambit2.db.processors.test.DbUnitTest)3 URL (java.net.URL)3 Test (org.junit.Test)3 SubstanceRecord (ambit2.base.data.SubstanceRecord)2 EffectRecord (ambit2.base.data.study.EffectRecord)1 Protocol (ambit2.base.data.study.Protocol)1 StructureRecordValidator (ambit2.base.data.study.StructureRecordValidator)1 FileInputState (ambit2.core.io.FileInputState)1 IInputState (ambit2.core.io.IInputState)1 RawIteratingWrapper (ambit2.core.io.RawIteratingWrapper)1 SubstanceStudyParser (ambit2.core.io.json.SubstanceStudyParser)1 ReferenceSubstanceUUID (ambit2.core.processors.structure.key.ReferenceSubstanceUUID)1 BatchDBProcessor (ambit2.db.processors.BatchDBProcessor)1