Search in sources:

Example 1 with RawIteratingCSVReader

Use of ambit2.core.io.RawIteratingCSVReader in project ambit-mirror by ideaconsult.

In the class SubstanceStudyReporterTest, method main:

/**
 * Reads a tab-delimited file (CSVFormat.TDF) given as {@code args[0]}, groups
 * consecutive rows that share the same record id (column 0) into one
 * {@link SubstanceRecord}, and writes each completed substance through a
 * {@link Substance2BucketJsonReporter}.
 *
 * <p>Output goes to the file named by {@code args[1]} when present, otherwise
 * to {@code System.out}. Progress is printed to {@code System.err}: a dot every
 * 10000 rows and the row count every 100000 rows.
 *
 * @param args {@code args[0]} input file (required); {@code args[1]} output file (optional)
 */
public static void main(String[] args) {
    // Without an input file there is nothing to do; fail with a readable message
    // instead of an ArrayIndexOutOfBoundsException stack trace.
    if (args.length < 1) {
        System.err.println("Usage: SubstanceStudyReporterTest <input-file> [output-file]");
        return;
    }
    Substance2BucketJsonReporter reporter = new Substance2BucketJsonReporter(null, null, Substance2BucketJsonReporter._JSON_MODE.substance, null, null, null);
    Reader in = null;
    try {
        OutputStreamWriter writer;
        if (args.length < 2) {
            writer = new OutputStreamWriter(System.out);
        } else {
            writer = new OutputStreamWriter(new FileOutputStream(new File(args[1])));
        }
        reporter.setOutput(writer);
        in = new FileReader(new File(args[0]));
        RawIteratingCSVReader reader = new RawIteratingCSVReader(in, CSVFormat.TDF) {

            // Substance built from the previous row; reused while the record id stays the same.
            protected SubstanceRecord prevrecord = null;

            // Column 0: record id, used as the substance name and as the UUID seed.
            protected String getRecordid(CSVRecord record) {
                return record.get(0);
            }

            // Column 1: external identifier value.
            protected String getExternalId(CSVRecord record) {
                return record.get(1);
            }

            // Column 5: system (database) the external identifier belongs to.
            protected String getExternalIdSystem(CSVRecord record) {
                return record.get(5);
            }

            // Column 6: assay id, reported as "AID<id>".
            protected String getAssayid(CSVRecord record) {
                return record.get(6);
            }

            // Column 3: summary activity, stored as the interpretation result.
            protected String getSummaryActivity(CSVRecord record) {
                return record.get(3);
            }

            // Column 4: numeric activity value.
            protected double getActivityValue(CSVRecord record) throws NumberFormatException {
                return Double.parseDouble(record.get(4));
            }

            // Column 9: ortholog group, reported as "OG<value>".
            protected String getOrthologgroup(CSVRecord record) {
                return record.get(9);
            }

            // Column 8: gene symbol.
            protected String getGeneSymbol(CSVRecord record) {
                return record.get(8);
            }

            // Column 7: species, reported as "TaxId:<value>".
            protected String getSpecies(CSVRecord record) {
                return record.get(7);
            }

            /**
             * Folds one CSV row into the current substance, starting a new
             * substance whenever the record id differs from the previous row's.
             */
            @Override
            protected IStructureRecord transform(CSVRecord record) {
                SubstanceRecord substance = prevrecord;
                String id = getRecordid(record);
                if (prevrecord == null || !id.equals(prevrecord.getSubstanceName())) {
                    substance = new SubstanceRecord();
                    substance.setContent(null);
                    // substance.setSubstancetype("standardized");
                    substance.setSubstanceUUID(I5Utils.getPrefixedUUID("PC", UUID.nameUUIDFromBytes(id.getBytes())));
                    substance.setSubstanceName(id);
                    prevrecord = substance;
                }
                structureRecord = substance;
                String externalid = getExternalId(record);
                String externaldb = getExternalIdSystem(record);
                ExternalIdentifier eid = new ExternalIdentifier(externaldb, externalid);
                // Register the external identifier only once per substance.
                boolean newid = true;
                if (substance.getExternalids() == null) {
                    substance.setExternalids(new ArrayList<ExternalIdentifier>());
                } else {
                    for (ExternalIdentifier e : substance.getExternalids()) {
                        if (e.getSystemDesignator().equals(eid.getSystemDesignator()) && e.getSystemIdentifier().equals(eid.getSystemIdentifier())) {
                            newid = false;
                            break;
                        }
                    }
                }
                if (newid) {
                    substance.getExternalids().add(eid);
                }
                String assayid = getAssayid(record);
                Protocol p = new Protocol(String.format("%s", externalid));
                p.setTopCategory(null);
                p.setCategory(null);
                // p.addGuideline(String.format("%s_AID%s", externaldb,assayid));
                ProtocolApplication<Protocol, IParams, String, IParams, String> papp = new ProtocolApplication<Protocol, IParams, String, IParams, String>(p);
                // papp.setDocumentUUID(I5Utils.getPrefixedUUID("PC",UUID.nameUUIDFromBytes(p.getEndpoint().getBytes())));
                papp.setDocumentUUID(null);
                papp.setInterpretationResult(getSummaryActivity(record));
                papp.setReference(String.format("AID%s", assayid));
                papp.setReferenceOwner(externaldb);
                IParams params = new Params();
                params.put("gene", getGeneSymbol(record));
                params.put("taxid", "TaxId:" + getSpecies(record));
                // params.put("OG_GENE", String.format("OG%s_%s", record.get(9),	record.get(8)));
                params.put("og", "OG" + getOrthologgroup(record));
                params.put("ez", "entrez:" + record.get(2));
                papp.setParameters(params);
                try {
                    EffectRecord<String, IParams, String> effect = new EffectRecord<String, IParams, String>();
                    effect.setEndpoint("pXC50");
                    effect.setIdresult(1);
                    double value = getActivityValue(record);
                    effect.setLoValue(value);
                    effect.setUnit("nM");
                    papp.addEffect(effect);
                } catch (Exception ignored) {
                    // Deliberate best effort: a row whose activity value cannot be read
                    // still contributes its measurement, just without an effect.
                }
                substance.addMeasurement(papp);
                return structureRecord;
            }
        };
        int n = 0;
        IStructureRecord prevrecord = null;
        reporter.header(writer, null);
        while (reader.hasNext()) {
            IStructureRecord record = reader.nextRecord();
            // A different instance means the previous substance is complete; emit it.
            // NOTE(review): relies on nextRecord() handing back the instance produced by transform() - confirm.
            if (prevrecord != null && (prevrecord != record)) {
                reporter.processItem((SubstanceRecord) prevrecord);
                writer.flush();
            }
            n++;
            prevrecord = record;
            if ((n % 100000) == 0) {
                System.err.println();
                System.err.print(n);
            } else if ((n % 10000) == 0) {
                System.err.print(".");
            }
        }
        // Emit the last substance; an input without rows has none, so skip it
        // rather than passing null to the reporter and losing the footer.
        if (prevrecord != null) {
            reporter.processItem((SubstanceRecord) prevrecord);
        }
        reporter.footer(writer, null);
        writer.flush();
    } catch (Exception x) {
        x.printStackTrace();
    } finally {
        // The input may never have been opened if creating the writer failed.
        if (in != null) {
            try {
                in.close();
            } catch (Exception ignored) {
                // Nothing useful can be done about a failed close at this point.
            }
        }
        try {
            reporter.close();
        } catch (Exception ignored) {
            // Nothing useful can be done about a failed close at this point.
        }
    }
}
Also used : RawIteratingCSVReader(ambit2.core.io.RawIteratingCSVReader) ExternalIdentifier(ambit2.base.data.substance.ExternalIdentifier) ArrayList(java.util.ArrayList) RawIteratingCSVReader(ambit2.core.io.RawIteratingCSVReader) Reader(java.io.Reader) FileReader(java.io.FileReader) SubstanceRecord(ambit2.base.data.SubstanceRecord) IParams(ambit2.base.data.study.IParams) Params(ambit2.base.data.study.Params) IStructureRecord(ambit2.base.interfaces.IStructureRecord) ProtocolApplication(ambit2.base.data.study.ProtocolApplication) EffectRecord(ambit2.base.data.study.EffectRecord) FileOutputStream(java.io.FileOutputStream) Substance2BucketJsonReporter(ambit2.rest.substance.study.Substance2BucketJsonReporter) IParams(ambit2.base.data.study.IParams) OutputStreamWriter(java.io.OutputStreamWriter) FileReader(java.io.FileReader) CSVRecord(org.apache.commons.csv.CSVRecord) Protocol(ambit2.base.data.study.Protocol) File(java.io.File)

Example 2 with RawIteratingCSVReader

Use of ambit2.core.io.RawIteratingCSVReader in project ambit-mirror by ideaconsult.

In the class RawIteratingWrapperTest, method testCSVReader:

/**
 * Iterates the tab-delimited classpath resource {@code ambit2/core/data/io/test.txt}
 * and checks, for every record, that the reader yields an {@link IStructureRecord}
 * which {@link MoleculeReader} can process; records with a non-empty SMILES must
 * produce a molecule with at least one atom. Exactly 11 iterations are expected.
 */
@Test
public void testCSVReader() throws Exception {
    InputStreamReader source = new InputStreamReader(RawIteratingWrapperTest.class.getClassLoader().getResourceAsStream("ambit2/core/data/io/test.txt"));
    RawIteratingCSVReader csv = new RawIteratingCSVReader(source, CSVFormat.TDF);
    MoleculeReader toMolecule = new MoleculeReader();
    try {
        int seen = 0;
        for (; csv.hasNext(); seen++) {
            IStructureRecord current = csv.nextRecord();
            // Records carrying both SMILES and InChI must be fully populated.
            if (current.getSmiles() != null) {
                if (current.getInchi() != null) {
                    Assert.assertNotNull(current.getContent());
                    Assert.assertNotNull(current.getSmiles());
                    Assert.assertNotNull(current.getInchi());
                    Assert.assertNotNull(current.getInchiKey());
                }
            }
            // The plain iterator view must hand out a structure record as well.
            Object item = csv.next();
            Assert.assertNotNull(item);
            Assert.assertTrue(item instanceof IStructureRecord);
            IAtomContainer molecule = toMolecule.process((IStructureRecord) item);
            if (!(current.getSmiles() == null || "".equals(current.getSmiles()))) {
                String message = String.format("Atoms %d", molecule.getAtomCount());
                Assert.assertTrue(message, molecule.getAtomCount() > 0);
            }
        }
        Assert.assertEquals(11, seen);
    } finally {
        csv.close();
    }
}
Also used : RawIteratingCSVReader(ambit2.core.io.RawIteratingCSVReader) IStructureRecord(ambit2.base.interfaces.IStructureRecord) IAtomContainer(org.openscience.cdk.interfaces.IAtomContainer) InputStreamReader(java.io.InputStreamReader) MoleculeReader(ambit2.core.processors.structure.MoleculeReader) Test(org.junit.Test)

Example 3 with RawIteratingCSVReader

Use of ambit2.core.io.RawIteratingCSVReader in project ambit-mirror by ideaconsult.

In the class BatchDBProcessor, method getIterator:

/**
 * Creates an iterator over the content described by {@code target}.
 *
 * <p>A directory yields a {@link RawIteratingFolderReader} over its entries whose
 * names do not start with a dot. A regular file is opened (through a
 * {@link GZIPInputStream} when its name ends with the gz extension) and a reader
 * is chosen from the (uncompressed) file name: SDF, MOL, CSV (Excel format),
 * TXT (tab-delimited, {@code #} comments), or whatever
 * {@code FileInputState.getReader} provides, wrapped in a {@link RawIteratingWrapper}.
 *
 * <p>If no reader can be returned, the stream opened here is closed before the
 * exception propagates.
 *
 * @param target the input to read; must be a {@link FileInputState}
 * @return an iterator over the records of the file or folder
 * @throws AmbitException if {@code target} is not a {@link FileInputState}, the
 *         format is unsupported, or opening/reading fails (I/O failures are
 *         reported as {@link AmbitIOException})
 */
public Iterator<ITEMTYPE> getIterator(IInputState target) throws AmbitException {
    if (!(target instanceof FileInputState)) {
        throw new AmbitException("Not a file");
    }
    try {
        File _file = ((FileInputState) target).getFile();
        if (_file.isDirectory()) {
            FilenameFilter filter = new FilenameFilter() {

                @Override
                public boolean accept(File dir, String name) {
                    return !name.startsWith(".");
                }
            };
            // NOTE(review): File.listFiles returns null on I/O error - confirm the folder reader tolerates that.
            return new RawIteratingFolderReader(_file.listFiles(filter));
        }
        InputStream stream;
        String filename = _file.getName();
        String gzExtension = _FILE_TYPE.GZ_INDEX.getExtension();
        if (filename.endsWith(gzExtension)) {
            FileInputStream compressed = null;
            try {
                compressed = new FileInputStream(_file);
                stream = new GZIPInputStream(compressed);
            } catch (IOException x) {
                // The GZIP constructor reads the header and may fail after the file is open.
                if (compressed != null) {
                    try {
                        compressed.close();
                    } catch (IOException ignored) {
                        // The original failure is the one worth reporting.
                    }
                }
                throw new AmbitIOException(x);
            }
            // Strip only the trailing extension; a regex replaceAll would also remove
            // look-alike sequences elsewhere in the name.
            filename = filename.substring(0, filename.length() - gzExtension.length());
        } else {
            stream = new FileInputStream(_file);
        }
        // Stays null unless a reader took ownership of the stream.
        Iterator<ITEMTYPE> result = null;
        try {
            if (FileInputState._FILE_TYPE.SDF_INDEX.hasExtension(filename)) {
                RawIteratingSDFReader reader = new RawIteratingSDFReader(new InputStreamReader(stream));
                if (getReference() == null) {
                    reader.setReference(LiteratureEntry.getInstance(filename, _file.getAbsolutePath()));
                } else {
                    reader.setReference(getReference());
                }
                result = reader;
            } else if (FileInputState._FILE_TYPE.MOL_INDEX.hasExtension(filename)) {
                RawIteratingMOLReader reader = new RawIteratingMOLReader(new InputStreamReader(stream));
                if (getReference() == null) {
                    reader.setReference(LiteratureEntry.getInstance(filename, _file.getAbsolutePath()));
                } else {
                    reader.setReference(getReference());
                }
                result = reader;
            /* TEST and replace the wrapper with this */
            } else if (FileInputState._FILE_TYPE.CSV_INDEX.hasExtension(filename)) {
                RawIteratingCSVReader reader = new RawIteratingCSVReader(new InputStreamReader(stream), CSVFormat.EXCEL);
                configureReader(reader, target, _file);
                result = reader;
            } else if (FileInputState._FILE_TYPE.TXT_INDEX.hasExtension(filename)) {
                RawIteratingCSVReader reader = new RawIteratingCSVReader(new InputStreamReader(stream), CSVFormat.TDF.withCommentMarker('#'));
                configureReader(reader, target, _file);
                result = reader;
            } else {
                IIteratingChemObjectReader ir = FileInputState.getReader(stream, filename);
                if (ir == null) {
                    throw new AmbitException("Unsupported format " + filename);
                }
                if (ir instanceof RawIteratingCSVReader) {
                    configureReader(((RawIteratingCSVReader) ir), target, _file);
                }
                RawIteratingWrapper reader = new RawIteratingWrapper(ir);
                if (getReference() == null) {
                    reader.setReference(LiteratureEntry.getInstance(filename, _file.getAbsolutePath()));
                } else {
                    reader.setReference(getReference());
                }
                result = reader;
            }
            return result;
        } finally {
            // No reader was produced (exception on the way): do not leak the open file.
            if (result == null) {
                try {
                    stream.close();
                } catch (IOException ignored) {
                    // The exception already in flight is the one worth reporting.
                }
            }
        }
    } catch (AmbitException x) {
        // Already the declared type (including AmbitIOException); do not wrap it again.
        throw x;
    } catch (IOException x) {
        throw new AmbitIOException(x);
    } catch (Exception x) {
        throw new AmbitException(x);
    }
}
Also used : RawIteratingCSVReader(ambit2.core.io.RawIteratingCSVReader) RawIteratingSDFReader(ambit2.core.io.RawIteratingSDFReader) InputStreamReader(java.io.InputStreamReader) IIteratingChemObjectReader(org.openscience.cdk.io.iterator.IIteratingChemObjectReader) GZIPInputStream(java.util.zip.GZIPInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) RawIteratingFolderReader(ambit2.core.io.RawIteratingFolderReader) AmbitIOException(ambit2.base.exceptions.AmbitIOException) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) AmbitIOException(ambit2.base.exceptions.AmbitIOException) AmbitException(net.idea.modbcum.i.exceptions.AmbitException) IOException(java.io.IOException) GZIPInputStream(java.util.zip.GZIPInputStream) FilenameFilter(java.io.FilenameFilter) AmbitIOException(ambit2.base.exceptions.AmbitIOException) RawIteratingWrapper(ambit2.core.io.RawIteratingWrapper) RawIteratingMOLReader(ambit2.core.io.RawIteratingMOLReader) FileInputState(ambit2.core.io.FileInputState) File(java.io.File) AmbitException(net.idea.modbcum.i.exceptions.AmbitException)

Aggregations

RawIteratingCSVReader (ambit2.core.io.RawIteratingCSVReader): 3; IStructureRecord (ambit2.base.interfaces.IStructureRecord): 2; File (java.io.File): 2; InputStreamReader (java.io.InputStreamReader): 2; SubstanceRecord (ambit2.base.data.SubstanceRecord): 1; EffectRecord (ambit2.base.data.study.EffectRecord): 1; IParams (ambit2.base.data.study.IParams): 1; Params (ambit2.base.data.study.Params): 1; Protocol (ambit2.base.data.study.Protocol): 1; ProtocolApplication (ambit2.base.data.study.ProtocolApplication): 1; ExternalIdentifier (ambit2.base.data.substance.ExternalIdentifier): 1; AmbitIOException (ambit2.base.exceptions.AmbitIOException): 1; FileInputState (ambit2.core.io.FileInputState): 1; RawIteratingFolderReader (ambit2.core.io.RawIteratingFolderReader): 1; RawIteratingMOLReader (ambit2.core.io.RawIteratingMOLReader): 1; RawIteratingSDFReader (ambit2.core.io.RawIteratingSDFReader): 1; RawIteratingWrapper (ambit2.core.io.RawIteratingWrapper): 1; MoleculeReader (ambit2.core.processors.structure.MoleculeReader): 1; Substance2BucketJsonReporter (ambit2.rest.substance.study.Substance2BucketJsonReporter): 1; FileInputStream (java.io.FileInputStream): 1