Example use of ambit2.core.io.RawIteratingCSVReader in the project ambit-mirror by ideaconsult.
Source: class SubstanceStudyReporterTest, method main.
/**
 * Reads a tab-delimited file of activity rows and writes the substances it
 * describes through a {@code Substance2BucketJsonReporter}.
 *
 * <p>Rows are grouped by the record id in column 0: while consecutive rows
 * carry the same id they are folded into one {@code SubstanceRecord}; when
 * the id changes the previous substance is handed to the reporter. Rows of
 * the same substance are therefore assumed to be contiguous in the input
 * (TODO confirm against the data producer).
 *
 * @param args {@code args[0]} is the input file (required); {@code args[1]}
 *             is the output file (optional, standard output when absent)
 */
public static void main(String[] args) {
    // Without an input path there is nothing to do; say so instead of
    // failing later with an ArrayIndexOutOfBoundsException stack trace.
    if (args.length < 1) {
        System.err.println("Usage: <input file (tab delimited)> [output file]");
        return;
    }
    Substance2BucketJsonReporter reporter = new Substance2BucketJsonReporter(null, null, Substance2BucketJsonReporter._JSON_MODE.substance, null, null, null);
    Reader in = null;
    RawIteratingCSVReader reader = null;
    OutputStreamWriter writer = null;
    boolean writesToFile = args.length >= 2;
    try {
        if (writesToFile)
            writer = new OutputStreamWriter(new FileOutputStream(new File(args[1])));
        else
            writer = new OutputStreamWriter(System.out);
        reporter.setOutput(writer);
        in = new FileReader(new File(args[0]));
        reader = new RawIteratingCSVReader(in, CSVFormat.TDF) {
            // Substance built from the previous row(s); reused while the record id stays the same.
            protected SubstanceRecord prevrecord = null;

            // Column accessors: each one reads a fixed, zero-based column of the row.
            protected String getRecordid(CSVRecord record) {
                return record.get(0);
            }

            protected String getExternalId(CSVRecord record) {
                return record.get(1);
            }

            protected String getExternalIdSystem(CSVRecord record) {
                return record.get(5);
            }

            protected String getAssayid(CSVRecord record) {
                return record.get(6);
            }

            protected String getSummaryActivity(CSVRecord record) {
                return record.get(3);
            }

            protected double getActivityValue(CSVRecord record) throws NumberFormatException {
                return Double.parseDouble(record.get(4));
            }

            protected String getOrthologgroup(CSVRecord record) {
                return record.get(9);
            }

            protected String getGeneSymbol(CSVRecord record) {
                return record.get(8);
            }

            protected String getSpecies(CSVRecord record) {
                return record.get(7);
            }

            /**
             * Adds one row as a measurement to the current substance,
             * starting a new substance when the record id changes.
             * The returned instance is the same object for all
             * consecutive rows sharing a record id.
             */
            @Override
            protected IStructureRecord transform(CSVRecord record) {
                SubstanceRecord substance = prevrecord;
                String id = getRecordid(record);
                if (prevrecord == null || !id.equals(prevrecord.getSubstanceName())) {
                    substance = new SubstanceRecord();
                    substance.setContent(null);
                    // UUID is derived from the record id, so the same id always maps to the same UUID.
                    substance.setSubstanceUUID(I5Utils.getPrefixedUUID("PC", UUID.nameUUIDFromBytes(id.getBytes())));
                    substance.setSubstanceName(id);
                    prevrecord = substance;
                }
                structureRecord = substance;

                // Register the external identifier once per (system, identifier) pair.
                String externalid = getExternalId(record);
                String externaldb = getExternalIdSystem(record);
                ExternalIdentifier eid = new ExternalIdentifier(externaldb, externalid);
                boolean newid = true;
                if (substance.getExternalids() == null) {
                    substance.setExternalids(new ArrayList<ExternalIdentifier>());
                } else {
                    for (ExternalIdentifier e : substance.getExternalids()) {
                        if (e.getSystemDesignator().equals(eid.getSystemDesignator()) && e.getSystemIdentifier().equals(eid.getSystemIdentifier())) {
                            newid = false;
                            break;
                        }
                    }
                }
                if (newid)
                    substance.getExternalids().add(eid);

                // One protocol application per row, referenced by assay id.
                String assayid = getAssayid(record);
                Protocol p = new Protocol(String.format("%s", externalid));
                p.setTopCategory(null);
                p.setCategory(null);
                ProtocolApplication<Protocol, IParams, String, IParams, String> papp = new ProtocolApplication<Protocol, IParams, String, IParams, String>(p);
                papp.setDocumentUUID(null);
                papp.setInterpretationResult(getSummaryActivity(record));
                papp.setReference(String.format("AID%s", assayid));
                papp.setReferenceOwner(externaldb);
                IParams params = new Params();
                params.put("gene", getGeneSymbol(record));
                params.put("taxid", "TaxId:" + getSpecies(record));
                params.put("og", "OG" + getOrthologgroup(record));
                params.put("ez", "entrez:" + record.get(2));
                papp.setParameters(params);
                try {
                    EffectRecord<String, IParams, String> effect = new EffectRecord<String, IParams, String>();
                    effect.setEndpoint("pXC50");
                    effect.setIdresult(1);
                    double value = getActivityValue(record);
                    effect.setLoValue(value);
                    effect.setUnit("nM");
                    papp.addEffect(effect);
                } catch (Exception ignored) {
                    // Best effort: a row whose activity value cannot be read
                    // (e.g. not a number) is kept, just without an effect.
                }
                substance.addMeasurement(papp);
                return structureRecord;
            }
        };
        int n = 0;
        IStructureRecord prevrecord = null;
        reporter.header(writer, null);
        while (reader.hasNext()) {
            IStructureRecord record = reader.nextRecord();
            // A different instance means the record id changed, so the
            // previous substance is complete and can be written out.
            if (prevrecord != null && (prevrecord != record)) {
                reporter.processItem((SubstanceRecord) prevrecord);
                writer.flush();
            }
            n++;
            prevrecord = record;
            // Progress indicator on stderr: a dot every 10000 rows, the count every 100000.
            if ((n % 100000) == 0) {
                System.err.println();
                System.err.print(n);
            } else if ((n % 10000) == 0)
                System.err.print(".");
        }
        // The last substance is still pending; skip it when the input had no rows at all.
        if (prevrecord != null)
            reporter.processItem((SubstanceRecord) prevrecord);
        reporter.footer(writer, null);
        writer.flush();
    } catch (Exception x) {
        x.printStackTrace();
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignored) {
                // nothing useful to do while shutting down
            }
        }
        if (in != null) {
            try {
                in.close();
            } catch (Exception ignored) {
                // nothing useful to do while shutting down
            }
        }
        try {
            reporter.close();
        } catch (Exception ignored) {
            // nothing useful to do while shutting down
        }
        if (writer != null) {
            try {
                // Release the output file, but never close System.out.
                if (writesToFile)
                    writer.close();
                else
                    writer.flush();
            } catch (Exception ignored) {
                // nothing useful to do while shutting down
            }
        }
    }
}
Example use of ambit2.core.io.RawIteratingCSVReader in the project ambit-mirror by ideaconsult.
Source: class RawIteratingWrapperTest, method testCSVReader.
/**
 * Iterates the tab-delimited resource {@code ambit2/core/data/io/test.txt}
 * with a {@code RawIteratingCSVReader} and checks every record:
 * records that have both SMILES and InChI must also have content and an
 * InChI key, every record must convert through {@code MoleculeReader}, and
 * records with a non-empty SMILES must yield at least one atom.
 * Exactly 11 iterations are expected.
 */
@Test
public void testCSVReader() throws Exception {
    RawIteratingCSVReader csvReader = new RawIteratingCSVReader(
            new InputStreamReader(
                    RawIteratingWrapperTest.class.getClassLoader().getResourceAsStream("ambit2/core/data/io/test.txt")),
            CSVFormat.TDF);
    MoleculeReader toMolecule = new MoleculeReader();
    try {
        int seen = 0;
        for (; csvReader.hasNext(); seen++) {
            // NOTE(review): nextRecord() and next() are both called once per
            // hasNext(); presumably they expose the same prepared record - confirm.
            IStructureRecord current = csvReader.nextRecord();
            boolean hasSmilesAndInchi = current.getSmiles() != null && current.getInchi() != null;
            if (hasSmilesAndInchi) {
                Assert.assertNotNull(current.getContent());
                Assert.assertNotNull(current.getSmiles());
                Assert.assertNotNull(current.getInchi());
                Assert.assertNotNull(current.getInchiKey());
            }

            Object next = csvReader.next();
            Assert.assertNotNull(next);
            Assert.assertTrue(next instanceof IStructureRecord);

            IAtomContainer container = toMolecule.process((IStructureRecord) next);
            String smiles = current.getSmiles();
            if (smiles != null && !smiles.isEmpty()) {
                Assert.assertTrue(String.format("Atoms %d", container.getAtomCount()), container.getAtomCount() > 0);
            }
        }
        Assert.assertEquals(11, seen);
    } finally {
        csvReader.close();
    }
}
Example use of ambit2.core.io.RawIteratingCSVReader in the project ambit-mirror by ideaconsult.
Source: class BatchDBProcessor, method getIterator.
/**
 * Creates an iterator over the records of the file or directory described
 * by {@code target}.
 *
 * <p>A directory yields a {@code RawIteratingFolderReader} over its entries
 * whose names do not start with a dot. A single file is opened (through a
 * {@code GZIPInputStream} when its name ends with the GZ extension) and a
 * reader is chosen from the file name with the GZ extension removed: SDF,
 * MOL, CSV (Excel CSV format) and TXT (tab-delimited, {@code #} comments)
 * have dedicated raw readers; anything else goes through
 * {@code FileInputState.getReader} wrapped in a {@code RawIteratingWrapper}.
 *
 * <p>If anything fails after the file was opened, the stream is closed
 * before the exception is propagated.
 *
 * @param target the input description; must be a {@code FileInputState}
 * @return an iterator over the input records
 * @throws AmbitException if {@code target} is not a {@code FileInputState},
 *         the format is unsupported, or the input cannot be opened
 *         (I/O failures are reported as {@code AmbitIOException})
 */
public Iterator<ITEMTYPE> getIterator(IInputState target) throws AmbitException {
    if (!(target instanceof FileInputState))
        throw new AmbitException("Not a file");

    InputStream stream = null;
    try {
        File _file = ((FileInputState) target).getFile();
        if (_file.isDirectory()) {
            // Skip hidden entries such as ".svn" or ".DS_Store".
            FilenameFilter filter = new FilenameFilter() {
                public boolean accept(File dir, String name) {
                    return !name.startsWith(".");
                }
            };
            return new RawIteratingFolderReader(_file.listFiles(filter));
        }

        String filename = _file.getName();
        String gzExtension = _FILE_TYPE.GZ_INDEX.getExtension();
        // Open the raw stream first so it can be closed if the gzip header check fails.
        stream = new FileInputStream(_file);
        if (filename.endsWith(gzExtension)) {
            stream = new GZIPInputStream(stream);
            // Strip only the trailing extension; a regex replace would also
            // remove look-alike sequences elsewhere in the name.
            filename = filename.substring(0, filename.length() - gzExtension.length());
        }

        if (FileInputState._FILE_TYPE.SDF_INDEX.hasExtension(filename)) {
            RawIteratingSDFReader reader = new RawIteratingSDFReader(new InputStreamReader(stream));
            if (getReference() == null)
                reader.setReference(LiteratureEntry.getInstance(filename, _file.getAbsolutePath()));
            else
                reader.setReference(getReference());
            return reader;
        } else if (FileInputState._FILE_TYPE.MOL_INDEX.hasExtension(filename)) {
            RawIteratingMOLReader reader = new RawIteratingMOLReader(new InputStreamReader(stream));
            if (getReference() == null)
                reader.setReference(LiteratureEntry.getInstance(filename, _file.getAbsolutePath()));
            else
                reader.setReference(getReference());
            return reader;
            /* TEST and replace the wrapper with this */
        } else if (FileInputState._FILE_TYPE.CSV_INDEX.hasExtension(filename)) {
            RawIteratingCSVReader reader = new RawIteratingCSVReader(new InputStreamReader(stream), CSVFormat.EXCEL);
            configureReader(reader, target, _file);
            return reader;
        } else if (FileInputState._FILE_TYPE.TXT_INDEX.hasExtension(filename)) {
            RawIteratingCSVReader reader = new RawIteratingCSVReader(new InputStreamReader(stream), CSVFormat.TDF.withCommentMarker('#'));
            configureReader(reader, target, _file);
            return reader;
        } else {
            IIteratingChemObjectReader ir = FileInputState.getReader(stream, filename);
            if (ir == null)
                throw new AmbitException("Unsupported format " + filename);
            if (ir instanceof RawIteratingCSVReader) {
                configureReader(((RawIteratingCSVReader) ir), target, _file);
            }
            RawIteratingWrapper reader = new RawIteratingWrapper(ir);
            if (getReference() == null)
                reader.setReference(LiteratureEntry.getInstance(filename, _file.getAbsolutePath()));
            else
                reader.setReference(getReference());
            return reader;
        }
    } catch (AmbitException x) {
        // Already the declared exception type: propagate as is instead of wrapping it again.
        closeStreamQuietly(stream);
        throw x;
    } catch (IOException x) {
        closeStreamQuietly(stream);
        throw new AmbitIOException(x);
    } catch (Exception x) {
        closeStreamQuietly(stream);
        throw new AmbitException(x);
    }
}

/** Closes {@code stream} if it was opened, ignoring any failure while closing. */
private static void closeStreamQuietly(InputStream stream) {
    if (stream == null)
        return;
    try {
        stream.close();
    } catch (IOException ignored) {
        // The original failure is the one worth reporting.
    }
}
Aggregations