use of net.idea.loom.nm.csv.CSV12Reader in project ambit-mirror by ideaconsult.
the class ProteinCoronaPaperReaderTest method test.
/**
 * Parses the Protein Corona CSV file and checks that every record is a
 * {@link SubstanceRecord} and that at least 120 records are read.
 * <p>
 * The data file is looked up as {@code MergedSheets.csv} in the
 * {@code java.io.tmpdir} directory and downloaded from GitHub only when it
 * is not already there.
 *
 * @throws Exception
 *             on download, parsing or assertion failure
 */
@Test
public void test() throws Exception {
    RawIteratingWrapper reader = null;
    try {
        LiteratureEntry entry = new LiteratureEntry("Protein Corona", "http://dx.doi.org/10.1021/nn406018q");
        entry.setType(_type.Dataset);
        File baseDir = new File(System.getProperty("java.io.tmpdir"));
        File datafile = new File(baseDir, "MergedSheets.csv");
        if (!datafile.exists()) {
            URL url = new URL("https://raw.githubusercontent.com/ideaconsult/Protein_Corona/master/MergedSheets.csv");
            DownloadTool.download(url, datafile);
        }
        CSV12Reader chemObjectReader = new CSV12Reader(new FileReader(datafile), entry, "PRCR-");
        reader = new CSV12SubstanceReader(chemObjectReader);
        int r = 0;
        while (reader.hasNext()) {
            IStructureRecord mol = reader.nextRecord();
            Assert.assertTrue(mol instanceof SubstanceRecord);
            System.out.println(((SubstanceRecord) mol).getPublicName());
            System.out.println(((SubstanceRecord) mol).getMeasurements());
            r++;
        }
        Assert.assertTrue(r >= 120);
    } finally {
        // reader is still null when the download or the reader construction
        // failed; guard so that an NPE here does not mask the real failure.
        if (reader != null)
            reader.close();
    }
}
use of net.idea.loom.nm.csv.CSV12Reader in project ambit-mirror by ideaconsult.
the class ProteinCoronaPaperReaderTest method testRDFExport.
/**
 * Parses the Protein Corona CSV file, feeds every substance record to a
 * {@link SubstanceRDFReporter} and writes the resulting model as Turtle to
 * {@code protein_export.ttl} in the {@code java.io.tmpdir} directory.
 * <p>
 * The data file is looked up as {@code MergedSheets.csv} in the temp
 * directory and downloaded from GitHub only when it is not already there.
 *
 * @throws Exception
 *             on download, parsing, assertion or export failure
 */
@Test
public void testRDFExport() throws Exception {
    CSV12SubstanceReader reader = null;
    // The reporter needs a request carrying a root reference; a bare
    // Request with a fixed localhost root is used for that purpose.
    Request hack = new Request();
    hack.setRootRef(new Reference("http://localhost/ambit2"));
    File baseDir = new File(System.getProperty("java.io.tmpdir"));
    File datafile = new File(baseDir, "MergedSheets.csv");
    if (!datafile.exists()) {
        URL url = new URL("https://raw.githubusercontent.com/ideaconsult/Protein_Corona/master/MergedSheets.csv");
        DownloadTool.download(url, datafile);
    }
    SubstanceRDFReporter r = new SubstanceRDFReporter(hack, MediaType.TEXT_RDF_N3);
    Model model = ModelFactory.createDefaultModel();
    r.header(model, null);
    r.setOutput(model);
    LiteratureEntry entry = new LiteratureEntry("Protein Corona", "http://dx.doi.org/10.1021/nn406018q");
    entry.setType(_type.Dataset);
    try {
        CSV12Reader chemObjectReader = new CSV12Reader(new FileReader(datafile), entry, "PRCR-");
        reader = new CSV12SubstanceReader(chemObjectReader);
        while (reader.hasNext()) {
            IStructureRecord record = reader.nextRecord();
            Assert.assertTrue(record instanceof SubstanceRecord);
            r.processItem((SubstanceRecord) record);
        }
        r.footer(model, null);
        File output = new File(System.getProperty("java.io.tmpdir") + "/" + "protein_export.ttl");
        System.out.println("Exported to " + output.getAbsolutePath());
        OutputStream writer = new FileOutputStream(output);
        try {
            RDFDataMgr.write(writer, model, RDFFormat.TURTLE);
        } finally {
            // Close the export stream explicitly so the file handle is
            // released and buffered output reaches the disk.
            writer.close();
        }
    } finally {
        if (reader != null)
            reader.close();
    }
}
use of net.idea.loom.nm.csv.CSV12Reader in project ambit-mirror by ideaconsult.
the class ProteinCoronaPaperReaderTest method testWriteProteinCoronaData.
/**
 * Sets up the database from {@code empty-datasets.xml}, parses the Protein
 * Corona CSV file and passes the parser to {@code write(...)} together with
 * the database connection.
 * <p>
 * The data file is looked up as {@code MergedSheets.csv} in the
 * {@code java.io.tmpdir} directory and downloaded from GitHub only when it
 * is not already there.
 *
 * @throws Exception
 *             on database setup, download, parsing or write failure
 */
@Test
public void testWriteProteinCoronaData() throws Exception {
    setUpDatabaseFromResource("ambit2/db/processors/test/empty-datasets.xml");
    IDatabaseConnection c = getConnection();
    IRawReader<IStructureRecord> parser = null;
    try {
        LiteratureEntry entry = new LiteratureEntry("Protein Corona", "http://dx.doi.org/10.1021/nn406018q");
        entry.setType(_type.Dataset);
        File baseDir = new File(System.getProperty("java.io.tmpdir"));
        File datafile = new File(baseDir, "MergedSheets.csv");
        if (!datafile.exists()) {
            URL url = new URL("https://raw.githubusercontent.com/ideaconsult/Protein_Corona/master/MergedSheets.csv");
            DownloadTool.download(url, datafile);
        }
        CSV12Reader chemObjectReader = new CSV12Reader(new FileReader(datafile), entry, "PRCR-");
        parser = new CSV12SubstanceReader(chemObjectReader);
        write(parser, c.getConnection(), new ReferenceSubstanceUUID(), false);
    } finally {
        try {
            // parser is still null when the download or the reader
            // construction failed; guard so an NPE does not mask that failure.
            if (parser != null)
                parser.close();
        } finally {
            // Always release the database connection, even if closing the
            // parser throws.
            c.close();
        }
    }
}
use of net.idea.loom.nm.csv.CSV12Reader in project ambit-mirror by ideaconsult.
the class CallableSubstanceImporter method createBatch.
/**
 * Creates the batch processor used to import a substance file.
 * <p>
 * The returned processor picks a reader from the (lower-cased) file name:
 * I5Z, I6Z, CSV, {@code .rdf}, {@code .ttl}, XLSX/XLS and {@code .json} are
 * handled; anything else is rejected. When {@code writer} is a
 * {@link DBSubstanceWriter} it is configured to match the selected format.
 * The reader is given an error handler that ignores every reported error.
 *
 * @param target
 *            the file to import; must not be {@code null}
 * @return the batch processor whose iterator reads the file
 * @throws ResourceException
 *             with {@code CLIENT_ERROR_BAD_REQUEST} when {@code target} is
 *             {@code null}
 * @throws Exception
 *             declared by the overridden method
 */
@Override
protected AbstractBatchProcessor createBatch(FileInputState target) throws Exception {
    if (target == null)
        throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST);
    final BatchDBProcessor<String> batch = new BatchDBProcessor<String>() {
        /**
         */
        private static final long serialVersionUID = 5712170806359764006L;

        /**
         * Builds the reader matching the file extension and returns it as
         * the iterator.
         *
         * @throws AmbitException
         *             for an unsupported format, a missing spreadsheet
         *             configuration, or wrapping any other failure
         */
        @Override
        public Iterator<String> getIterator(IInputState target) throws AmbitException {
            try {
                IRawReader<IStructureRecord> reader = null;
                File file = ((FileInputState) target).getFile();
                String ext = file.getName().toLowerCase();
                // Resolve the writer once; null means "nothing to configure".
                final DBSubstanceWriter dbWriter = (writer instanceof DBSubstanceWriter) ? (DBSubstanceWriter) writer
                        : null;
                if (ext.endsWith(FileInputState._FILE_TYPE.I5Z_INDEX.getExtension())) {
                    if (dbWriter != null) {
                        dbWriter.setSplitRecord(true);
                        dbWriter.setI5mode(true);
                    }
                    reader = new I5ZReader(file);
                    ((I5ZReader) reader).setQASettings(getQASettings());
                } else if (ext.endsWith(FileInputState._FILE_TYPE.I6Z_INDEX.getExtension())) {
                    if (dbWriter != null) {
                        dbWriter.setSplitRecord(true);
                        dbWriter.setI5mode(true);
                    }
                    reader = new I6ZReader(file);
                    ((I6ZReader) reader).setQASettings(getQASettings());
                } else if (ext.endsWith(FileInputState._FILE_TYPE.CSV_INDEX.getExtension())) {
                    if (dbWriter != null)
                        dbWriter.setSplitRecord(false);
                    LiteratureEntry reference = new LiteratureEntry(originalname, originalname);
                    // NOTE(review): FileReader decodes with the JVM default
                    // charset, while the RDF/TTL/JSON branches force UTF-8 —
                    // confirm this difference is intended.
                    reader = new CSV12SubstanceReader(new CSV12Reader(new FileReader(file), reference, "FCSV-"));
                } else if (ext.endsWith(".rdf")) {
                    if (dbWriter != null) {
                        dbWriter.setSplitRecord(false);
                        dbWriter.setImportBundles(true);
                    }
                    reader = new NanoWikiRDFReader(new InputStreamReader(new FileInputStream(file), "UTF-8"), null,
                            "RDF/XML");
                } else if (ext.endsWith(".ttl")) {
                    if (dbWriter != null) {
                        dbWriter.setSplitRecord(false);
                        dbWriter.setImportBundles(true);
                    }
                    reader = new ENanoMapperRDFReader(new InputStreamReader(new FileInputStream(file), "UTF-8"),
                            "ENM3");
                } else if (FileInputState._FILE_TYPE.XLSX_INDEX.hasExtension(ext)
                        || FileInputState._FILE_TYPE.XLS_INDEX.hasExtension(ext)) {
                    if (configFile == null)
                        throw new AmbitException("XLS/XLSX file import requires a JSON configuration file");
                    final StructureRecordValidator validator = new StructureRecordValidator(file.getName(), true,
                            "XLSX");
                    reader = new GenericExcelParser(new FileInputStream(file), configFile,
                            FileInputState._FILE_TYPE.XLSX_INDEX.hasExtension(ext)) {
                        public Object next() {
                            Object record = super.next();
                            try {
                                if (record instanceof IStructureRecord)
                                    record = validator.process((IStructureRecord) record);
                            } catch (Exception ignored) {
                                // Validation is best-effort: on failure the
                                // record is returned as read, unvalidated.
                            }
                            return record;
                        }
                    };
                    if (dbWriter != null) {
                        dbWriter.setSplitRecord(false);
                        /*
                         * ((DBSubstanceWriter) writer)
                         * .setClearComposition(false); ((DBSubstanceWriter)
                         * writer) .setClearMeasurements(false);
                         */
                    }
                } else if (ext.endsWith(".json")) {
                    if (dbWriter != null)
                        dbWriter.setSplitRecord(false);
                    reader = new SubstanceStudyParser(new InputStreamReader(new FileInputStream(file), "UTF-8")) {
                        protected EffectRecord createEffectRecord(Protocol protocol) {
                            try {
                                // Look up the root object named
                                // "<category>_SECTION"; fall back to the
                                // parent implementation when there is none.
                                I5_ROOT_OBJECTS category = I5_ROOT_OBJECTS
                                        .valueOf(protocol.getCategory() + "_SECTION");
                                return category.createEffectRecord();
                            } catch (Exception x) {
                                return super.createEffectRecord(protocol);
                            }
                        }
                    };
                    if (dbWriter != null) {
                        dbWriter.setClearComposition(false);
                        dbWriter.setClearMeasurements(false);
                    }
                } else {
                    throw new AmbitException("Unsupported format " + file);
                }
                // Install a handler that deliberately ignores every error
                // reported by the reader.
                reader.setErrorHandler(new IChemObjectReaderErrorHandler() {
                    @Override
                    public void handleError(String message, int row, int colStart, int colEnd,
                            Exception exception) {
                    }

                    @Override
                    public void handleError(String message, int row, int colStart, int colEnd) {
                    }

                    @Override
                    public void handleError(String message, Exception exception) {
                    }

                    @Override
                    public void handleError(String message) {
                    }
                });
                return reader;
            } catch (AmbitException x) {
                // Already the right type; rethrow without wrapping.
                throw x;
            } catch (Exception x) {
                throw new AmbitException(x);
            }
        }
    };
    return batch;
}
Aggregations