Use of ambit2.core.io.FileInputState in the ambit-mirror project by ideaconsult.
From the class CallableFileImport, method importFile:
/**
 * Imports chemical structure records from {@code file} into the database.
 * If {@code targetDataset} is not set, a new dataset is derived from the file
 * metadata. Returns a TaskResult whose URI points either at the single
 * imported compound (when {@code firstCompoundOnly}) or at the dataset.
 * The database {@code connection} is always closed and nulled on exit.
 *
 * @param file the input file to import
 * @return a TaskResult carrying the URI of the imported compound or dataset
 * @throws Exception wrapped as ResourceException(SERVER_ERROR_INTERNAL) for
 *         any failure other than a ResourceException, which is rethrown as-is
 */
public TaskResult importFile(File file) throws Exception {
try {
// if target dataset is not defined, create a new dataset from the file metadata
final SourceDataset dataset = targetDataset != null ? targetDataset : datasetMeta(file);
if (targetDataset == null)
dataset.setId(-1);
// anonymous batch processor: overrides the iterator factory to sniff
// RDF and NanoCML formats before falling back to the default reader
final BatchDBProcessor<String> batch = new BatchDBProcessor<String>() {
/**
 * Serial version id of this anonymous serializable processor.
 */
private static final long serialVersionUID = -7971761364143510120L;
@Override
public Iterator<String> getIterator(IInputState target) throws AmbitException {
try {
File file = ((FileInputState) target).getFile();
// try RDF first, then NanoCML, then the superclass default
RDFIteratingReader i = getRDFIterator(file, getReporter().getBaseReference().toString());
if (i == null) {
IIteratingChemObjectReader ni = getNanoCMLIterator(file, getReporter().getBaseReference().toString());
if (ni == null)
return super.getIterator(target);
else
return ni;
} else {
/*
 * RDFMetaDatasetIterator datasets = null; try {
 * datasets = new
 * RDFMetaDatasetIterator(i.getJenaModel());
 * datasets
 * .setBaseReference(getReporter().getBaseReference
 * ()); while (datasets.hasNext()) { SourceDataset d
 * = datasets.next(); dataset.setId(d.getId());
 * dataset.setName(d.getName());
 * dataset.setTitle(d.getTitle());
 * dataset.setURL(d.getURL()); } } catch (Exception
 * x) { x.printStackTrace(); } finally { try {
 * datasets.close();} catch (Exception x) {} }
 */
return i;
}
} catch (AmbitException x) {
throw x;
} catch (Exception x) {
throw new AmbitException(x);
}
}
@Override
public void onItemProcessed(String input, Object output, IBatchStatistics stats) {
super.onItemProcessed(input, output, stats);
// in single-compound mode, stop after the first processed record
// and remember it so its URI can be reported below
if (firstCompoundOnly && (stats.getRecords(RECORDS_STATS.RECORDS_PROCESSED) >= 1)) {
cancelled = true;
if (output != null)
if ((output instanceof ArrayList) && ((ArrayList) output).size() > 0) {
if (((ArrayList) output).get(0) instanceof IStructureRecord)
recordImported = (IStructureRecord) ((ArrayList) output).get(0);
} else if (output instanceof IStructureRecord)
recordImported = (IStructureRecord) output;
}
}
};
batch.setReference(dataset.getReference());
batch.setConnection(connection);
// writer persists each parsed record into the repository under `dataset`
final RepositoryWriter writer = new RepositoryWriter();
writer.setUseExistingStructure(isPropertyOnly());
writer.setPropertyKey(getMatcher());
writer.setDataset(dataset);
final ProcessorsChain<String, IBatchStatistics, IProcessor> chain = new ProcessorsChain<String, IBatchStatistics, IProcessor>();
chain.add(writer);
batch.setProcessorChain(chain);
writer.setConnection(connection);
FileInputState fin = new FileInputState(file);
IBatchStatistics stats = batch.process(fin);
if (firstCompoundOnly) {
if (recordImported == null)
throw new Exception("No compound imported");
if (compoundReporter == null)
compoundReporter = new ConformerURIReporter("", null, false);
// NOTE(review): close failure is deliberately ignored (best-effort cleanup)
try {
batch.close();
} catch (Exception xx) {
}
return new TaskResult(compoundReporter.getURI(recordImported));
} else {
// re-read the dataset from the database to obtain its persisted id/URI
ReadDataset q = new ReadDataset();
q.setValue(dataset);
QueryExecutor<ReadDataset> x = new QueryExecutor<ReadDataset>();
x.setConnection(connection);
ResultSet rs = x.process(q);
ISourceDataset newDataset = null;
while (rs.next()) {
newDataset = q.getObject(rs);
break;
}
x.closeResults(rs);
x.setConnection(null);
if (newDataset == null)
throw new ResourceException(Status.SUCCESS_NO_CONTENT);
if (reporter == null)
reporter = new DatasetURIReporter<IQueryRetrieval<ISourceDataset>, ISourceDataset>();
try {
batch.close();
} catch (Exception xx) {
}
return new TaskResult(reporter.getURI(newDataset));
}
} catch (ResourceException x) {
throw x;
} catch (Exception x) {
// NOTE(review): wrapping loses the cause chain — only the message survives
throw new ResourceException(new Status(Status.SERVER_ERROR_INTERNAL, x.getMessage()));
} finally {
// always release the connection; this object is single-use afterwards
try {
connection.close();
} catch (Exception x) {
}
connection = null;
}
}
Use of ambit2.core.io.FileInputState in the ambit-mirror project by ideaconsult.
From the class RawIteratingFolderReader, method getItemReader:
/**
 * Creates a raw iterating reader for the file at {@code index}, dispatching on
 * the (case-insensitive) file extension. Supported formats: SDF, MOL, I5D,
 * ZIP, and gzipped SDF.
 *
 * NOTE(review): the SDF/MOL branches use FileReader, i.e. the platform
 * default charset — presumably the files are ASCII/Latin-1 SDF; confirm
 * before switching to an explicit charset.
 *
 * @param index index into the {@code files} array
 * @return a reader positioned at the start of the file
 * @throws Exception if the extension matches no supported format, or the
 *         underlying reader cannot be created
 */
protected IRawReader<IStructureRecord> getItemReader(int index) throws Exception {
	File file = files[index];
	String name = file.getName().toLowerCase();
	if (name.endsWith(FileInputState._FILE_TYPE.SDF_INDEX.getExtension())) {
		RawIteratingSDFReader r = new RawIteratingSDFReader(new FileReader(file));
		r.setReference(fileReference(file));
		return (IRawReader<IStructureRecord>) r;
	} else if (name.endsWith(FileInputState._FILE_TYPE.MOL_INDEX.getExtension())) {
		RawIteratingMOLReader r = new RawIteratingMOLReader(new FileReader(file));
		r.setReference(fileReference(file));
		return (IRawReader<IStructureRecord>) r;
	} else if (name.endsWith(FileInputState._FILE_TYPE.I5D_INDEX.getExtension())) {
		IIteratingChemObjectReader r = FileInputState.getI5DReader(file);
		if (r instanceof ICiteable) {
			((ICiteable) r).setReference(LiteratureEntry.getI5UUIDReference());
		}
		return (IRawReader<IStructureRecord>) r;
	} else if (name.endsWith(FileInputState._FILE_TYPE.ZIP_INDEX.getExtension())) {
		return new ZipReader(file);
	} else if (name.endsWith(FileInputState._FILE_TYPE.GZ_INDEX.getExtension())) {
		// assuming gzipped SDF only...
		InputStreamReader reader = new InputStreamReader(new GZIPInputStream(new FileInputStream(file)));
		return new RawIteratingSDFReader(reader);
	} else {
		throw new Exception("Unsupported format " + name);
	}
}

/** Builds the file:/// provenance reference used for SDF and MOL inputs. */
private LiteratureEntry fileReference(File file) {
	return LiteratureEntry.getInstance(file.getName(), "file:///" + file.getAbsolutePath());
}
Use of ambit2.core.io.FileInputState in the ambit-mirror project by ideaconsult.
From the class DataSet, method getReader:
/**
 * Builds an iterating reader over the given stream, selected by file
 * extension. SDF input gets an InteractiveIteratingMDLReader with skip
 * enabled; every other extension is delegated to FileInputState. The
 * returned reader runs in RELAXED mode with a stderr/stdout error handler.
 *
 * @param in        the raw input stream
 * @param extension the (lowercase) file extension used for dispatch
 * @return a configured iterating reader over atom containers
 */
public static IIteratingChemObjectReader<IAtomContainer> getReader(InputStream in, String extension) throws CDKException, AmbitIOException {
	FileInputState inputState = new FileInputState();
	final IIteratingChemObjectReader<IAtomContainer> reader;
	if (!extension.endsWith(FileInputState._FILE_TYPE.SDF_INDEX.getExtension())) {
		reader = inputState.getReader(in, extension);
	} else {
		InteractiveIteratingMDLReader sdfReader = new InteractiveIteratingMDLReader(in, SilentChemObjectBuilder.getInstance());
		sdfReader.setSkip(true);
		reader = sdfReader;
	}
	reader.setReaderMode(Mode.RELAXED);
	// log-and-continue handler: exceptions go to stderr, plain messages to stdout
	reader.setErrorHandler(new IChemObjectReaderErrorHandler() {
		@Override
		public void handleError(String message, int row, int colStart, int colEnd, Exception exception) {
			exception.printStackTrace();
		}
		@Override
		public void handleError(String message, int row, int colStart, int colEnd) {
			System.out.println(message);
		}
		@Override
		public void handleError(String message, Exception exception) {
			exception.printStackTrace();
		}
		@Override
		public void handleError(String message) {
			System.out.println(message);
		}
	});
	return reader;
}
Use of ambit2.core.io.FileInputState in the ambit-mirror project by ideaconsult.
From the class SLNCli, method getReader:
/**
 * Returns an iterating reader over {@code in}, dispatched by file extension.
 * SDF gets a skip-enabled InteractiveIteratingMDLReader; anything else is
 * resolved via FileInputState. The reader is put into RELAXED mode with a
 * best-effort error handler that prints and continues.
 *
 * @param in        raw input stream to read structures from
 * @param extension file extension driving the reader choice
 * @return a configured iterating reader
 */
public IIteratingChemObjectReader<IAtomContainer> getReader(InputStream in, String extension) throws CDKException, AmbitIOException {
	FileInputState state = new FileInputState();
	IIteratingChemObjectReader<IAtomContainer> structureReader;
	boolean isSdf = extension.endsWith(FileInputState._FILE_TYPE.SDF_INDEX.getExtension());
	if (isSdf) {
		InteractiveIteratingMDLReader mdl = new InteractiveIteratingMDLReader(in, SilentChemObjectBuilder.getInstance());
		mdl.setSkip(true);
		structureReader = mdl;
	} else {
		structureReader = state.getReader(in, extension);
	}
	structureReader.setReaderMode(Mode.RELAXED);
	// tolerate malformed records: report and keep iterating
	structureReader.setErrorHandler(new IChemObjectReaderErrorHandler() {
		@Override
		public void handleError(String message, int row, int colStart, int colEnd, Exception exception) {
			exception.printStackTrace();
		}
		@Override
		public void handleError(String message, int row, int colStart, int colEnd) {
			System.out.println(message);
		}
		@Override
		public void handleError(String message, Exception exception) {
			exception.printStackTrace();
		}
		@Override
		public void handleError(String message) {
			System.out.println(message);
		}
	});
	return structureReader;
}
Use of ambit2.core.io.FileInputState in the ambit-mirror project by ideaconsult.
From the class Context, method parseCommandFingerprints:
/**
 * CLI entry point for fingerprint generation: reads structures from the input
 * file (with optional paging via page/pagesize), computes the configured
 * fingerprints for each record, and writes the annotated structures to the
 * output file (multi-file fingerprint writer, or a single SDF/delimited file).
 *
 * @param subcommand the fingerprint subcommand (currently unused in this body)
 * @param now        invocation timestamp (currently unused in this body)
 * @throws Exception if the output file is not specified or setup fails
 */
public void parseCommandFingerprints(String subcommand, long now) throws Exception {
boolean multifile = true;
// paging and option parsing
int page = parsePageParam();
int pagesize = parsePageSizeParam();
final boolean fp_count = parseWriteCountParam();
final boolean fp_raw = parseWriteRawParam();
String smiles_header = parseInputTag_Param("smiles", IteratingDelimitedFileReader.defaultSMILESHeader);
String inchi_header = parseInputTag_Param("inchi", "InChI");
String inchikey_header = parseInputTag_Param("inchikey", "InChIKey");
Object tmpTag = parseSdfTitleParam();
final String[] tags_to_keep = parsetags_to_keep();
final String sdf_title = tmpTag == null ? null : tmpTag.toString().toLowerCase();
// record window: [startRecord, maxRecord); maxRecord <= 0 means unbounded
final int startRecord = pagesize > 0 ? (page * pagesize + 1) : 1;
final int maxRecord = pagesize > 0 ? ((page + 1) * pagesize + 1) : pagesize;
final File file = getInputFile();
FileInputState input = new FileInputState(file);
input.setOptionalSMILESHeader(smiles_header);
input.setOptionalInChIHeader(inchi_header);
input.setOptionalInChIKeyHeader(inchikey_header);
if (options.output == null)
throw new FileNotFoundException("Output file not specified. Please use -o {file}");
final File outfile = new File(options.output);
logger_cli.log(Level.INFO, "MSG_INFO_READINGWRITING", new Object[] { file.getAbsoluteFile(), outfile.getAbsolutePath() });
final List<IFingerprinter> fps = parseFingerprinterParams();
FileOutputState out = null;
final IChemObjectWriter awriter;
// multifile is always true here, so the else branch is currently dead code
if (multifile) {
awriter = new MultiFingerprintsWriter(outfile, fps, tags_to_keep);
} else {
out = new FileOutputState(outfile);
awriter = out.getWriter();
if (awriter instanceof FilesWithHeaderWriter)
((FilesWithHeaderWriter) awriter).setAddSMILEScolumn(false);
}
final IChemObjectWriter writer = awriter;
final boolean writesdf = writer instanceof SDFWriter;
// tag-renaming map: canonicalizes SMILES/InChI/InChIKey/title/CHEMBL
// property keys; only the InChIKey tag stays enabled in the output
final Map<Object, Property> tags = new HashMap<>();
Property newtag = Property.getSMILESInstance();
newtag.setName("SMILES");
newtag.setEnabled(false);
tags.put(Property.opentox_SMILES, newtag);
tags.put(Property.getSMILESInstance(), newtag);
newtag = Property.getInChIInstance();
newtag.setEnabled(false);
tags.put(Property.opentox_InChI, newtag);
tags.put("InChI", newtag);
tags.put(Property.getInChIInstance(), newtag);
newtag = Property.getInChIKeyInstance();
newtag.setEnabled(true);
newtag.setName("InChIKey");
tags.put(Property.opentox_InChIKey, newtag);
tags.put(Property.getInChIKeyInstance(), newtag);
newtag = Property.getInstance(CDKConstants.TITLE, CDKConstants.TITLE);
newtag.setEnabled(false);
tags.put(CDKConstants.TITLE, newtag);
tags.put(newtag, newtag);
newtag = Property.getInstance("CHEMBL", "CHEMBL");
newtag.setEnabled(false);
tags.put("CHEMBL", newtag);
tags.put(newtag, newtag);
// batch processor implementing the paging window and progress logging
final BatchDBProcessor<IStructureRecord> batch = new BatchDBProcessor<IStructureRecord>() {
@Override
public void onItemRead(IStructureRecord input, IBatchStatistics stats) {
super.onItemRead(input, stats);
// stop reading once the page window has been consumed
if ((maxRecord > 0) && stats.getRecords(RECORDS_STATS.RECORDS_READ) >= (maxRecord))
cancel();
}
@Override
public boolean skip(IStructureRecord input, IBatchStatistics stats) {
return (stats.getRecords(RECORDS_STATS.RECORDS_READ) < startRecord) || ((maxRecord > 0) && (stats.getRecords(RECORDS_STATS.RECORDS_READ) >= maxRecord));
}
@Override
public void onItemSkipped(IStructureRecord input, IBatchStatistics stats) {
super.onItemSkipped(input, stats);
if (stats.isTimeToPrint(getSilentInterval() * 2))
propertyChangeSupport.firePropertyChange(PROPERTY_BATCHSTATS, null, stats);
}
@Override
public void onItemProcessing(IStructureRecord input, Object output, IBatchStatistics stats) {
}
@Override
public void onError(IStructureRecord input, Object output, IBatchStatistics stats, Exception x) {
super.onError(input, output, stats, x);
logger_cli.log(Level.SEVERE, "MSG_ERR", new Object[] { x.getMessage() });
}
@Override
public long getSilentInterval() {
return 30000L;
}
@Override
public void close() throws Exception {
// NOTE(review): writer close failure is silently swallowed (best-effort)
try {
writer.close();
} catch (Exception x) {
} finally {
}
super.close();
}
};
batch.setProcessorChain(new ProcessorsChain<IStructureRecord, IBatchStatistics, IProcessor>());
// per-record processor: parse molecule, filter tags, compute fingerprints, write
batch.getProcessorChain().add(new DefaultAmbitProcessor<IStructureRecord, IStructureRecord>() {
protected MoleculeReader molReader = new MoleculeReader(true, false);
@Override
public IStructureRecord process(IStructureRecord record) throws Exception {
IAtomContainer mol;
IAtomContainer processed = null;
try {
mol = molReader.process(record);
if (mol != null) {
// copy record properties onto the molecule, honoring tags_to_keep
// (tags_to_keep must be sorted for binarySearch — presumably
// guaranteed by parsetags_to_keep(); confirm)
for (Property p : record.getRecordProperties()) {
Object v = record.getRecordProperty(p);
String pname = p.getName().replace("http://www.opentox.org/api/1.1#", "");
// already parsed
if (tags_to_keep != null && Arrays.binarySearch(tags_to_keep, pname) < 0)
continue;
else
mol.setProperty(p, v);
}
// drop any molecule properties not in the keep list
if (tags_to_keep != null) {
List<String> toRemove = null;
Iterator pi = mol.getProperties().keySet().iterator();
while (pi.hasNext()) {
Object p = pi.next();
if (Arrays.binarySearch(tags_to_keep, p.toString()) < 0) {
if (toRemove == null)
toRemove = new ArrayList<String>();
toRemove.add(p.toString());
}
}
if (toRemove != null)
for (String propertyToRemove : toRemove) mol.removeProperty(propertyToRemove);
}
} else {
logger_cli.log(Level.SEVERE, "MSG_FINGEPRINTGEN", new Object[] { "Empty molecule", getIds(record) });
return record;
}
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol);
} catch (Exception x) {
logger_cli.log(Level.SEVERE, "MSG_ERR_MOLREAD", new Object[] { getIds(record), x.toString() });
return record;
} finally {
}
processed = mol;
// compute each configured fingerprint; individual failures are logged
// (or silently ignored for bit/raw variants) and do not abort the record
for (IFingerprinter fp : fps) {
ICountFingerprint cfp = null;
try {
cfp = fp.getCountFingerprint(processed);
} catch (Exception x) {
logger.log(Level.FINER, x.getMessage());
}
IBitFingerprint bfp = null;
try {
bfp = fp.getBitFingerprint(processed);
} catch (Exception x) {
}
Map<String, Integer> fpraw = null;
try {
if (fp_raw)
fpraw = fp.getRawFingerprint(processed);
} catch (Exception x) {
}
try {
if (cfp != null) {
if (fp_count)
processed.setProperty(fp.getClass().getName() + ".count", cfp);
processed.setProperty(fp.getClass().getName(), cfp);
}
if (bfp != null)
processed.setProperty(fp.getClass().getName() + ".hashed", bfp);
if (fpraw != null)
processed.setProperty(fp.getClass().getName() + ".raw", fpraw);
} catch (Exception x) {
// StringWriter w = new StringWriter();
// x.printStackTrace(new PrintWriter(w));
logger_cli.log(Level.SEVERE, "MSG_FINGEPRINTGEN", new Object[] { x.getMessage(), getIds(record) });
if (processed != null)
processed.setProperty("ERROR." + fp.getClass().getName(), x.getMessage());
} finally {
if (processed != null)
processed.addProperties(mol.getProperties());
}
}
if (processed != null)
try {
// for SDF output, promote the requested tag to the molecule title
if (writesdf && sdf_title != null) {
for (Entry<Object, Object> p : processed.getProperties().entrySet()) if (sdf_title.equals(p.getKey().toString().toLowerCase())) {
processed.setProperty(CDKConstants.TITLE, p.getValue());
break;
}
}
StructureStandardizer.renameTags(processed, tags, true);
writer.write(processed);
} catch (Exception x) {
// StringWriter w = new StringWriter();
// x.printStackTrace(new PrintWriter(w));
logger_cli.log(Level.SEVERE, "MSG_FINGEPRINTGEN", new Object[] { x.getMessage(), getIds(record) });
}
else {
logger_cli.log(Level.SEVERE, "MSG_FINGEPRINTGEN", new Object[] { "Empty molecule", getIds(record) });
}
return record;
}
});
// surface periodic batch statistics to the CLI log
batch.addPropertyChangeListener(new PropertyChangeListener() {
@Override
public void propertyChange(PropertyChangeEvent evt) {
if (AbstractBatchProcessor.PROPERTY_BATCHSTATS.equals(evt.getPropertyName()))
logger_cli.log(Level.INFO, evt.getNewValue().toString());
}
});
/*
 * standardprocessor.setCallback(new
 * DefaultAmbitProcessor<IAtomContainer, IAtomContainer>() {
 *
 * @Override public IAtomContainer process(IAtomContainer target) throws
 * Exception { try { //writer.write(target); } catch (Exception x) {
 * logger.log(Level.SEVERE, x.getMessage()); } return target; } });
 */
IBatchStatistics stats = null;
try {
stats = batch.process(input);
} catch (Exception x) {
logger_cli.log(Level.WARNING, x.getMessage(), x);
} finally {
// NOTE(review): this empty try/catch is dead code — nothing is attempted
try {
} catch (Exception x) {
logger_cli.warning(x.getMessage());
}
try {
if (batch != null)
batch.close();
} catch (Exception x) {
logger_cli.warning(x.getMessage());
}
if (stats != null)
logger_cli.log(Level.INFO, stats.toString());
}
}
Aggregations