use of eu.esdihumboldt.hale.common.instance.orient.OInstance in project hale by halestudio.
the class Transformation method transform.
/**
 * Transform the given instances, according to the given alignment.
 * <p>
 * If at least one active type cell of the alignment is not a streaming type
 * transformation, the source instances are first loaded into a temporary
 * database by a store job; the export and transformation jobs are scheduled
 * once that job completed successfully. Otherwise the export and
 * transformation jobs are scheduled directly on the given source instances.
 * If a validation job is given, it is scheduled after the export job
 * completed successfully.
 *
 * @param sources the collection of source instances
 * @param targetSink the target sink
 * @param exportJob the export job
 * @param validationJob the validation job, may be <code>null</code>
 * @param alignment the alignment, may not be changed outside this method
 * @param sourceSchema the source schema
 * @param reportHandler the report handler
 * @param serviceProvider the service provider in the transformation context
 * @param processId the identifier for the transformation process, may be
 *            <code>null</code> if grouping the jobs to a job family is not
 *            necessary
 * @return the future representing the successful completion of the
 *         transformation (note that a successful completion doesn't
 *         necessarily mean there weren't any internal transformation
 *         errors)
 */
public static ListenableFuture<Boolean> transform(InstanceCollection sources,
        final TransformationSink targetSink, final ExportJob exportJob,
        final ValidationJob validationJob, final Alignment alignment,
        SchemaSpace sourceSchema, final ReportHandler reportHandler,
        final ServiceProvider serviceProvider, final Object processId) {
    final SettableFuture<Boolean> result = SettableFuture.create();

    final InstanceCollection sourceToUse;

    // Check whether to create a temporary database or not.
    // Currently do not create a temporary DB if there are Retypes/Creates
    // only.
    boolean useTempDatabase = false;
    final LocalOrientDB db;
    for (Cell cell : alignment.getActiveTypeCells()) {
        if (!isStreamingTypeTransformation(cell.getTransformationIdentifier())) {
            useTempDatabase = true;
            break;
        }
    }

    // Create temporary database if necessary.
    if (useTempDatabase) {
        // create db
        File tmpDir = Files.createTempDir();
        db = new LocalOrientDB(tmpDir);
        tmpDir.deleteOnExit();

        // get instance collection
        // sourceToUse = new BrowseOrientInstanceCollection(db, sourceSchema, DataSet.SOURCE);
        // only yield instances that were actually inserted
        // this is also done in OrientInstanceService
        // TODO make configurable?
        sourceToUse = FilteredInstanceCollection.applyFilter(
                new BrowseOrientInstanceCollection(db, sourceSchema, DataSet.SOURCE),
                new Filter() {

                    @Override
                    public boolean match(Instance instance) {
                        if (instance instanceof OInstance) {
                            return ((OInstance) instance).isInserted();
                        }
                        return true;
                    }
                });
    }
    else {
        sourceToUse = new StatsCountInstanceCollection(sources, reportHandler);
        db = null;
    }

    // create transformation job
    final AbstractTransformationJob transformJob = new AbstractTransformationJob(
            "Transformation") {

        /**
         * @see org.eclipse.core.runtime.jobs.Job#run(org.eclipse.core.runtime.IProgressMonitor)
         */
        @Override
        protected IStatus run(IProgressMonitor monitor) {
            TransformationService transformationService = HalePlatform
                    .getService(TransformationService.class);
            TransformationReport report = transformationService.transform(alignment,
                    sourceToUse, targetSink, serviceProvider,
                    new ProgressMonitorIndicator(monitor));

            try {
                // publish report
                reportHandler.publishReport(report);

                if (report.isSuccess()) {
                    return Status.OK_STATUS;
                }
                else {
                    return ERROR_STATUS;
                }
            } finally {
                // The sink is always notified, with the flag telling it
                // whether the monitor was canceled. The return statement
                // in this finally block replaces the status determined
                // above (and any exception from publishing the report)
                // if the monitor was canceled.
                if (monitor.isCanceled()) {
                    targetSink.done(true);
                    return Status.CANCEL_STATUS;
                }
                else {
                    targetSink.done(false);
                }
            }
        }
    };

    // set process IDs to group jobs in a job family
    if (processId != null) {
        transformJob.setProcessId(processId);
        exportJob.setProcessId(processId);
        if (validationJob != null) {
            validationJob.setProcessId(processId);
        }
    }

    exportJob.setUser(true);

    // the jobs should cancel each other
    transformJob.addJobChangeListener(new JobChangeAdapter() {

        @Override
        public void done(IJobChangeEvent event) {
            if (!event.getResult().isOK()) {
                // log transformation job error (because it otherwise gets
                // lost)
                String msg = "Error during transformation";
                if (event.getResult().getMessage() != null) {
                    // append the status message to the generic prefix
                    msg += ": " + event.getResult().getMessage();
                }
                log.error(msg, event.getResult().getException());

                // failing transformation is done by cancelling the export
                exportJob.cancel();
            }

            // the temporary database is no longer needed once the
            // transformation job is done
            if (db != null) {
                db.delete();
            }
        }
    });

    // after export is done, validation should run
    exportJob.addJobChangeListener(new JobChangeAdapter() {

        @Override
        public void done(IJobChangeEvent event) {
            if (!event.getResult().isOK()) {
                transformJob.cancel();
                // failure
                failure(result, event);
            }
            else {
                if (validationJob == null) {
                    // success
                    result.set(true);
                }
                else {
                    // schedule the validation job
                    validationJob.schedule();
                }
            }
        }
    });

    // validation ends the process
    if (validationJob != null) {
        validationJob.addJobChangeListener(new JobChangeAdapter() {

            @Override
            public void done(IJobChangeEvent event) {
                if (!event.getResult().isOK()) {
                    // failure
                    failure(result, event);
                }
                else {
                    // success
                    result.set(true);
                }
            }
        });
    }

    if (useTempDatabase) {
        // Initialize instance index with alignment
        InstanceIndexService indexService = serviceProvider
                .getService(InstanceIndexService.class);
        indexService.addPropertyMappings(alignment.getActiveTypeCells(), serviceProvider);

        // run store instance job first...
        Job storeJob = new StoreInstancesJob("Load source instances into temporary database",
                db, sources, serviceProvider, reportHandler, true) {

            @Override
            protected void onComplete() {
                // onComplete is also called if monitor is cancelled...
            }

            @Override
            public boolean belongsTo(Object family) {
                if (processId == null) {
                    return super.belongsTo(family);
                }
                return AbstractTransformationJob.createFamily(processId).equals(family);
            }
        };

        // and schedule jobs on successful completion
        storeJob.addJobChangeListener(new JobChangeAdapter() {

            @Override
            public void done(IJobChangeEvent event) {
                if (event.getResult().isOK()) {
                    exportJob.schedule();
                    transformJob.schedule();
                }
                else {
                    // NOTE(review): the temporary database is not deleted
                    // on this path, as the transformation job (whose
                    // listener deletes it) is never scheduled - confirm
                    // whether cleanup is needed here.
                    failure(result, event);
                }
            }
        });
        storeJob.schedule();
    }
    else {
        // otherwise feed InstanceProcessors directly from the
        // InstanceCollection...
        // TODO Implement differently, not w/ PseudoInstanceReference which
        // will cause memory problems
        // final InstanceProcessingExtension ext = new InstanceProcessingExtension(
        // serviceProvider);
        // final List<InstanceProcessor> processors = ext.getInstanceProcessors();
        //
        // ResourceIterator<Instance> it = sourceToUse.iterator();
        // try {
        // while (it.hasNext()) {
        // Instance instance = it.next();
        //
        // ResolvableInstanceReference resolvableRef = new ResolvableInstanceReference(
        // new PseudoInstanceReference(instance), sourceToUse);
        // processors.forEach(p -> p.process(instance, resolvableRef));
        //
        // }
        // } finally {
        // it.close();
        // }
        // ...and schedule jobs
        exportJob.schedule();
        transformJob.schedule();
    }

    return result;
}
use of eu.esdihumboldt.hale.common.instance.orient.OInstance in project hale by halestudio.
the class StoreInstancesJob method run.
/**
 * Stores all instances of the configured instance collection in the
 * database and, if processing is enabled, feeds every stored instance to
 * the instance processors and the instance index. Progress is reported to
 * the given monitor; cancelling the monitor stops the loading, leaving an
 * incomplete data set in the database.
 *
 * @param monitor the progress monitor
 * @return a status with severity {@link IStatus#CANCEL} if the monitor was
 *         canceled, otherwise {@link IStatus#OK}
 * @see Job#run(IProgressMonitor)
 */
@Override
public IStatus run(IProgressMonitor monitor) {
    boolean exactProgress = instances.hasSize();
    monitor.beginTask("Store instances in database",
            (exactProgress) ? (instances.size()) : (IProgressMonitor.UNKNOWN));

    AtomicInteger count = new AtomicInteger(0);
    TObjectIntHashMap<QName> typeCount = new TObjectIntHashMap<>();

    if (report != null) {
        // set the correct start time
        report.setStartTime(new Date());
    }

    // get database connection
    DatabaseReference<ODatabaseDocumentTx> ref = database.openWrite();
    ODatabaseDocumentTx db = ref.getDatabase();

    ATransaction trans = log.begin("Store instances in database");
    try {
        // use intent
        db.declareIntent(new OIntentMassiveInsert());

        // Find all the InstanceProcessors to feed them the stored Instances
        final List<InstanceProcessor> processors;
        if (doProcessing) {
            final InstanceProcessingExtension ext = new InstanceProcessingExtension(
                    serviceProvider);
            processors = ext.getInstanceProcessors();
        }
        else {
            processors = Collections.emptyList();
        }

        BrowseOrientInstanceCollection browser = new BrowseOrientInstanceCollection(database,
                null, DataSet.SOURCE);

        final InstanceIndexService indexService;
        if (doProcessing) {
            indexService = serviceProvider.getService(InstanceIndexService.class);
        }
        else {
            indexService = null;
        }

        // TODO decouple next() and save()?
        SimpleLogContext.withLog(report, () -> {
            if (report != null && instances instanceof LogAware) {
                ((LogAware) instances).setLog(report);
            }

            ResourceIterator<Instance> it = instances.iterator();
            int size = instances.size();
            try {
                // time of the last count update; must live outside the loop
                // so that the sub task is only refreshed every 100 ms
                long lastUpdate = 0;

                while (it.hasNext() && !monitor.isCanceled()) {
                    if (report != null && instances instanceof LogAware) {
                        ((LogAware) instances).setLog(report);
                    }

                    Instance instance = it.next();

                    // further processing before storing
                    processInstance(instance);

                    // get/create OInstance
                    OInstance conv = ((instance instanceof OInstance) ? ((OInstance) instance)
                            : (new OInstance(instance)));
                    conv.setInserted(true);

                    // update the instance to store, e.g. generating
                    // metadata
                    updateInstance(conv);

                    ODatabaseRecordThreadLocal.INSTANCE.set(db);
                    // configure the document
                    ODocument doc = conv.configureDocument(db);
                    // and save it
                    doc.save();

                    // Create an InstanceReference for the saved instance
                    // and feed it to all known InstanceProcessors. The
                    // decoration with ResolvableInstanceReference allows
                    // the InstanceProcessors to resolve the instances if
                    // required.
                    OrientInstanceReference oRef = new OrientInstanceReference(
                            doc.getIdentity(), conv.getDataSet(), conv.getDefinition());
                    IdentifiableInstanceReference idRef = new IdentifiableInstanceReference(
                            oRef, doc.getIdentity());
                    ResolvableInstanceReference resolvableRef = new ResolvableInstanceReference(
                            idRef, browser);

                    processors.forEach(p -> p.process(instance, resolvableRef));

                    if (indexService != null) {
                        indexService.add(instance, resolvableRef);
                    }

                    count.incrementAndGet();

                    TypeDefinition type = instance.getDefinition();
                    if (type != null) {
                        typeCount.adjustOrPutValue(type.getName(), 1, 1);
                    }

                    if (exactProgress) {
                        monitor.worked(1);
                    }

                    long now = System.currentTimeMillis();
                    if (now - lastUpdate > 100) {
                        // only update every 100
                        // milliseconds
                        monitor.subTask(MessageFormat.format("{0}{1} instances processed",
                                String.valueOf(count.get()),
                                size != InstanceCollection.UNKNOWN_SIZE
                                        ? "/" + String.valueOf(size)
                                        : ""));
                        lastUpdate = now;
                    }
                }
            } finally {
                it.close();
                if (report != null && instances instanceof LogAware) {
                    ((LogAware) instances).setLog(null);
                }
            }
        });

        db.declareIntent(null);
    } catch (RuntimeException e) {
        if (report != null) {
            reportTypeCount(report, typeCount);
            report.error(new MessageImpl("Error storing instances in database", e));
            report.setSuccess(false);
            reportHandler.publishReport(report);
        }
        throw e;
    } finally {
        ref.dispose();
        trans.end();

        /*
         * Reset instances to prevent memory leak. It seems Eclipse
         * internally holds a reference to the job (in JobInfo and/or
         * ProgressMonitorFocusJobDialog) and this results in the instance
         * collection not being garbage collected. This is especially bad,
         * if an in-memory instance collection is used, e.g. a
         * DefaultInstanceCollection that is used when loading a Shapefile.
         */
        instances = null;
    }

    try {
        onComplete();
    } catch (RuntimeException e) {
        // a failing post processing hook is reported but does not fail the
        // job
        String message = "Error while post processing stored instances";
        if (report != null) {
            report.error(new MessageImpl(message, e));
        }
        else {
            log.error(message, e);
        }
    }

    String message = MessageFormat.format("Stored {0} instances in the database.", count);

    if (monitor.isCanceled()) {
        String warn = "Loading instances was canceled, incomplete data set in the database.";
        if (report != null) {
            report.warn(new MessageImpl(warn, null));
        }
        else {
            log.warn(warn);
        }
    }

    if (report != null) {
        reportTypeCount(report, typeCount);
        report.setSuccess(true);
        report.setSummary(message);
        reportHandler.publishReport(report);
    }
    else {
        log.info(message);
    }

    monitor.done();

    return new Status((monitor.isCanceled()) ? (IStatus.CANCEL) : (IStatus.OK),
            "eu.esdihumboldt.hale.common.instance.orient", message);
}
use of eu.esdihumboldt.hale.common.instance.orient.OInstance in project hale by halestudio.
the class OrientInstanceReference method load.
/**
 * Resolve this reference against the given database and return the
 * referenced instance.
 *
 * @param lodb the database
 * @param owner the instance collection owning the reference
 * @return the instance or <code>null</code> if no instance matching the
 *         reference is present (or it could not be retrieved)
 */
public Instance load(LocalOrientDB lodb, Object owner) {
    final SharedDatabaseConnection shared = SharedDatabaseConnection.openRead(lodb, owner);
    final DatabaseReference<ODatabaseDocumentTx> dbRef = shared.getDb();
    final DatabaseHandle dbHandle = shared.getHandle();
    try {
        final ODocument loaded = dbRef.getDatabase().load(getId());
        if (loaded == null) {
            // nothing stored under this ID
            return null;
        }

        // wrap the document and register the instance with the handle
        final OInstance wrapped = new OInstance(loaded, getTypeDefinition(),
                dbRef.getDatabase(), getDataSet());
        return dbHandle.addInstance(wrapped);
    } catch (IllegalArgumentException e) {
        // ignore - instance does not exist
        return null;
    } catch (ODatabaseException e) {
        // only log database errors that are not caused by an
        // IllegalArgumentException (which is treated as "does not exist")
        final boolean causedByIllegalArgument = e.getCause() instanceof IllegalArgumentException;
        if (!causedByIllegalArgument) {
            log.error("Failed to retrieve instance with ID " + id, e);
        }
        return null;
    } finally {
        // connection is closed in DatabaseHandle
        dbRef.dispose(false);
        // try closing the database handle (e.g. if no objects were
        // added)
        dbHandle.tryClose();
    }
}
use of eu.esdihumboldt.hale.common.instance.orient.OInstance in project hale by halestudio.
the class OrientInstanceReference method createReference.
/**
 * Create a reference for an instance. {@link InstanceDecorator}s are
 * unwrapped until the original instance is reached, which must be an
 * {@link OInstance}.
 *
 * @param instance the instance, which must be an {@link OInstance}
 *            (possibly wrapped in decorators) with a data set
 * @return the instance reference
 * @throws IllegalArgumentException if the instance is no {@link OInstance}
 *             or its data set is <code>null</code>
 */
public static InstanceReference createReference(Instance instance) {
    if (instance.getDataSet() == null) {
        throw new IllegalArgumentException(
                "Instance data set may not be null for retrieving reference");
    }

    // strip decorators to get to the underlying instance
    while (instance instanceof InstanceDecorator) {
        instance = ((InstanceDecorator) instance).getOriginalInstance();
    }

    // fail with the documented exception instead of a ClassCastException
    if (!(instance instanceof OInstance)) {
        throw new IllegalArgumentException(
                "Instance must be an OInstance for retrieving reference, but was "
                        + (instance == null ? "null" : instance.getClass().getName()));
    }

    OInstance inst = (OInstance) instance;
    ORID id = inst.getDocument().getIdentity();
    return new OrientInstanceReference(id, instance.getDataSet(), inst.getDefinition());
}
use of eu.esdihumboldt.hale.common.instance.orient.OInstance in project hale by halestudio.
the class OSerializationHelper method convertFromDB.
/**
 * Convert a value received from the database, e.g. {@link ODocument}s to
 * {@link Instance}s, {@link Group}s or unwraps contained values.
 *
 * @param value the value
 * @param parent the parent group
 * @param childName the name of the child the value is associated to
 * @return the converted object, or the value itself if no conversion
 *         applies
 * @throws IllegalStateException if a document is associated neither with a
 *             property nor a group, or if a serialized value cannot be
 *             deserialized
 */
public static Object convertFromDB(Object value, OGroup parent, QName childName) {
    if (value instanceof ODocument) {
        ODocument doc = (ODocument) value;
        if (doc.containsField(BINARY_WRAPPER_FIELD)
                || doc.containsField(OSerializationHelper.FIELD_SERIALIZATION_TYPE)) {
            // extract wrapped ORecordBytes
            return OSerializationHelper.deserialize(doc, parent, childName);
        }
        else {
            ChildDefinition<?> child = parent.getDefinition().getChild(childName);
            if (child.asProperty() != null) {
                // no data set necessary for nested instances
                return new OInstance((ODocument) value, child.asProperty().getPropertyType(),
                        parent.getDb(), null);
            }
            else if (child.asGroup() != null) {
                return new OGroup((ODocument) value, child.asGroup(), parent.getDb());
            }
            else {
                throw new IllegalStateException("Field " + childName
                        + " is associated neither with a property nor a group.");
            }
        }
    }
    // TODO objects that are not supported inside document
    if (value instanceof ORecordBytes) {
        // XXX should not be reached as every ORecordBytes should be
        // contained in a wrapper
        // TODO try conversion first?!

        // object deserialization
        // NOTE(review): Java native deserialization - presumably only used
        // on data written by the application itself; confirm it is never
        // applied to untrusted content.
        ORecordBytes record = (ORecordBytes) value;
        ByteArrayInputStream bytes = new ByteArrayInputStream(record.toStream());
        try {
            ObjectInputStream in = new ObjectInputStream(bytes) {

                @Override
                protected Class<?> resolveClass(ObjectStreamClass desc)
                        throws IOException, ClassNotFoundException {
                    Class<?> result = resolved.get(desc.getName());
                    if (result == null) {
                        result = OsgiUtils.loadClass(desc.getName(), null);

                        if (resolved.size() > 200) {
                            // limit the cache size by evicting one entry;
                            // the iterator must be advanced before
                            // remove() may be called
                            java.util.Iterator<?> evict = resolved.entrySet().iterator();
                            if (evict.hasNext()) {
                                evict.next();
                                evict.remove();
                            }
                        }

                        resolved.put(desc.getName(), result);
                    }
                    return result;
                }
            };
            return in.readObject();
        } catch (Exception e) {
            throw new IllegalStateException("Could not deserialize field value.", e);
        }
    }

    return value;
}
Aggregations