Use of eu.esdihumboldt.hale.common.instance.index.InstanceIndexService in the project hale by halestudio.
Class Transformation, method transform.
/**
 * Transform the given instances, according to the given alignment.
 * <p>
 * If at least one active type cell of the alignment is not a streaming type
 * transformation, the source instances are first loaded into a temporary
 * database by a {@code StoreInstancesJob}; the export and transformation
 * jobs are only scheduled once that job completed successfully. Otherwise
 * the export and transformation jobs are scheduled right away and the
 * transformation reads directly from the given source collection.
 * <p>
 * The returned future is completed when the export job (or, if present, the
 * validation job) is done; a failing transformation job cancels the export
 * job and thereby fails the future.
 *
 * @param sources the collection of source instances
 * @param targetSink the target sink
 * @param exportJob the export job
 * @param validationJob the validation job, may be <code>null</code>
 * @param alignment the alignment, may not be changed outside this method
 * @param sourceSchema the source schema
 * @param reportHandler the report handler
 * @param serviceProvider the service provider in the transformation context
 * @param processId the identifier for the transformation process, may be
 *            <code>null</code> if grouping the jobs to a job family is not
 *            necessary
 * @return the future representing the successful completion of the
 *         transformation (note that a successful completion doesn't
 *         necessarily mean there weren't any internal transformation errors)
 */
public static ListenableFuture<Boolean> transform(InstanceCollection sources,
        final TransformationSink targetSink, final ExportJob exportJob,
        final ValidationJob validationJob, final Alignment alignment, SchemaSpace sourceSchema,
        final ReportHandler reportHandler, final ServiceProvider serviceProvider,
        final Object processId) {
    final SettableFuture<Boolean> result = SettableFuture.create();
    final InstanceCollection sourceToUse;

    // Check whether to create a temporary database or not.
    // Currently do not create a temporary DB if there are Retypes/Creates
    // only.
    boolean useTempDatabase = false;
    final LocalOrientDB db;
    for (Cell cell : alignment.getActiveTypeCells()) {
        if (!isStreamingTypeTransformation(cell.getTransformationIdentifier())) {
            useTempDatabase = true;
            break;
        }
    }

    // Create temporary database if necessary.
    if (useTempDatabase) {
        // create db
        File tmpDir = Files.createTempDir();
        db = new LocalOrientDB(tmpDir);
        tmpDir.deleteOnExit();

        // get instance collection
        // sourceToUse = new BrowseOrientInstanceCollection(db, sourceSchema, DataSet.SOURCE);
        // only yield instances that were actually inserted
        // this is also done in OrientInstanceService
        // TODO make configurable?
        sourceToUse = FilteredInstanceCollection.applyFilter(
                new BrowseOrientInstanceCollection(db, sourceSchema, DataSet.SOURCE),
                new Filter() {

                    @Override
                    public boolean match(Instance instance) {
                        if (instance instanceof OInstance) {
                            return ((OInstance) instance).isInserted();
                        }
                        return true;
                    }
                });
    }
    else {
        sourceToUse = new StatsCountInstanceCollection(sources, reportHandler);
        db = null;
    }

    // create transformation job
    final AbstractTransformationJob transformJob = new AbstractTransformationJob(
            "Transformation") {

        /**
         * @see org.eclipse.core.runtime.jobs.Job#run(org.eclipse.core.runtime.IProgressMonitor)
         */
        @Override
        protected IStatus run(IProgressMonitor monitor) {
            TransformationService transformationService = HalePlatform
                    .getService(TransformationService.class);
            TransformationReport report = transformationService.transform(alignment,
                    sourceToUse, targetSink, serviceProvider,
                    new ProgressMonitorIndicator(monitor));
            try {
                // publish report
                reportHandler.publishReport(report);
                if (report.isSuccess()) {
                    return Status.OK_STATUS;
                }
                else {
                    return ERROR_STATUS;
                }
            } finally {
                // NOTE: returning from a finally block replaces the status
                // returned above and discards any exception thrown while
                // publishing the report, which may lead to the
                // transformation report being lost on cancel.
                if (monitor.isCanceled()) {
                    targetSink.done(true);
                    return Status.CANCEL_STATUS;
                }
                else {
                    targetSink.done(false);
                }
            }
        }
    };

    // set process IDs to group jobs in a job family
    if (processId != null) {
        transformJob.setProcessId(processId);
        exportJob.setProcessId(processId);
        if (validationJob != null) {
            validationJob.setProcessId(processId);
        }
    }

    exportJob.setUser(true);

    // the jobs should cancel each other
    transformJob.addJobChangeListener(new JobChangeAdapter() {

        @Override
        public void done(IJobChangeEvent event) {
            if (!event.getResult().isOK()) {
                // log transformation job error (because it otherwise gets
                // lost)
                String msg = "Error during transformation";
                if (event.getResult().getMessage() != null) {
                    // append the detail instead of replacing the prefix
                    msg += ": " + event.getResult().getMessage();
                }
                log.error(msg, event.getResult().getException());

                // failing transformation is done by cancelling the export
                exportJob.cancel();
            }

            // the temporary database is no longer needed once the
            // transformation job is done
            if (db != null) {
                db.delete();
            }
        }
    });

    // after export is done, validation should run
    exportJob.addJobChangeListener(new JobChangeAdapter() {

        @Override
        public void done(IJobChangeEvent event) {
            if (!event.getResult().isOK()) {
                transformJob.cancel();
                // failure
                failure(result, event);
            }
            else {
                if (validationJob == null) {
                    // success
                    result.set(true);
                }
                else {
                    // schedule the validation job
                    validationJob.schedule();
                }
            }
        }
    });

    // validation ends the process
    if (validationJob != null) {
        validationJob.addJobChangeListener(new JobChangeAdapter() {

            @Override
            public void done(IJobChangeEvent event) {
                if (!event.getResult().isOK()) {
                    // failure
                    failure(result, event);
                }
                else {
                    // success
                    result.set(true);
                }
            }
        });
    }

    if (useTempDatabase) {
        // Initialize instance index with alignment. The index service is
        // treated as optional, so only configure it if it is available.
        InstanceIndexService indexService = serviceProvider
                .getService(InstanceIndexService.class);
        if (indexService != null) {
            indexService.addPropertyMappings(alignment.getActiveTypeCells(), serviceProvider);
        }

        // run store instance job first...
        Job storeJob = new StoreInstancesJob("Load source instances into temporary database",
                db, sources, serviceProvider, reportHandler, true) {

            @Override
            protected void onComplete() {
                // onComplete is also called if monitor is cancelled...
            }

            @Override
            public boolean belongsTo(Object family) {
                if (processId == null) {
                    return super.belongsTo(family);
                }
                return AbstractTransformationJob.createFamily(processId).equals(family);
            }
        };

        // and schedule jobs on successful completion
        storeJob.addJobChangeListener(new JobChangeAdapter() {

            @Override
            public void done(IJobChangeEvent event) {
                if (event.getResult().isOK()) {
                    exportJob.schedule();
                    transformJob.schedule();
                }
                else {
                    try {
                        failure(result, event);
                    } finally {
                        // The transformation job is never scheduled in this
                        // case, so its listener cannot clean up the
                        // temporary database - delete it here instead.
                        db.delete();
                    }
                }
            }
        });
        storeJob.schedule();
    }
    else {
        // otherwise feed InstanceProcessors directly from the
        // InstanceCollection...
        // TODO Implement differently, not w/ PseudoInstanceReference which
        // will cause memory problems
        // final InstanceProcessingExtension ext = new InstanceProcessingExtension(
        // serviceProvider);
        // final List<InstanceProcessor> processors = ext.getInstanceProcessors();
        //
        // ResourceIterator<Instance> it = sourceToUse.iterator();
        // try {
        // while (it.hasNext()) {
        // Instance instance = it.next();
        //
        // ResolvableInstanceReference resolvableRef = new ResolvableInstanceReference(
        // new PseudoInstanceReference(instance), sourceToUse);
        // processors.forEach(p -> p.process(instance, resolvableRef));
        //
        // }
        // } finally {
        // it.close();
        // }

        // ...and schedule jobs
        exportJob.schedule();
        transformJob.schedule();
    }

    return result;
}
Use of eu.esdihumboldt.hale.common.instance.index.InstanceIndexService in the project hale by halestudio.
Class StoreInstancesJob, method run.
/**
 * Stores the instances of the configured instance collection in the
 * database.
 * <p>
 * Every instance is processed, converted to an {@code OInstance} if
 * necessary, saved as a document and then handed (together with a
 * resolvable reference to the stored document) to the instance processors
 * and the instance index service, if processing is enabled. The number of
 * stored instances overall and per type is tracked and reported.
 * <p>
 * Note that the report is marked as successful even if the monitor was
 * canceled; in that case only a warning about the incomplete data set is
 * added.
 *
 * @param monitor the progress monitor, also polled for cancellation
 * @return a status with severity {@link IStatus#CANCEL} if the monitor was
 *         canceled, otherwise {@link IStatus#OK}
 * @see Job#run(IProgressMonitor)
 */
@Override
public IStatus run(IProgressMonitor monitor) {
    boolean exactProgress = instances.hasSize();
    monitor.beginTask("Store instances in database",
            (exactProgress) ? (instances.size()) : (IProgressMonitor.UNKNOWN));

    AtomicInteger count = new AtomicInteger(0);
    TObjectIntHashMap<QName> typeCount = new TObjectIntHashMap<>();

    if (report != null) {
        // set the correct start time
        report.setStartTime(new Date());
    }

    // get database connection
    DatabaseReference<ODatabaseDocumentTx> ref = database.openWrite();
    ODatabaseDocumentTx db = ref.getDatabase();

    ATransaction trans = log.begin("Store instances in database");
    try {
        // use intent
        db.declareIntent(new OIntentMassiveInsert());

        // Find all the InstanceProcessors to feed them the stored Instances
        final List<InstanceProcessor> processors;
        if (doProcessing) {
            final InstanceProcessingExtension ext = new InstanceProcessingExtension(
                    serviceProvider);
            processors = ext.getInstanceProcessors();
        }
        else {
            processors = Collections.emptyList();
        }

        BrowseOrientInstanceCollection browser = new BrowseOrientInstanceCollection(database,
                null, DataSet.SOURCE);

        final InstanceIndexService indexService;
        if (doProcessing) {
            indexService = serviceProvider.getService(InstanceIndexService.class);
        }
        else {
            indexService = null;
        }

        // TODO decouple next() and save()?
        SimpleLogContext.withLog(report, () -> {
            if (report != null && instances instanceof LogAware) {
                ((LogAware) instances).setLog(report);
            }

            ResourceIterator<Instance> it = instances.iterator();
            int size = instances.size();

            // Time of the last sub-task label update. Must live outside the
            // loop, otherwise it is reset for every instance and the
            // throttling below never takes effect.
            long lastUpdate = 0;
            try {
                while (it.hasNext() && !monitor.isCanceled()) {
                    if (report != null && instances instanceof LogAware) {
                        ((LogAware) instances).setLog(report);
                    }

                    Instance instance = it.next();

                    // further processing before storing
                    processInstance(instance);

                    // get/create OInstance
                    OInstance conv = ((instance instanceof OInstance) ? ((OInstance) instance)
                            : (new OInstance(instance)));

                    conv.setInserted(true);

                    // update the instance to store, e.g. generating
                    // metadata
                    updateInstance(conv);

                    ODatabaseRecordThreadLocal.INSTANCE.set(db);
                    // configure the document
                    ODocument doc = conv.configureDocument(db);
                    // and save it
                    doc.save();

                    // Create an InstanceReference for the saved instance
                    // and feed it to all known InstanceProcessors. The
                    // decoration with ResolvableInstanceReference allows
                    // the InstanceProcessors to resolve the instances if
                    // required.
                    OrientInstanceReference oRef = new OrientInstanceReference(
                            doc.getIdentity(), conv.getDataSet(), conv.getDefinition());
                    IdentifiableInstanceReference idRef = new IdentifiableInstanceReference(
                            oRef, doc.getIdentity());
                    ResolvableInstanceReference resolvableRef = new ResolvableInstanceReference(
                            idRef, browser);

                    processors.forEach(p -> p.process(instance, resolvableRef));

                    if (indexService != null) {
                        indexService.add(instance, resolvableRef);
                    }

                    count.incrementAndGet();

                    TypeDefinition type = instance.getDefinition();
                    if (type != null) {
                        typeCount.adjustOrPutValue(type.getName(), 1, 1);
                    }

                    if (exactProgress) {
                        monitor.worked(1);
                    }

                    long now = System.currentTimeMillis();
                    if (now - lastUpdate > 100) {
                        // only update every 100
                        // milliseconds
                        monitor.subTask(MessageFormat.format("{0}{1} instances processed",
                                String.valueOf(count.get()),
                                size != InstanceCollection.UNKNOWN_SIZE
                                        ? "/" + String.valueOf(size) : ""));
                        lastUpdate = now;
                    }
                }
            } finally {
                it.close();
                if (report != null && instances instanceof LogAware) {
                    ((LogAware) instances).setLog(null);
                }
            }
        });

        db.declareIntent(null);
    } catch (RuntimeException e) {
        if (report != null) {
            reportTypeCount(report, typeCount);
            report.error(new MessageImpl("Error storing instances in database", e));
            report.setSuccess(false);
            reportHandler.publishReport(report);
        }
        throw e;
    } finally {
        ref.dispose();
        trans.end();

        /*
         * Reset instances to prevent memory leak. It seems Eclipse
         * internally holds a reference to the job (in JobInfo and/or
         * ProgressMonitorFocusJobDialog) and this results in the instance
         * collection not being garbage collected. This is especially bad,
         * if an in-memory instance collection is used, e.g. a
         * DefaultInstanceCollection that is used when loading a Shapefile.
         */
        instances = null;
    }

    try {
        onComplete();
    } catch (RuntimeException e) {
        String message = "Error while post processing stored instances";
        if (report != null) {
            report.error(new MessageImpl(message, e));
        }
        else {
            log.error(message, e);
        }
    }

    String message = MessageFormat.format("Stored {0} instances in the database.", count);

    if (monitor.isCanceled()) {
        String warn = "Loading instances was canceled, incomplete data set in the database.";
        if (report != null) {
            report.warn(new MessageImpl(warn, null));
        }
        else {
            log.warn(warn);
        }
    }

    if (report != null) {
        reportTypeCount(report, typeCount);
        report.setSuccess(true);
        report.setSummary(message);
        reportHandler.publishReport(report);
    }
    else {
        log.info(message);
    }

    monitor.done();

    return new Status((monitor.isCanceled()) ? (IStatus.CANCEL) : (IStatus.OK),
            "eu.esdihumboldt.hale.common.instance.orient", message);
}
Use of eu.esdihumboldt.hale.common.instance.index.InstanceIndexService in the project hale by halestudio.
Class IndexJoinHandler, method partitionInstances.
/**
 * Partitions the given instances for a join using the instance index.
 * <p>
 * Collects references to all instances of the first join type as start
 * instances and returns an {@code IndexJoinIterator} over them. Falls back
 * to a plain {@code JoinHandler} (logging a warning) if the index service
 * is not available or if at least one instance does not have an ID.
 *
 * @throws TransformationException if no join parameter is defined or the
 *             join parameter is invalid
 * @see eu.esdihumboldt.hale.common.align.transformation.function.InstanceHandler#partitionInstances(eu.esdihumboldt.hale.common.instance.model.InstanceCollection,
 *      java.lang.String,
 *      eu.esdihumboldt.hale.common.align.transformation.engine.TransformationEngine,
 *      com.google.common.collect.ListMultimap, java.util.Map,
 *      eu.esdihumboldt.hale.common.align.transformation.report.TransformationLog)
 */
@Override
public ResourceIterator<FamilyInstance> partitionInstances(InstanceCollection instances,
        String transformationIdentifier, TransformationEngine engine,
        ListMultimap<String, ParameterValue> transformationParameters,
        Map<String, String> executionParameters, TransformationLog log)
        throws TransformationException {
    if (transformationParameters == null
            || !transformationParameters.containsKey(PARAMETER_JOIN)
            || transformationParameters.get(PARAMETER_JOIN).isEmpty()) {
        throw new TransformationException("No join parameter defined");
    }

    JoinHandler fallbackHandler = new JoinHandler();

    InstanceIndexService indexService = serviceProvider
            .getService(InstanceIndexService.class);
    if (indexService == null) {
        log.warn(MessageFormat.format(
                "Index service not available, falling back to join handler {0}",
                fallbackHandler.getClass().getCanonicalName()));
        return fallbackHandler.partitionInstances(instances, transformationIdentifier, engine,
                transformationParameters, executionParameters, log);
    }

    JoinParameter joinParameter = transformationParameters.get(PARAMETER_JOIN).get(0)
            .as(JoinParameter.class);

    String validation = joinParameter.validate();
    if (validation != null) {
        throw new TransformationException("Join parameter invalid: " + validation);
    }

    List<TypeEntityDefinition> types = joinParameter.getTypes();
    JoinDefinition joinDefinition = JoinUtil.getJoinDefinition(joinParameter);

    // remember instances of first type to start join afterwards
    Collection<ResolvableInstanceReference> startInstances = new LinkedList<>();

    try (ResourceIterator<Instance> it = instances.iterator()) {
        while (it.hasNext()) {
            Instance i = InstanceDecorator.getRoot(it.next());

            // remember instances of first type
            if (i.getDefinition().equals(types.get(0).getDefinition())) {
                startInstances.add(
                        new ResolvableInstanceReference(instances.getReference(i), instances));
            }

            // The index can only be used if every input instance is
            // identifiable. The IDs themselves are not needed here, so they
            // are not collected.
            if (!Identifiable.is(i)) {
                log.warn(MessageFormat.format(
                        "At least one instance does not have an ID, falling back to join handler {0}",
                        fallbackHandler.getClass().getCanonicalName()));
                return fallbackHandler.partitionInstances(instances, transformationIdentifier,
                        engine, transformationParameters, executionParameters, log);
            }
        }
    }

    return new IndexJoinIterator(startInstances, joinDefinition, indexService);
}
Use of eu.esdihumboldt.hale.common.instance.index.InstanceIndexService in the project hale by halestudio.
Class IndexMergeHandler, method partitionInstances.
/**
 * Partitions the given instances for a merge using the instance index.
 * <p>
 * The index is grouped by the merge key properties for the type of the
 * first input instance; groups that do not contain any of the input
 * instances are dropped. Each remaining group is resolved and merged lazily
 * when the returned iterator is advanced. Falls back to a
 * {@code PropertiesMergeHandler} (logging a warning) if the index service
 * is not available or if at least one instance does not have an ID.
 *
 * @see eu.esdihumboldt.cst.functions.core.merge.AbstractMergeHandler#partitionInstances(eu.esdihumboldt.hale.common.instance.model.InstanceCollection,
 *      java.lang.String,
 *      eu.esdihumboldt.hale.common.align.transformation.engine.TransformationEngine,
 *      com.google.common.collect.ListMultimap, java.util.Map,
 *      eu.esdihumboldt.hale.common.align.transformation.report.TransformationLog)
 */
@Override
public ResourceIterator<FamilyInstance> partitionInstances(InstanceCollection instances,
        String transformationIdentifier, TransformationEngine engine,
        ListMultimap<String, ParameterValue> transformationParameters,
        Map<String, String> executionParameters, TransformationLog log)
        throws TransformationException {
    PropertiesMergeHandler fallbackHandler = new PropertiesMergeHandler();

    InstanceIndexService indexService = serviceProvider
            .getService(InstanceIndexService.class);
    if (indexService == null) {
        log.warn(MessageFormat.format(
                "Index service not available, falling back to merge handler {0}",
                fallbackHandler.getClass().getCanonicalName()));
        return fallbackHandler.partitionInstances(instances, transformationIdentifier, engine,
                transformationParameters, executionParameters, log);
    }

    final IndexMergeConfig mergeConfig = createMergeConfiguration(transformationParameters);

    // the type to group by is taken from the first input instance
    QName typeName;
    try (ResourceIterator<Instance> it = instances.iterator()) {
        if (it.hasNext()) {
            typeName = it.next().getDefinition().getName();
        }
        else {
            // Nothing to partition
            return new ResourceIterator<FamilyInstance>() {

                @Override
                public boolean hasNext() {
                    return false;
                }

                @Override
                public FamilyInstance next() {
                    // honor the Iterator contract for an exhausted iterator
                    throw new java.util.NoSuchElementException();
                }

                @Override
                public void close() {
                    // Do nothing
                }
            };
        }
    }

    // Querying the index will yield a result over all instances. We must,
    // however, be able to operate only on the given input instances instead
    // of all instances.
    // We must, therefore, be able to uniquely identify every instance in
    // the index, so that we can retain from the index query only the
    // relevant instances.
    // A hash set keeps the membership test below constant-time per lookup
    // (fully qualified so that no additional import is required).
    java.util.Set<Object> inputInstanceIds = new java.util.HashSet<>();
    try (ResourceIterator<Instance> it = instances.iterator()) {
        while (it.hasNext()) {
            Instance i = InstanceDecorator.getRoot(it.next());
            if (!Identifiable.is(i)) {
                log.warn(MessageFormat.format(
                        "At least one instance does not have an ID, falling back to merge handler {0}",
                        fallbackHandler.getClass().getCanonicalName()));
                return fallbackHandler.partitionInstances(instances, transformationIdentifier,
                        engine, transformationParameters, executionParameters, log);
            }
            inputInstanceIds.add(Identifiable.getId(i));
        }
    }

    Collection<Collection<ResolvableInstanceReference>> partitionedIndex = indexService
            .groupBy(typeName, mergeConfig.keyProperties);

    // Remove instance groups from the partitioned index where none of the
    // instances in the group are in the processed instances.
    partitionedIndex.removeIf(part -> part.stream().map(ref -> ref.getId())
            .noneMatch(inputInstanceIds::contains));

    Iterator<Collection<ResolvableInstanceReference>> partitions = partitionedIndex.iterator();
    return new ResourceIterator<FamilyInstance>() {

        @Override
        public boolean hasNext() {
            return partitions.hasNext();
        }

        @Override
        public FamilyInstance next() {
            // resolve the references of the next group and merge them
            Collection<ResolvableInstanceReference> instanceRefs = partitions.next();
            InstanceCollection instancesToBeMerged = new DefaultInstanceCollection(
                    instanceRefs.stream().map(ref -> ref.resolve())
                            .collect(Collectors.toList()));
            return new FamilyInstanceImpl(merge(instancesToBeMerged, mergeConfig));
        }

        @Override
        public void close() {
            // nothing to close: this wraps a plain java.util.Iterator
        }
    };
}
Aggregations