Use of eu.esdihumboldt.hale.common.instance.model.InstanceCollection in project hale by halestudio — class FilteredInstanceCollection, method applyFilter:
/**
 * Create an instance collection that applies a filter to the given instance
 * collection.
 *
 * @param instances the instance collection to filter
 * @param filter the filter
 * @return the filtered instance collection
 */
public static InstanceCollection applyFilter(InstanceCollection instances, Filter filter) {
	// Type filters may be answered directly through fan-out, avoiding a
	// wrapping collection that tests every instance.
	if (instances instanceof InstanceCollection2 && filter instanceof TypeFilter) {
		InstanceCollection2 fanoutCapable = (InstanceCollection2) instances;
		if (fanoutCapable.supportsFanout()) {
			TypeDefinition filterType = ((TypeFilter) filter).getType();
			InstanceCollection perType = fanoutCapable.fanout().get(filterType);
			// no instances of that type -> empty collection
			return (perType != null) ? perType : EmptyInstanceCollection.INSTANCE;
		}
	}

	// generic fall-back: wrap in a filtering collection
	return new FilteredInstanceCollection(instances, filter);
}
Use of eu.esdihumboldt.hale.common.instance.model.InstanceCollection in project hale by halestudio — class Transformation, method transform:
/**
 * Transform the instances provided through the given instance readers and
 * supply the result to the given instance writer.
 *
 * @param sources the instance readers
 * @param target the target instance writer
 * @param environment the transformation environment
 * @param reportHandler the report handler
 * @param processId the identifier for the transformation process, may be
 *            <code>null</code> if grouping the jobs to a job family is not
 *            necessary
 * @param validators the instance validators, may be <code>null</code> or
 *            empty
 * @param filterDefinition {@link InstanceFilterDefinition} object as a
 *            filter may be <code>null</code>
 * @return the future representing the successful completion of the
 *         transformation (note that a successful completion doesn't
 *         necessary mean there weren't any internal transformation errors)
 */
public static ListenableFuture<Boolean> transform(List<InstanceReader> sources, InstanceWriter target, final TransformationEnvironment environment, final ReportHandler reportHandler, Object processId, Collection<InstanceValidator> validators, InstanceFilterDefinition filterDefinition) {
	// advisor that configures each source reader before it is executed
	final IOAdvisor<InstanceReader> loadDataAdvisor = new AbstractIOAdvisor<InstanceReader>() {

		/**
		 * @see IOAdvisor#prepareProvider(IOProvider)
		 */
		@Override
		public void prepareProvider(InstanceReader provider) {
			super.prepareProvider(provider);
			// the reader needs the source schema to create typed instances
			provider.setSourceSchema(environment.getSourceSchema());
		}

		/**
		 * @see AbstractIOAdvisor#updateConfiguration(IOProvider)
		 */
		@Override
		public void updateConfiguration(InstanceReader provider) {
			super.updateConfiguration(provider);
			if (environment instanceof ProjectTransformationEnvironment) {
				// set project CRS manager as CRS provider
				/*
				 * Resource based CRS settings will however not work, as the
				 * resource identifiers will not match
				 */
				provider.setCRSProvider(new ProjectCRSManager(provider, null, ((ProjectTransformationEnvironment) environment).getProject()));
			}
		}
	};
	loadDataAdvisor.setServiceProvider(environment);
	loadDataAdvisor.setActionId(InstanceIO.ACTION_LOAD_SOURCE_DATA);

	// NOTE(review): Lists.transform returns a lazy view, so each reader is
	// only executed when the corresponding collection element is first
	// accessed — a failing reader surfaces late as IllegalStateException
	List<InstanceCollection> sourceList = Lists.transform(sources, new Function<InstanceReader, InstanceCollection>() {

		@Override
		public InstanceCollection apply(@Nullable InstanceReader input) {
			try {
				HeadlessIO.executeProvider(input, loadDataAdvisor, null, reportHandler);
				// XXX progress?!
			} catch (IOException e) {
				// wrapped unchecked because Function.apply cannot throw IOException
				throw new IllegalStateException("Failed to load source data", e);
			}
			return input.getInstances();
		}
	});

	// Apply Filter — combine the source collections and apply the optional
	// filter definition (see applyFilter(List, InstanceFilterDefinition))
	InstanceCollection sourceCollection = applyFilter(sourceList, filterDefinition);

	// sink the transformed instances are pushed into and the writer reads from
	final TransformationSink targetSink;
	try {
		targetSink = TransformationSinkExtension.getInstance().createSink(!target.isPassthrough());
		targetSink.setTypes(environment.getTargetSchema());

		// add validation to sink
		// XXX for now default validation if env variable is set
		String env = System.getenv("HALE_TRANSFORMATION_INTERNAL_VALIDATION");
		if (env != null && env.equalsIgnoreCase("true")) {
			targetSink.addValidator(new DefaultTransformedInstanceValidator(reportHandler, environment));
		}
	} catch (Exception e) {
		throw new IllegalStateException("Error creating target sink", e);
	}

	// advisor that connects the writer to the target schema and the sink
	IOAdvisor<InstanceWriter> saveDataAdvisor = new AbstractIOAdvisor<InstanceWriter>() {

		/**
		 * @see IOAdvisor#prepareProvider(IOProvider)
		 */
		@Override
		public void prepareProvider(InstanceWriter provider) {
			super.prepareProvider(provider);

			// set target schema
			provider.setTargetSchema(environment.getTargetSchema());

			// set instances to export
			provider.setInstances(targetSink.getInstanceCollection());
		}
	};
	saveDataAdvisor.setServiceProvider(environment);
	saveDataAdvisor.setActionId(InstanceIO.ACTION_SAVE_TRANSFORMED_DATA);

	saveDataAdvisor.prepareProvider(target);
	saveDataAdvisor.updateConfiguration(target);

	ExportJob exportJob = new ExportJob(targetSink, target, saveDataAdvisor, reportHandler);

	// no validation
	ValidationJob validationJob = null;
	if (validators != null && !validators.isEmpty()) {
		validationJob = new ValidationJob(validators, reportHandler, target, environment);
	}

	// delegate to the collection based overload which schedules the jobs
	return transform(sourceCollection, targetSink, exportJob, validationJob, environment.getAlignment(), environment.getSourceSchema(), reportHandler, environment, processId);
}
Use of eu.esdihumboldt.hale.common.instance.model.InstanceCollection in project hale by halestudio — class Transformation, method transform (instance collection overload):
/**
 * Transform the given instances, according to the given alignment.
 *
 * @param sources the collection of source instances
 * @param targetSink the target sink
 * @param exportJob the export job
 * @param validationJob the validation job, may be <code>null</code>
 * @param alignment the alignment, may not be changed outside this method
 * @param sourceSchema the source schema
 * @param reportHandler the report handler
 * @param serviceProvider the service provider in the transformation context
 * @param processId the identifier for the transformation process, may be
 *            <code>null</code> if grouping the jobs to a job family is not
 *            necessary
 * @return the future representing the successful completion of the
 *         transformation (note that a successful completion doesn't
 *         necessary mean there weren't any internal transformation errors)
 */
public static ListenableFuture<Boolean> transform(InstanceCollection sources, final TransformationSink targetSink, final ExportJob exportJob, final ValidationJob validationJob, final Alignment alignment, SchemaSpace sourceSchema, final ReportHandler reportHandler, final ServiceProvider serviceProvider, final Object processId) {
	final SettableFuture<Boolean> result = SettableFuture.create();
	final InstanceCollection sourceToUse;

	// Check whether to create a temporary database or not.
	// Currently do not create a temporary DB if there are Retypes/Creates
	// only.
	boolean useTempDatabase = false;
	final LocalOrientDB db;
	// a single non-streaming type transformation is enough to require the DB
	for (Cell cell : alignment.getActiveTypeCells()) {
		if (!isStreamingTypeTransformation(cell.getTransformationIdentifier())) {
			useTempDatabase = true;
			break;
		}
	}

	// Create temporary database if necessary.
	if (useTempDatabase) {
		// create db
		File tmpDir = Files.createTempDir();
		db = new LocalOrientDB(tmpDir);
		// best-effort cleanup on JVM exit; db.delete() in the transform job
		// listener below performs the regular cleanup
		tmpDir.deleteOnExit();

		// get instance collection
		// sourceToUse = new BrowseOrientInstanceCollection(db, sourceSchema, DataSet.SOURCE);

		// only yield instances that were actually inserted
		// this is also done in OrientInstanceService
		// TODO make configurable?
		sourceToUse = FilteredInstanceCollection.applyFilter(new BrowseOrientInstanceCollection(db, sourceSchema, DataSet.SOURCE), new Filter() {

			@Override
			public boolean match(Instance instance) {
				if (instance instanceof OInstance) {
					return ((OInstance) instance).isInserted();
				}
				return true;
			}
		});
	}
	else {
		// stream the sources directly, wrapping them to record instance
		// count statistics for the report
		sourceToUse = new StatsCountInstanceCollection(sources, reportHandler);
		db = null;
	}

	// create transformation job
	final AbstractTransformationJob transformJob = new AbstractTransformationJob("Transformation") {

		/**
		 * @see org.eclipse.core.runtime.jobs.Job#run(org.eclipse.core.runtime.IProgressMonitor)
		 */
		@Override
		protected IStatus run(IProgressMonitor monitor) {
			TransformationService transformationService = HalePlatform.getService(TransformationService.class);
			TransformationReport report = transformationService.transform(alignment, sourceToUse, targetSink, serviceProvider, new ProgressMonitorIndicator(monitor));
			try {
				// publish report
				reportHandler.publishReport(report);
				if (report.isSuccess()) {
					return Status.OK_STATUS;
				}
				else {
					return ERROR_STATUS;
				}
			} finally {
				// the sink is only completed after the report was published,
				// because completing it earlier cancels the pipeline
				// and may lead to the transformation report being lost
				// NOTE(review): the return in this finally block deliberately
				// overrides the try block's result on cancellation
				if (monitor.isCanceled()) {
					targetSink.done(true);
					return Status.CANCEL_STATUS;
				}
				else {
					targetSink.done(false);
				}
			}
		}
	};

	// set process IDs to group jobs in a job family
	if (processId != null) {
		transformJob.setProcessId(processId);
		exportJob.setProcessId(processId);
		if (validationJob != null) {
			validationJob.setProcessId(processId);
		}
	}

	exportJob.setUser(true);

	// the jobs should cancel each other
	transformJob.addJobChangeListener(new JobChangeAdapter() {

		@Override
		public void done(IJobChangeEvent event) {
			if (!event.getResult().isOK()) {
				// log transformation job error (because it otherwise gets
				// lost)
				String msg = "Error during transformation";
				if (event.getResult().getMessage() != null) {
					// FIX: append the detail message instead of replacing the
					// whole string, so the log line keeps its context prefix
					msg += ": " + event.getResult().getMessage();
				}
				log.error(msg, event.getResult().getException());

				// failing transformation is done by cancelling the export
				exportJob.cancel();
			}

			// temporary database is no longer needed once the
			// transformation job finished (successfully or not)
			if (db != null) {
				db.delete();
			}
		}
	});

	// after export is done, validation should run
	exportJob.addJobChangeListener(new JobChangeAdapter() {

		@Override
		public void done(IJobChangeEvent event) {
			if (!event.getResult().isOK()) {
				transformJob.cancel();

				// failure
				failure(result, event);
			}
			else {
				if (validationJob == null) {
					// success
					result.set(true);
				}
				else {
					// schedule the validation job
					validationJob.schedule();
				}
			}
		}
	});

	// validation ends the process
	if (validationJob != null) {
		validationJob.addJobChangeListener(new JobChangeAdapter() {

			@Override
			public void done(IJobChangeEvent event) {
				if (!event.getResult().isOK()) {
					// failure
					failure(result, event);
				}
				else {
					// success
					result.set(true);
				}
			}
		});
	}

	if (useTempDatabase) {
		// Initialize instance index with alignment
		InstanceIndexService indexService = serviceProvider.getService(InstanceIndexService.class);
		indexService.addPropertyMappings(alignment.getActiveTypeCells(), serviceProvider);

		// run store instance job first...
		Job storeJob = new StoreInstancesJob("Load source instances into temporary database", db, sources, serviceProvider, reportHandler, true) {

			@Override
			protected void onComplete() {
				// onComplete is also called if monitor is cancelled...
			}

			@Override
			public boolean belongsTo(Object family) {
				if (processId == null) {
					return super.belongsTo(family);
				}
				return AbstractTransformationJob.createFamily(processId).equals(family);
			}
		};

		// and schedule jobs on successful completion
		storeJob.addJobChangeListener(new JobChangeAdapter() {

			@Override
			public void done(IJobChangeEvent event) {
				if (event.getResult().isOK()) {
					exportJob.schedule();
					transformJob.schedule();
				}
				else {
					failure(result, event);
				}
			}
		});

		storeJob.schedule();
	}
	else {
		// otherwise feed InstanceProcessors directly from the
		// InstanceCollection...
		// TODO Implement differently, not w/ PseudoInstanceReference which
		// will cause memory problems
//		final InstanceProcessingExtension ext = new InstanceProcessingExtension(
//				serviceProvider);
//		final List<InstanceProcessor> processors = ext.getInstanceProcessors();
//
//		ResourceIterator<Instance> it = sourceToUse.iterator();
//		try {
//			while (it.hasNext()) {
//				Instance instance = it.next();
//
//				ResolvableInstanceReference resolvableRef = new ResolvableInstanceReference(
//						new PseudoInstanceReference(instance), sourceToUse);
//				processors.forEach(p -> p.process(instance, resolvableRef));
//
//			}
//		} finally {
//			it.close();
//		}

		// ...and schedule jobs
		exportJob.schedule();
		transformJob.schedule();
	}

	return result;
}
Use of eu.esdihumboldt.hale.common.instance.model.InstanceCollection in project hale by halestudio — class Transformation, method applyFilter:
/**
 * Combine the given source instance collections and apply the optional
 * filter definition.
 *
 * @param sourceData the source instance collections
 * @param filterDefinition the filter definition, may be <code>null</code>
 *            in which case the sources are combined without filtering
 * @return the combined (and filtered) instance collection
 */
private static InstanceCollection applyFilter(List<InstanceCollection> sourceData, InstanceFilterDefinition filterDefinition) {
	// FIX: the public transform(...) documents the filter as optional, but
	// this method previously dereferenced it unconditionally (NPE)
	if (filterDefinition == null) {
		return new MultiInstanceCollection(sourceData);
	}

	List<InstanceCollection> filteredData = new ArrayList<InstanceCollection>(sourceData.size());
	for (InstanceCollection collection : sourceData) {
		if (filterDefinition.isGlobalContext()) {
			// add unfiltered, later apply to whole collection
			filteredData.add(collection);
		}
		else {
			// filter individually
			filteredData.add(collection.select(filterDefinition));
		}
	}

	InstanceCollection filteredCollection = new MultiInstanceCollection(filteredData);
	if (filterDefinition.isGlobalContext()) {
		// apply filter to combined instance collection
		filteredCollection = FilteredInstanceCollection.applyFilter(filteredCollection, filterDefinition);
	}
	return filteredCollection;
}
Use of eu.esdihumboldt.hale.common.instance.model.InstanceCollection in project hale by halestudio — class PropertyResolverTest, method testLoadShiporderWrapped:
/**
 * Test with a wrapper instance that has no definition itself.
 *
 * @throws Exception if an error occurs
 */
@Test
public void testLoadShiporderWrapped() throws Exception {
	InstanceCollection instances = loadXMLInstances(getClass().getResource("/data/shiporder/shiporder.xsd").toURI(), getClass().getResource("/data/shiporder/shiporder.xml").toURI());

	ResourceIterator<Instance> it = instances.iterator();
	try {
		assertTrue(it.hasNext());

		Instance instance = it.next();
		assertNotNull(instance);

		// create dummy instance
		MutableInstance wrapperInstance = new DefaultInstance(null, null);
		wrapperInstance.addProperty(new QName("value"), instance);

		// FIX: JUnit's assertEquals expects (expected, actual) — the original
		// passed them reversed, which produces misleading failure messages
		assertEquals("4000 Stavanger", PropertyResolver.getValues(wrapperInstance, "value.shipto.city").iterator().next());
	} finally {
		// always release the iterator's underlying resources
		it.close();
	}
}
Aggregations