use of eu.esdihumboldt.hale.common.instance.model.FamilyInstance in project hale by halestudio.
the class JoinHandler method partitionInstances.
// For now no support for using the same type more than once in a join.
/**
* @see eu.esdihumboldt.hale.common.align.transformation.function.InstanceHandler#partitionInstances(eu.esdihumboldt.hale.common.instance.model.InstanceCollection,
* java.lang.String,
* eu.esdihumboldt.hale.common.align.transformation.engine.TransformationEngine,
* com.google.common.collect.ListMultimap, java.util.Map,
* eu.esdihumboldt.hale.common.align.transformation.report.TransformationLog)
*/
@Override
public ResourceIterator<FamilyInstance> partitionInstances(InstanceCollection instances, String transformationIdentifier, TransformationEngine engine, ListMultimap<String, ParameterValue> transformationParameters, Map<String, String> executionParameters, TransformationLog log) throws TransformationException {
if (transformationParameters == null || !transformationParameters.containsKey(PARAMETER_JOIN) || transformationParameters.get(PARAMETER_JOIN).isEmpty()) {
throw new TransformationException("No join parameter defined");
}
JoinParameter joinParameter = transformationParameters.get(PARAMETER_JOIN).get(0).as(JoinParameter.class);
String validation = joinParameter.validate();
if (validation != null)
throw new TransformationException("Join parameter invalid: " + validation);
List<TypeEntityDefinition> types = joinParameter.getTypes();
JoinDefinition joinDefinition = JoinUtil.getJoinDefinition(joinParameter);
// JoinProperty -> (Value -> Collection<Reference>)
Map<PropertyEntityDefinition, Multimap<Object, InstanceReference>> index = new HashMap<>();
for (PropertyEntityDefinition property : joinDefinition.properties.values()) index.put(property, ArrayListMultimap.<Object, InstanceReference>create());
// remember instances of first type to start join afterwards
Collection<InstanceReference> startInstances = new LinkedList<InstanceReference>();
// iterate once over all instances
ResourceIterator<Instance> iterator = instances.iterator();
try {
while (iterator.hasNext()) {
Instance next = iterator.next();
// remember instances of first type
if (next.getDefinition().equals(types.get(0).getDefinition())) {
startInstances.add(instances.getReference(next));
}
// fill index over needed properties
for (PropertyEntityDefinition property : joinDefinition.properties.get(next.getDefinition())) {
// XXX what about null? for now ignore null values
// XXX how to treat multiple values? must all be equal (in
// order?) or only one?
Collection<Object> values = AlignmentUtil.getValues(next, property, true);
if (values != null && !values.isEmpty()) {
// XXX take only first value for now
index.get(property).put(valueProcessor.processValue(values.iterator().next(), property), instances.getReference(next));
}
}
}
} finally {
iterator.close();
}
return new JoinIterator(instances, startInstances, joinDefinition.directParent, index, joinDefinition.joinTable, valueProcessor);
}
use of eu.esdihumboldt.hale.common.instance.model.FamilyInstance in project hale by halestudio.
the class AbstractMergeHandler method partitionInstances.
/**
* @see InstanceHandler#partitionInstances(InstanceCollection, String,
* TransformationEngine, ListMultimap, Map, TransformationLog)
*/
@Override
public ResourceIterator<FamilyInstance> partitionInstances(InstanceCollection instances, String transformationIdentifier, TransformationEngine engine, ListMultimap<String, ParameterValue> transformationParameters, Map<String, String> executionParameters, TransformationLog log) throws TransformationException {
T mergeConfig = createMergeConfiguration(transformationIdentifier, transformationParameters, executionParameters, log);
// create merge index over all instances (references)
Multimap<K, InstanceReference> index = HashMultimap.create();
ResourceIterator<Instance> it = instances.iterator();
try {
while (it.hasNext()) {
Instance instance = it.next();
K key = getMergeKey(instance, mergeConfig);
index.put(key, instances.getReference(instance));
}
} finally {
it.close();
}
return new MergedIterator(index, instances, mergeConfig);
}
use of eu.esdihumboldt.hale.common.instance.model.FamilyInstance in project hale by halestudio.
the class IndexMergeHandler method partitionInstances.
/**
* @see eu.esdihumboldt.cst.functions.core.merge.AbstractMergeHandler#partitionInstances(eu.esdihumboldt.hale.common.instance.model.InstanceCollection,
* java.lang.String,
* eu.esdihumboldt.hale.common.align.transformation.engine.TransformationEngine,
* com.google.common.collect.ListMultimap, java.util.Map,
* eu.esdihumboldt.hale.common.align.transformation.report.TransformationLog)
*/
@Override
public ResourceIterator<FamilyInstance> partitionInstances(InstanceCollection instances, String transformationIdentifier, TransformationEngine engine, ListMultimap<String, ParameterValue> transformationParameters, Map<String, String> executionParameters, TransformationLog log) throws TransformationException {
PropertiesMergeHandler fallbackHandler = new PropertiesMergeHandler();
InstanceIndexService indexService = serviceProvider.getService(InstanceIndexService.class);
if (indexService == null) {
log.warn(MessageFormat.format("Index service not available, falling back to merge handler {0}", fallbackHandler.getClass().getCanonicalName()));
return fallbackHandler.partitionInstances(instances, transformationIdentifier, engine, transformationParameters, executionParameters, log);
}
final IndexMergeConfig mergeConfig = createMergeConfiguration(transformationParameters);
QName typeName;
try (ResourceIterator<Instance> it = instances.iterator()) {
if (it.hasNext()) {
typeName = it.next().getDefinition().getName();
} else {
// Nothing to partition
return new ResourceIterator<FamilyInstance>() {
@Override
public boolean hasNext() {
return false;
}
@Override
public FamilyInstance next() {
return null;
}
@Override
public void close() {
// Do nothing
}
};
}
}
// Querying the index will yield a result over all instances. We must,
// however, be able to operate only on the given input instances instead
// of all instances.
// We must, therefore, be able to uniquely identify every instance in
// the index, so that we can retain from the index query only the
// relevant instances.
List<Object> inputInstanceIds = new ArrayList<>();
try (ResourceIterator<Instance> it = instances.iterator()) {
while (it.hasNext()) {
Instance i = InstanceDecorator.getRoot(it.next());
if (!Identifiable.is(i)) {
log.warn(MessageFormat.format("At least one instance does not have an ID, falling back to merge handler {0}", fallbackHandler.getClass().getCanonicalName()));
return fallbackHandler.partitionInstances(instances, transformationIdentifier, engine, transformationParameters, executionParameters, log);
}
inputInstanceIds.add(Identifiable.getId(i));
}
}
Collection<Collection<ResolvableInstanceReference>> partitionedIndex = indexService.groupBy(typeName, mergeConfig.keyProperties);
// Remove instance groups from the partitioned index where none of the
// instances in the group are in the processed instances.
partitionedIndex.removeIf(part -> !part.stream().map(ref -> ref.getId()).anyMatch(id -> inputInstanceIds.contains(id)));
Iterator<Collection<ResolvableInstanceReference>> it = partitionedIndex.iterator();
return new ResourceIterator<FamilyInstance>() {
@Override
public boolean hasNext() {
return it.hasNext();
}
@Override
public FamilyInstance next() {
Collection<ResolvableInstanceReference> instanceRefs = it.next();
InstanceCollection instancesToBeMerged = new DefaultInstanceCollection(instanceRefs.stream().map(ref -> ref.resolve()).collect(Collectors.toList()));
return new FamilyInstanceImpl(merge(instancesToBeMerged, mergeConfig));
}
@Override
public void close() {
// TODO Auto-generated method stub
}
};
}
use of eu.esdihumboldt.hale.common.instance.model.FamilyInstance in project hale by halestudio.
the class ConceptualSchemaTransformer method doTypeTransformation.
/**
* Execute a type transformation based on single type cell
*
* @param transformation the transformation to use
* @param typeCell the type cell
* @param target the target instance sink
* @param source the source instances
* @param alignment the alignment
* @param engines the engine manager
* @param transformer the property transformer
* @param context the transformation execution context
* @param reporter the reporter
* @param progressIndicator the progress indicator
*/
@SuppressWarnings({ "rawtypes", "unchecked" })
protected void doTypeTransformation(TypeTransformationFactory transformation, Cell typeCell, InstanceCollection source, InstanceSink target, Alignment alignment, EngineManager engines, PropertyTransformer transformer, TransformationContext context, TransformationReporter reporter, ProgressIndicator progressIndicator) {
TransformationLog cellLog = new CellLog(reporter, typeCell);
TypeTransformation<?> function;
try {
function = transformation.createExtensionObject();
} catch (Exception e) {
reporter.error(new TransformationMessageImpl(typeCell, "Error creating transformation function.", e));
return;
}
TransformationEngine engine = engines.get(transformation.getEngineId(), cellLog);
if (engine == null) {
// TODO instead try another transformation
cellLog.error(cellLog.createMessage("Skipping type transformation: No matching transformation engine found", null));
return;
}
// prepare transformation configuration
ListMultimap<String, Type> targetTypes = ArrayListMultimap.create();
for (Entry<String, ? extends Entity> entry : typeCell.getTarget().entries()) {
targetTypes.put(entry.getKey(), (Type) entry.getValue());
}
ListMultimap<String, ParameterValue> parameters = typeCell.getTransformationParameters();
if (parameters != null) {
parameters = Multimaps.unmodifiableListMultimap(parameters);
}
Map<String, String> executionParameters = transformation.getExecutionParameters();
// break on cancel
if (progressIndicator.isCanceled()) {
return;
}
ResourceIterator<FamilyInstance> iterator;
if (typeCell.getSource() == null || typeCell.getSource().isEmpty()) {
// type cell w/o source
// -> execute exactly once w/ null source
source = null;
iterator = new GenericResourceIteratorAdapter<Object, FamilyInstance>(Collections.singleton(null).iterator()) {
@Override
protected FamilyInstance convert(Object next) {
return null;
}
};
} else {
// Step 1: selection
// Select only instances that are relevant for the transformation.
source = source.select(new TypeCellFilter(typeCell));
// Step 2: partition
// use InstanceHandler if available - for example merge or join
function.setExecutionContext(context.getCellContext(typeCell));
InstanceHandler instanceHandler = function.getInstanceHandler();
if (instanceHandler != null) {
injectTransformationContext(instanceHandler, context);
progressIndicator.setCurrentTask("Perform instance partitioning");
try {
iterator = instanceHandler.partitionInstances(source, transformation.getFunctionId(), engine, parameters, executionParameters, cellLog);
} catch (TransformationException e) {
cellLog.error(cellLog.createMessage("Type transformation: partitioning failed", e));
return;
}
} else {
// else just use every instance as is
iterator = new GenericResourceIteratorAdapter<Instance, FamilyInstance>(source.iterator()) {
@Override
protected FamilyInstance convert(Instance next) {
return new FamilyInstanceImpl(next);
}
};
}
}
progressIndicator.setCurrentTask("Execute type transformations");
try {
while (iterator.hasNext()) {
// break on cancel
if (progressIndicator.isCanceled()) {
return;
}
function.setSource(iterator.next());
function.setPropertyTransformer(transformer);
function.setParameters(parameters);
function.setTarget(targetTypes);
function.setExecutionContext(context.getCellContext(typeCell));
try {
((TypeTransformation) function).execute(transformation.getFunctionId(), engine, executionParameters, cellLog, typeCell);
} catch (TransformationException e) {
cellLog.error(cellLog.createMessage("Type transformation failed, skipping instance.", e));
}
}
} finally {
iterator.close();
}
}
Aggregations