Use of eu.esdihumboldt.cst.functions.core.join.JoinUtil.JoinDefinition in project hale by halestudio.
Class IndexJoinHandler, method partitionInstances.
/**
 * Prepares an index-based join over the given instances.
 * <p>
 * Iterates once over all instances, unwrapping each one with
 * {@code InstanceDecorator.getRoot}, and collects a resolvable reference for
 * every instance whose definition equals the definition of the first join
 * type. These references are the starting points handed to the returned
 * {@link IndexJoinIterator}.
 * <p>
 * The call is delegated to a plain {@link JoinHandler} (and a warning is
 * logged) if no {@link InstanceIndexService} is available or as soon as an
 * instance without an ID is encountered.
 *
 * @param instances the instances to partition
 * @param transformationIdentifier only forwarded to the fallback handler
 * @param engine only forwarded to the fallback handler
 * @param transformationParameters must contain at least one value for
 *            {@code PARAMETER_JOIN}; the first value is used as the join
 *            parameter
 * @param executionParameters only forwarded to the fallback handler
 * @param log receives the warnings issued when falling back
 * @return an iterator over the joined instances
 * @throws TransformationException if no join parameter is defined or the
 *             join parameter fails validation
 *
 * @see eu.esdihumboldt.hale.common.align.transformation.function.InstanceHandler#partitionInstances(eu.esdihumboldt.hale.common.instance.model.InstanceCollection,
 *      java.lang.String,
 *      eu.esdihumboldt.hale.common.align.transformation.engine.TransformationEngine,
 *      com.google.common.collect.ListMultimap, java.util.Map,
 *      eu.esdihumboldt.hale.common.align.transformation.report.TransformationLog)
 */
@Override
public ResourceIterator<FamilyInstance> partitionInstances(InstanceCollection instances, String transformationIdentifier, TransformationEngine engine, ListMultimap<String, ParameterValue> transformationParameters, Map<String, String> executionParameters, TransformationLog log) throws TransformationException {
	if (transformationParameters == null || !transformationParameters.containsKey(PARAMETER_JOIN) || transformationParameters.get(PARAMETER_JOIN).isEmpty()) {
		throw new TransformationException("No join parameter defined");
	}

	JoinHandler fallbackHandler = new JoinHandler();
	InstanceIndexService indexService = serviceProvider.getService(InstanceIndexService.class);
	if (indexService == null) {
		log.warn(MessageFormat.format("Index service not available, falling back to join handler {0}", fallbackHandler.getClass().getCanonicalName()));
		return fallbackHandler.partitionInstances(instances, transformationIdentifier, engine, transformationParameters, executionParameters, log);
	}

	JoinParameter joinParameter = transformationParameters.get(PARAMETER_JOIN).get(0).as(JoinParameter.class);
	String validation = joinParameter.validate();
	if (validation != null) {
		throw new TransformationException("Join parameter invalid: " + validation);
	}

	List<TypeEntityDefinition> types = joinParameter.getTypes();
	JoinDefinition joinDefinition = JoinUtil.getJoinDefinition(joinParameter);

	// remember instances of first type to start join afterwards
	Collection<ResolvableInstanceReference> startInstances = new LinkedList<>();
	try (ResourceIterator<Instance> it = instances.iterator()) {
		while (it.hasNext()) {
			Instance i = InstanceDecorator.getRoot(it.next());

			// remember instances of first type
			if (i.getDefinition().equals(types.get(0).getDefinition())) {
				startInstances.add(new ResolvableInstanceReference(instances.getReference(i), instances));
			}

			// every instance must be identifiable, otherwise give up on the
			// index-based join and let the fallback handler do the work
			if (!Identifiable.is(i)) {
				log.warn(MessageFormat.format("At least one instance does not have an ID, falling back to join handler {0}", fallbackHandler.getClass().getCanonicalName()));
				return fallbackHandler.partitionInstances(instances, transformationIdentifier, engine, transformationParameters, executionParameters, log);
			}
		}
	}

	return new IndexJoinIterator(startInstances, joinDefinition, indexService);
}
Use of eu.esdihumboldt.cst.functions.core.join.JoinUtil.JoinDefinition in project hale by halestudio.
Class JoinHandler, method partitionInstances.
// For now no support for using the same type more than once in a join.
/**
 * Prepares a join over the given instances by building an in-memory index.
 * <p>
 * Iterates once over all instances. References to instances whose definition
 * equals the definition of the first join type are collected as starting
 * points. For every join property associated with an instance's definition,
 * the first value of that property (after passing it through
 * {@code valueProcessor}) is mapped to a reference of the instance. Properties
 * without any value are not indexed.
 *
 * @param instances the instances to partition
 * @param transformationIdentifier not used by this implementation
 * @param engine not used by this implementation
 * @param transformationParameters must contain at least one value for
 *            {@code PARAMETER_JOIN}; the first value is used as the join
 *            parameter
 * @param executionParameters not used by this implementation
 * @param log not used by this implementation
 * @return an iterator over the joined instances
 * @throws TransformationException if no join parameter is defined or the
 *             join parameter fails validation
 *
 * @see eu.esdihumboldt.hale.common.align.transformation.function.InstanceHandler#partitionInstances(eu.esdihumboldt.hale.common.instance.model.InstanceCollection,
 *      java.lang.String,
 *      eu.esdihumboldt.hale.common.align.transformation.engine.TransformationEngine,
 *      com.google.common.collect.ListMultimap, java.util.Map,
 *      eu.esdihumboldt.hale.common.align.transformation.report.TransformationLog)
 */
@Override
public ResourceIterator<FamilyInstance> partitionInstances(InstanceCollection instances, String transformationIdentifier, TransformationEngine engine, ListMultimap<String, ParameterValue> transformationParameters, Map<String, String> executionParameters, TransformationLog log) throws TransformationException {
	if (transformationParameters == null || !transformationParameters.containsKey(PARAMETER_JOIN) || transformationParameters.get(PARAMETER_JOIN).isEmpty()) {
		throw new TransformationException("No join parameter defined");
	}

	JoinParameter joinParameter = transformationParameters.get(PARAMETER_JOIN).get(0).as(JoinParameter.class);
	String validation = joinParameter.validate();
	if (validation != null) {
		throw new TransformationException("Join parameter invalid: " + validation);
	}

	List<TypeEntityDefinition> types = joinParameter.getTypes();
	JoinDefinition joinDefinition = JoinUtil.getJoinDefinition(joinParameter);

	// JoinProperty -> (Value -> Collection<Reference>)
	Map<PropertyEntityDefinition, Multimap<Object, InstanceReference>> index = new HashMap<>();
	for (PropertyEntityDefinition property : joinDefinition.properties.values()) {
		index.put(property, ArrayListMultimap.<Object, InstanceReference>create());
	}

	// remember instances of first type to start join afterwards
	Collection<InstanceReference> startInstances = new LinkedList<>();

	// iterate once over all instances; the iterator is closed automatically,
	// also if processing an instance fails
	try (ResourceIterator<Instance> iterator = instances.iterator()) {
		while (iterator.hasNext()) {
			Instance next = iterator.next();

			// remember instances of first type
			if (next.getDefinition().equals(types.get(0).getDefinition())) {
				startInstances.add(instances.getReference(next));
			}

			// fill index over needed properties
			for (PropertyEntityDefinition property : joinDefinition.properties.get(next.getDefinition())) {
				// XXX what about null? for now ignore null values
				// XXX how to treat multiple values? must all be equal (in
				// order?) or only one?
				Collection<Object> values = AlignmentUtil.getValues(next, property, true);
				if (values != null && !values.isEmpty()) {
					// XXX take only first value for now
					index.get(property).put(valueProcessor.processValue(values.iterator().next(), property), instances.getReference(next));
				}
			}
		}
	}

	return new JoinIterator(instances, startInstances, joinDefinition.directParent, index, joinDefinition.joinTable, valueProcessor);
}
Aggregations