Search in sources:

Example 1 with ResolvableInstanceReference

use of eu.esdihumboldt.hale.common.instance.model.ResolvableInstanceReference in project hale by halestudio.

the class StoreInstancesJob method run.

/**
 * Stores all instances of the instance collection in the database.
 * <p>
 * Each instance is processed, converted to an {@link OInstance} if needed,
 * saved as a document and then handed to the instance processors and the
 * instance index service (both only if processing is enabled). Progress is
 * reported to the monitor, and the outcome is written to the report if one is
 * set, otherwise to the log.
 *
 * @param monitor the progress monitor, used for progress reporting and for
 *            checking for cancellation
 * @return a status with severity {@link IStatus#CANCEL} if the monitor was
 *         canceled, {@link IStatus#OK} otherwise
 * @see Job#run(IProgressMonitor)
 */
@Override
public IStatus run(IProgressMonitor monitor) {
    boolean exactProgress = instances.hasSize();
    monitor.beginTask("Store instances in database", (exactProgress) ? (instances.size()) : (IProgressMonitor.UNKNOWN));
    AtomicInteger count = new AtomicInteger(0);
    TObjectIntHashMap<QName> typeCount = new TObjectIntHashMap<>();
    if (report != null) {
        // set the correct start time
        report.setStartTime(new Date());
    }
    // get database connection
    DatabaseReference<ODatabaseDocumentTx> ref = database.openWrite();
    ODatabaseDocumentTx db = ref.getDatabase();
    ATransaction trans = log.begin("Store instances in database");
    try {
        // use intent
        db.declareIntent(new OIntentMassiveInsert());
        // Find all the InstanceProcessors to feed them the stored Instances
        final List<InstanceProcessor> processors;
        if (doProcessing) {
            final InstanceProcessingExtension ext = new InstanceProcessingExtension(serviceProvider);
            processors = ext.getInstanceProcessors();
        } else {
            processors = Collections.emptyList();
        }
        BrowseOrientInstanceCollection browser = new BrowseOrientInstanceCollection(database, null, DataSet.SOURCE);
        final InstanceIndexService indexService;
        if (doProcessing) {
            indexService = serviceProvider.getService(InstanceIndexService.class);
        } else {
            indexService = null;
        }
        // TODO decouple next() and save()?
        SimpleLogContext.withLog(report, () -> {
            if (report != null && instances instanceof LogAware) {
                ((LogAware) instances).setLog(report);
            }
            ResourceIterator<Instance> it = instances.iterator();
            int size = instances.size();
            // Time of the last sub task update. Declared outside the loop so
            // that the value survives between iterations; otherwise the
            // 100 ms throttle below would never take effect.
            long lastUpdate = 0;
            try {
                while (it.hasNext() && !monitor.isCanceled()) {
                    // NOTE(review): repeats the setLog call made before the
                    // loop; looks redundant, confirm before removing.
                    if (report != null && instances instanceof LogAware) {
                        ((LogAware) instances).setLog(report);
                    }
                    Instance instance = it.next();
                    // further processing before storing
                    processInstance(instance);
                    // get/create OInstance
                    OInstance conv = ((instance instanceof OInstance) ? ((OInstance) instance) : (new OInstance(instance)));
                    conv.setInserted(true);
                    // update the instance to store, e.g. generating
                    // metadata
                    updateInstance(conv);
                    ODatabaseRecordThreadLocal.INSTANCE.set(db);
                    // configure the document
                    ODocument doc = conv.configureDocument(db);
                    // and save it
                    doc.save();
                    // Create an InstanceReference for the saved instance and
                    // feed it to all known InstanceProcessors. The decoration
                    // with ResolvableInstanceReference allows the
                    // InstanceProcessors to resolve the instances if
                    // required.
                    OrientInstanceReference oRef = new OrientInstanceReference(doc.getIdentity(), conv.getDataSet(), conv.getDefinition());
                    IdentifiableInstanceReference idRef = new IdentifiableInstanceReference(oRef, doc.getIdentity());
                    ResolvableInstanceReference resolvableRef = new ResolvableInstanceReference(idRef, browser);
                    processors.forEach(p -> p.process(instance, resolvableRef));
                    if (indexService != null) {
                        indexService.add(instance, resolvableRef);
                    }
                    count.incrementAndGet();
                    TypeDefinition type = instance.getDefinition();
                    if (type != null) {
                        typeCount.adjustOrPutValue(type.getName(), 1, 1);
                    }
                    if (exactProgress) {
                        monitor.worked(1);
                    }
                    long now = System.currentTimeMillis();
                    if (now - lastUpdate > 100) {
                        // only update every 100 milliseconds
                        monitor.subTask(MessageFormat.format("{0}{1} instances processed", String.valueOf(count.get()), size != InstanceCollection.UNKNOWN_SIZE ? "/" + String.valueOf(size) : ""));
                        lastUpdate = now;
                    }
                }
            } finally {
                it.close();
                if (report != null && instances instanceof LogAware) {
                    ((LogAware) instances).setLog(null);
                }
            }
        });
        db.declareIntent(null);
    } catch (RuntimeException e) {
        // publish what was stored so far together with the error, then
        // propagate the exception
        if (report != null) {
            reportTypeCount(report, typeCount);
            report.error(new MessageImpl("Error storing instances in database", e));
            report.setSuccess(false);
            reportHandler.publishReport(report);
        }
        throw e;
    } finally {
        ref.dispose();
        trans.end();
        /*
         * Reset instances to prevent memory leak. It seems Eclipse
         * internally holds a reference to the job (in JobInfo and/or
         * ProgressMonitorFocusJobDialog) and this results in the instance
         * collection not being garbage collected. This is especially bad,
         * if an in-memory instance collection is used, e.g. a
         * DefaultInstanceCollection that is used when loading a Shapefile.
         */
        instances = null;
    }
    try {
        onComplete();
    } catch (RuntimeException e) {
        // a failure in post processing is reported but does not fail the job
        String message = "Error while post processing stored instances";
        if (report != null) {
            report.error(new MessageImpl(message, e));
        } else {
            log.error(message, e);
        }
    }
    String message = MessageFormat.format("Stored {0} instances in the database.", count);
    if (monitor.isCanceled()) {
        String warn = "Loading instances was canceled, incomplete data set in the database.";
        if (report != null) {
            report.warn(new MessageImpl(warn, null));
        } else {
            log.warn(warn);
        }
    }
    if (report != null) {
        reportTypeCount(report, typeCount);
        report.setSuccess(true);
        report.setSummary(message);
        reportHandler.publishReport(report);
    } else {
        log.info(message);
    }
    monitor.done();
    return new Status((monitor.isCanceled()) ? (IStatus.CANCEL) : (IStatus.OK), "eu.esdihumboldt.hale.common.instance.orient", message);
}
Also used : MutableInstance(eu.esdihumboldt.hale.common.instance.model.MutableInstance) Instance(eu.esdihumboldt.hale.common.instance.model.Instance) OInstance(eu.esdihumboldt.hale.common.instance.orient.OInstance) IdentifiableInstanceReference(eu.esdihumboldt.hale.common.instance.model.IdentifiableInstanceReference) ODatabaseDocumentTx(com.orientechnologies.orient.core.db.document.ODatabaseDocumentTx) OInstance(eu.esdihumboldt.hale.common.instance.orient.OInstance) OIntentMassiveInsert(com.orientechnologies.orient.core.intent.OIntentMassiveInsert) TypeDefinition(eu.esdihumboldt.hale.common.schema.model.TypeDefinition) TObjectIntHashMap(gnu.trove.TObjectIntHashMap) InstanceProcessor(eu.esdihumboldt.hale.common.instance.processing.InstanceProcessor) InstanceIndexService(eu.esdihumboldt.hale.common.instance.index.InstanceIndexService) ODocument(com.orientechnologies.orient.core.record.impl.ODocument) IStatus(org.eclipse.core.runtime.IStatus) Status(org.eclipse.core.runtime.Status) LogAware(eu.esdihumboldt.hale.common.core.report.LogAware) QName(javax.xml.namespace.QName) Date(java.util.Date) InstanceProcessingExtension(eu.esdihumboldt.hale.common.instance.processing.InstanceProcessingExtension) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ATransaction(de.fhg.igd.slf4jplus.ATransaction) ResolvableInstanceReference(eu.esdihumboldt.hale.common.instance.model.ResolvableInstanceReference) MessageImpl(eu.esdihumboldt.hale.common.core.report.impl.MessageImpl)

Example 2 with ResolvableInstanceReference

use of eu.esdihumboldt.hale.common.instance.model.ResolvableInstanceReference in project hale by halestudio.

the class MultimapInstanceIndex method getInstancesByValue.

/**
 * Collects the references of indexed instances whose value(s) at the given
 * property path match the provided values.
 * <p>
 * Candidates are taken from {@code valueIndex}: every key that contains an
 * indexed property value at {@code propertyPath} sharing at least one value
 * with {@code values}. A candidate is then dropped if {@code instanceIndex}
 * holds, for that reference, any indexed property value at
 * {@code propertyPath} that shares no value with {@code values}.
 *
 * @param propertyPath the path of the property to match
 * @param values the accepted values
 * @return a new, modifiable collection with the matching references
 */
@Override
public Collection<ResolvableInstanceReference> getInstancesByValue(List<QName> propertyPath, List<?> values) {
    // Find all keys of valueIndex that contain an indexed property value at
    // the given property path where any of the indexed values matches any of
    // the provided values.
    List<List<IndexedPropertyValue>> matchingKeys = valueIndex.keySet().stream()
            .filter(ipvs -> ipvs.stream()
                    .anyMatch(ipv -> propertyPath.equals(ipv.getPropertyPath())
                            && ipv.getValues().stream().anyMatch(values::contains)))
            .collect(Collectors.toList());
    Collection<ResolvableInstanceReference> result = new HashSet<>();
    matchingKeys.forEach(k -> result.addAll(valueIndex.get(k)));
    // Drop every reference that has an indexed property value at the given
    // path without any of the provided values.
    result.removeIf(ref -> instanceIndex.get(ref).stream()
            .flatMap(List::stream)
            .filter(ipv -> ipv.getPropertyPath().equals(propertyPath))
            .anyMatch(ipv -> ipv.getValues().stream().noneMatch(values::contains)));
    return result;
}
Also used : Iterator(java.util.Iterator) ResolvableInstanceReference(eu.esdihumboldt.hale.common.instance.model.ResolvableInstanceReference) Collection(java.util.Collection) Multimap(com.google.common.collect.Multimap) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) List(java.util.List) HashMultimap(com.google.common.collect.HashMultimap) Optional(java.util.Optional) Instance(eu.esdihumboldt.hale.common.instance.model.Instance) InstanceIndexUtil.collectionEquals(eu.esdihumboldt.hale.common.instance.index.InstanceIndexUtil.collectionEquals) QName(javax.xml.namespace.QName) Collections(java.util.Collections) ArrayList(java.util.ArrayList) List(java.util.List) ResolvableInstanceReference(eu.esdihumboldt.hale.common.instance.model.ResolvableInstanceReference) HashSet(java.util.HashSet)

Example 3 with ResolvableInstanceReference

use of eu.esdihumboldt.hale.common.instance.model.ResolvableInstanceReference in project hale by halestudio.

the class InstanceIndexServiceImpl method add.

/**
 * Looks up the instance behind the given reference in the given collection
 * and adds it to the index selected by the name of the instance's
 * definition. The reference is wrapped in a
 * {@link ResolvableInstanceReference} bound to the same collection.
 *
 * @param reference the reference to the instance to index
 * @param instances the collection the reference is looked up in
 * @see eu.esdihumboldt.hale.common.instance.index.InstanceIndexService#add(eu.esdihumboldt.hale.common.instance.model.InstanceReference,
 *      eu.esdihumboldt.hale.common.instance.model.InstanceCollection)
 */
@Override
public void add(InstanceReference reference, InstanceCollection instances) {
    final Instance resolved = instances.getInstance(reference);
    getIndex(resolved.getDefinition().getName())
            .add(new ResolvableInstanceReference(reference, instances), resolved);
}
Also used : Instance(eu.esdihumboldt.hale.common.instance.model.Instance) ResolvableInstanceReference(eu.esdihumboldt.hale.common.instance.model.ResolvableInstanceReference)

Example 4 with ResolvableInstanceReference

use of eu.esdihumboldt.hale.common.instance.model.ResolvableInstanceReference in project hale by halestudio.

the class IndexJoinIterator method join.

/**
 * Recursively attaches, for every type whose direct parent is
 * {@code currentType}, the instances that satisfy all join conditions of
 * that type as children of {@code currentInstances[currentType]}.
 * Candidates are looked up through the index.
 *
 * @param currentInstances the instances of the current join path, indexed
 *            by type; the slot of a child type is filled while descending
 *            and reset to {@code null} afterwards
 * @param currentType the index of the type whose direct children are joined
 */
@SuppressWarnings("javadoc")
private void join(FamilyInstance[] currentInstances, int currentType) {
    for (int childType = currentType + 1; childType < joinDefinition.directParent.length; childType++) {
        // Only types that are direct children of the current type are joined.
        if (joinDefinition.directParent[childType] != currentType) {
            continue;
        }

        // ParentType -> JoinConditions of the child type
        Multimap<Integer, JoinCondition> conditions = joinDefinition.joinTable.get(childType);

        // Running intersection over all conditions; null means that no
        // condition was evaluated yet (as opposed to an empty result).
        Set<ResolvableInstanceReference> candidates = null;
        for (Map.Entry<Integer, JoinCondition> entry : conditions.entries()) {
            PropertyEntityDefinition baseProperty = entry.getValue().baseProperty;
            QName baseType = baseProperty.getType().getName();
            List<QName> basePath = baseProperty.getPropertyPath().stream()
                    .map(element -> element.getChild().getName())
                    .collect(Collectors.toList());

            PropertyEntityDefinition joinProperty = entry.getValue().joinProperty;
            QName joinType = joinProperty.getType().getName();
            List<QName> joinPath = joinProperty.getPropertyPath().stream()
                    .map(element -> element.getChild().getName())
                    .collect(Collectors.toList());

            List<IndexedPropertyValue> baseValues = index.getInstancePropertyValues(baseType, basePath,
                    currentInstances[entry.getKey()].getId());
            if (baseValues == null || baseValues.isEmpty()) {
                // No value to join on, so nothing can match.
                candidates = Collections.emptySet();
                break;
            }

            // All instances that carry any of the base values
            HashSet<ResolvableInstanceReference> matches = new HashSet<ResolvableInstanceReference>();
            for (IndexedPropertyValue baseValue : baseValues) {
                if (baseValue.getValues() != null && !baseValue.getValues().isEmpty()) {
                    matches.addAll(index.getInstancesByValue(joinType, joinPath, baseValue.getValues()));
                }
            }

            if (candidates == null) {
                candidates = matches;
            } else {
                // Keep only candidates that also satisfy this condition.
                candidates.retainAll(matches);
            }
            if (candidates.isEmpty()) {
                break;
            }
        }

        if (candidates == null || candidates.isEmpty()) {
            continue;
        }

        FamilyInstance parent = currentInstances[currentType];
        for (ResolvableInstanceReference candidate : candidates) {
            FamilyInstance child = new FamilyInstanceImpl(candidate.resolve());
            parent.addChild(child);
            currentInstances[childType] = child;
            // Descend to join the children of the child type.
            join(currentInstances, childType);
        }
        currentInstances[childType] = null;
    }
}
Also used : Iterator(java.util.Iterator) PropertyEntityDefinition(eu.esdihumboldt.hale.common.align.model.impl.PropertyEntityDefinition) ResolvableInstanceReference(eu.esdihumboldt.hale.common.instance.model.ResolvableInstanceReference) Collection(java.util.Collection) JoinDefinition(eu.esdihumboldt.cst.functions.core.join.JoinUtil.JoinDefinition) Set(java.util.Set) InstanceIndexService(eu.esdihumboldt.hale.common.instance.index.InstanceIndexService) Multimap(com.google.common.collect.Multimap) Collectors(java.util.stream.Collectors) FamilyInstance(eu.esdihumboldt.hale.common.instance.model.FamilyInstance) HashSet(java.util.HashSet) FamilyInstanceImpl(eu.esdihumboldt.hale.common.align.transformation.function.impl.FamilyInstanceImpl) GenericResourceIteratorAdapter(eu.esdihumboldt.hale.common.instance.model.impl.GenericResourceIteratorAdapter) List(java.util.List) JoinCondition(eu.esdihumboldt.hale.common.align.model.functions.join.JoinParameter.JoinCondition) Map(java.util.Map) QName(javax.xml.namespace.QName) IndexedPropertyValue(eu.esdihumboldt.hale.common.instance.index.IndexedPropertyValue) Collections(java.util.Collections) QName(javax.xml.namespace.QName) IndexedPropertyValue(eu.esdihumboldt.hale.common.instance.index.IndexedPropertyValue) JoinCondition(eu.esdihumboldt.hale.common.align.model.functions.join.JoinParameter.JoinCondition) FamilyInstanceImpl(eu.esdihumboldt.hale.common.align.transformation.function.impl.FamilyInstanceImpl) PropertyEntityDefinition(eu.esdihumboldt.hale.common.align.model.impl.PropertyEntityDefinition) FamilyInstance(eu.esdihumboldt.hale.common.instance.model.FamilyInstance) ResolvableInstanceReference(eu.esdihumboldt.hale.common.instance.model.ResolvableInstanceReference) Map(java.util.Map) HashSet(java.util.HashSet)

Example 5 with ResolvableInstanceReference

use of eu.esdihumboldt.hale.common.instance.model.ResolvableInstanceReference in project hale by halestudio.

the class JoinIterator method join.

// Recursively attaches, for every type whose direct parent is currentType,
// the instances that satisfy all join conditions of that type as children of
// currentInstances[currentType].
private void join(FamilyInstance[] currentInstances, int currentType) {
    for (int childType = currentType + 1; childType < parent.length; childType++) {
        // Only types that are direct children of the current type are joined.
        if (parent[childType] != currentType) {
            continue;
        }

        // ParentType -> JoinConditions of the child type
        Multimap<Integer, JoinCondition> conditions = joinTable.get(childType);

        // Running intersection over all conditions; null means that no
        // condition was evaluated yet (as opposed to an empty result).
        Set<InstanceReference> candidates = null;
        for (Map.Entry<Integer, JoinCondition> entry : conditions.entries()) {
            Collection<Object> baseValues = AlignmentUtil.getValues(currentInstances[entry.getKey()],
                    entry.getValue().baseProperty, true);
            if (baseValues == null) {
                candidates = Collections.emptySet();
                break;
            }

            // Targets with any of the property values are allowed.
            HashSet<InstanceReference> matches = new HashSet<InstanceReference>();
            for (Object baseValue : baseValues) {
                Object key = (valueProcessor != null)
                        ? valueProcessor.processValue(baseValue, entry.getValue().baseProperty)
                        : baseValue;
                matches.addAll(index.get(entry.getValue().joinProperty).get(key));
            }

            if (candidates == null) {
                candidates = matches;
            } else {
                // Intersect with the matches of the previous conditions.
                candidates.retainAll(matches);
            }
            // Stop early once nothing can match anymore.
            if (candidates.isEmpty()) {
                break;
            }
        }

        if (candidates == null || candidates.isEmpty()) {
            continue;
        }

        FamilyInstance parentInstance = currentInstances[currentType];
        for (InstanceReference candidate : candidates) {
            // Resolvable references bring their own way to resolve; all others
            // are looked up in the instance collection.
            FamilyInstance child = (candidate instanceof ResolvableInstanceReference)
                    ? new FamilyInstanceImpl(((ResolvableInstanceReference) candidate).resolve())
                    : new FamilyInstanceImpl(instances.getInstance(candidate));
            parentInstance.addChild(child);
            currentInstances[childType] = child;
            // Descend to join the children of the child type.
            join(currentInstances, childType);
        }
        currentInstances[childType] = null;
    }
}
Also used : JoinCondition(eu.esdihumboldt.hale.common.align.model.functions.join.JoinParameter.JoinCondition) FamilyInstanceImpl(eu.esdihumboldt.hale.common.align.transformation.function.impl.FamilyInstanceImpl) ResolvableInstanceReference(eu.esdihumboldt.hale.common.instance.model.ResolvableInstanceReference) InstanceReference(eu.esdihumboldt.hale.common.instance.model.InstanceReference) FamilyInstance(eu.esdihumboldt.hale.common.instance.model.FamilyInstance) Map(java.util.Map) ResolvableInstanceReference(eu.esdihumboldt.hale.common.instance.model.ResolvableInstanceReference) HashSet(java.util.HashSet)

Aggregations

ResolvableInstanceReference (eu.esdihumboldt.hale.common.instance.model.ResolvableInstanceReference): 9; Instance (eu.esdihumboldt.hale.common.instance.model.Instance): 6; InstanceIndexService (eu.esdihumboldt.hale.common.instance.index.InstanceIndexService): 4; FamilyInstance (eu.esdihumboldt.hale.common.instance.model.FamilyInstance): 4; HashSet (java.util.HashSet): 4; FamilyInstanceImpl (eu.esdihumboldt.hale.common.align.transformation.function.impl.FamilyInstanceImpl): 3; IdentifiableInstanceReference (eu.esdihumboldt.hale.common.instance.model.IdentifiableInstanceReference): 3; InstanceReference (eu.esdihumboldt.hale.common.instance.model.InstanceReference): 3; ArrayList (java.util.ArrayList): 3; Collection (java.util.Collection): 3; Iterator (java.util.Iterator): 3; List (java.util.List): 3; Map (java.util.Map): 3; QName (javax.xml.namespace.QName): 3; Multimap (com.google.common.collect.Multimap): 2; JoinDefinition (eu.esdihumboldt.cst.functions.core.join.JoinUtil.JoinDefinition): 2; JoinCondition (eu.esdihumboldt.hale.common.align.model.functions.join.JoinParameter.JoinCondition): 2; TransformationException (eu.esdihumboldt.hale.common.align.transformation.function.TransformationException): 2; InstanceCollection (eu.esdihumboldt.hale.common.instance.model.InstanceCollection): 2; MutableInstance (eu.esdihumboldt.hale.common.instance.model.MutableInstance): 2