Search in sources :

Example 91 with TypeDefinition

use of eu.esdihumboldt.hale.common.schema.model.TypeDefinition in project hale by halestudio.

the class StoreInstancesJob method run.

/**
 * Stores the instances of the configured instance collection in the
 * database, feeds every stored instance to the registered instance
 * processors (and the instance index service) if processing is enabled, and
 * publishes a report if one was supplied.
 *
 * @param monitor the progress monitor; used to report progress and checked
 *            for cancellation between two instances
 * @return a status with severity {@link IStatus#CANCEL} if the monitor was
 *         canceled, otherwise {@link IStatus#OK}; the message states the
 *         number of stored instances
 * @throws RuntimeException if storing the instances fails; the error is
 *             added to the report (if any) before it is rethrown
 * @see Job#run(IProgressMonitor)
 */
@Override
public IStatus run(IProgressMonitor monitor) {
    boolean exactProgress = instances.hasSize();
    monitor.beginTask("Store instances in database", (exactProgress) ? (instances.size()) : (IProgressMonitor.UNKNOWN));
    AtomicInteger count = new AtomicInteger(0);
    TObjectIntHashMap<QName> typeCount = new TObjectIntHashMap<>();
    if (report != null) {
        // set the correct start time
        report.setStartTime(new Date());
    }
    // get database connection
    DatabaseReference<ODatabaseDocumentTx> ref = database.openWrite();
    ODatabaseDocumentTx db = ref.getDatabase();
    ATransaction trans = log.begin("Store instances in database");
    try {
        // use intent
        db.declareIntent(new OIntentMassiveInsert());
        // Find all the InstanceProcessors to feed them the stored Instances
        final List<InstanceProcessor> processors;
        if (doProcessing) {
            final InstanceProcessingExtension ext = new InstanceProcessingExtension(serviceProvider);
            processors = ext.getInstanceProcessors();
        } else {
            processors = Collections.emptyList();
        }
        BrowseOrientInstanceCollection browser = new BrowseOrientInstanceCollection(database, null, DataSet.SOURCE);
        final InstanceIndexService indexService;
        if (doProcessing) {
            indexService = serviceProvider.getService(InstanceIndexService.class);
        } else {
            indexService = null;
        }
        // TODO decouple next() and save()?
        SimpleLogContext.withLog(report, () -> {
            if (report != null && instances instanceof LogAware) {
                ((LogAware) instances).setLog(report);
            }
            ResourceIterator<Instance> it = instances.iterator();
            int size = instances.size();
            // Time stamp of the last sub task update. Must live outside the
            // loop, otherwise it is reset for every instance and the
            // throttling below never takes effect.
            long lastUpdate = 0;
            try {
                while (it.hasNext() && !monitor.isCanceled()) {
                    // NOTE(review): the log was already set before the loop;
                    // kept here to preserve the existing behavior.
                    if (report != null && instances instanceof LogAware) {
                        ((LogAware) instances).setLog(report);
                    }
                    Instance instance = it.next();
                    // further processing before storing
                    processInstance(instance);
                    // get/create OInstance
                    OInstance conv = ((instance instanceof OInstance) ? ((OInstance) instance) : (new OInstance(instance)));
                    conv.setInserted(true);
                    // update the instance to store, e.g. generating
                    // metadata
                    updateInstance(conv);
                    ODatabaseRecordThreadLocal.INSTANCE.set(db);
                    // configure the document
                    ODocument doc = conv.configureDocument(db);
                    // and save it
                    doc.save();
                    // Create an InstanceReference for the saved instance and
                    // feed it to all known InstanceProcessors. The decoration
                    // with ResolvableInstanceReference allows the
                    // InstanceProcessors to resolve the instances if
                    // required.
                    OrientInstanceReference oRef = new OrientInstanceReference(doc.getIdentity(), conv.getDataSet(), conv.getDefinition());
                    IdentifiableInstanceReference idRef = new IdentifiableInstanceReference(oRef, doc.getIdentity());
                    ResolvableInstanceReference resolvableRef = new ResolvableInstanceReference(idRef, browser);
                    processors.forEach(p -> p.process(instance, resolvableRef));
                    if (indexService != null) {
                        indexService.add(instance, resolvableRef);
                    }
                    count.incrementAndGet();
                    // keep track of the number of instances per type for the
                    // report
                    TypeDefinition type = instance.getDefinition();
                    if (type != null) {
                        typeCount.adjustOrPutValue(type.getName(), 1, 1);
                    }
                    if (exactProgress) {
                        monitor.worked(1);
                    }
                    long now = System.currentTimeMillis();
                    if (now - lastUpdate > 100) {
                        // only update every 100 milliseconds
                        monitor.subTask(MessageFormat.format("{0}{1} instances processed", String.valueOf(count.get()), size != InstanceCollection.UNKNOWN_SIZE ? "/" + String.valueOf(size) : ""));
                        lastUpdate = now;
                    }
                }
            } finally {
                it.close();
                if (report != null && instances instanceof LogAware) {
                    ((LogAware) instances).setLog(null);
                }
            }
        });
        db.declareIntent(null);
    } catch (RuntimeException e) {
        if (report != null) {
            reportTypeCount(report, typeCount);
            report.error(new MessageImpl("Error storing instances in database", e));
            report.setSuccess(false);
            reportHandler.publishReport(report);
        }
        throw e;
    } finally {
        ref.dispose();
        trans.end();
        /*
         * Reset instances to prevent memory leak. It seems Eclipse
         * internally holds a reference to the job (in JobInfo and/or
         * ProgressMonitorFocusJobDialog) and this results in the instance
         * collection not being garbage collected. This is especially bad,
         * if an in-memory instance collection is used, e.g. a
         * DefaultInstanceCollection that is used when loading a Shapefile.
         */
        instances = null;
    }
    // Post processing errors are only reported/logged, they do not fail the
    // job.
    try {
        onComplete();
    } catch (RuntimeException e) {
        String message = "Error while post processing stored instances";
        if (report != null) {
            report.error(new MessageImpl(message, e));
        } else {
            log.error(message, e);
        }
    }
    String message = MessageFormat.format("Stored {0} instances in the database.", count);
    if (monitor.isCanceled()) {
        String warn = "Loading instances was canceled, incomplete data set in the database.";
        if (report != null) {
            report.warn(new MessageImpl(warn, null));
        } else {
            log.warn(warn);
        }
    }
    if (report != null) {
        reportTypeCount(report, typeCount);
        report.setSuccess(true);
        report.setSummary(message);
        reportHandler.publishReport(report);
    } else {
        log.info(message);
    }
    monitor.done();
    return new Status((monitor.isCanceled()) ? (IStatus.CANCEL) : (IStatus.OK), "eu.esdihumboldt.hale.common.instance.orient", message);
}
Also used : MutableInstance(eu.esdihumboldt.hale.common.instance.model.MutableInstance) Instance(eu.esdihumboldt.hale.common.instance.model.Instance) OInstance(eu.esdihumboldt.hale.common.instance.orient.OInstance) IdentifiableInstanceReference(eu.esdihumboldt.hale.common.instance.model.IdentifiableInstanceReference) ODatabaseDocumentTx(com.orientechnologies.orient.core.db.document.ODatabaseDocumentTx) OInstance(eu.esdihumboldt.hale.common.instance.orient.OInstance) OIntentMassiveInsert(com.orientechnologies.orient.core.intent.OIntentMassiveInsert) TypeDefinition(eu.esdihumboldt.hale.common.schema.model.TypeDefinition) TObjectIntHashMap(gnu.trove.TObjectIntHashMap) InstanceProcessor(eu.esdihumboldt.hale.common.instance.processing.InstanceProcessor) InstanceIndexService(eu.esdihumboldt.hale.common.instance.index.InstanceIndexService) ODocument(com.orientechnologies.orient.core.record.impl.ODocument) IStatus(org.eclipse.core.runtime.IStatus) Status(org.eclipse.core.runtime.Status) LogAware(eu.esdihumboldt.hale.common.core.report.LogAware) QName(javax.xml.namespace.QName) Date(java.util.Date) InstanceProcessingExtension(eu.esdihumboldt.hale.common.instance.processing.InstanceProcessingExtension) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ATransaction(de.fhg.igd.slf4jplus.ATransaction) ResolvableInstanceReference(eu.esdihumboldt.hale.common.instance.model.ResolvableInstanceReference) MessageImpl(eu.esdihumboldt.hale.common.core.report.impl.MessageImpl)

Example 92 with TypeDefinition

use of eu.esdihumboldt.hale.common.schema.model.TypeDefinition in project hale by halestudio.

the class PropertyResolverTest method testLoadShiporder.

/**
 * Test loading a simple XML file with one instance and resolving its
 * properties through {@link PropertyResolver}, using fully qualified,
 * unqualified and mixed property paths.
 *
 * @throws Exception if an error occurs
 */
@Test
public void testLoadShiporder() throws Exception {
    InstanceCollection instances = loadXMLInstances(getClass().getResource("/data/shiporder/shiporder.xsd").toURI(), getClass().getResource("/data/shiporder/shiporder.xml").toURI());
    ResourceIterator<Instance> it = instances.iterator();
    try {
        assertTrue(it.hasNext());
        Instance instance = it.next();
        assertNotNull(instance);
        // The result is not used; the call chain itself is executed and
        // must not throw.
        @SuppressWarnings("unused") TypeDefinition test = instance.getDefinition().getChildren().iterator().next().asProperty().getParentType();
        // fully qualified names
        assertTrue(PropertyResolver.hasProperty(instance, "{http://www.example.com}orderperson"));
        assertTrue(PropertyResolver.hasProperty(instance, "{http://www.example.com}shipto.{http://www.example.com}city"));
        assertTrue(PropertyResolver.getQueryPath(instance, "{http://www.example.com}shipto.{http://www.example.com}city").contains("{http://www.example.com}shipto.{http://www.example.com}city"));
        // names without namespace and mixed paths
        assertTrue(PropertyResolver.hasProperty(instance, "orderperson"));
        assertTrue(PropertyResolver.hasProperty(instance, "shipto.city"));
        assertTrue(PropertyResolver.hasProperty(instance, "shipto.{http://www.example.com}city"));
        // expected value first, so that a failure message is reported the
        // right way round
        assertEquals("4000 Stavanger", PropertyResolver.getValues(instance, "shipto.city").iterator().next());
    } finally {
        it.close();
    }
}
Also used : MutableInstance(eu.esdihumboldt.hale.common.instance.model.MutableInstance) Instance(eu.esdihumboldt.hale.common.instance.model.Instance) DefaultInstance(eu.esdihumboldt.hale.common.instance.model.impl.DefaultInstance) InstanceCollection(eu.esdihumboldt.hale.common.instance.model.InstanceCollection) TypeDefinition(eu.esdihumboldt.hale.common.schema.model.TypeDefinition) Test(org.junit.Test)

Example 93 with TypeDefinition

use of eu.esdihumboldt.hale.common.schema.model.TypeDefinition in project hale by halestudio.

the class CSVInstanceReaderTest method testReadSimple.

/**
 * Reads the sample CSV schema and data and checks the type, its properties
 * and the property values of the first instance.
 *
 * @throws Exception if an error occurs
 */
@Test
public void testReadSimple() throws Exception {
    final String expectedTypeName = "location";
    final String[] propertyNames = { "Name", "Xcoord", "Ycoord", "id" };
    final String[] expectedFirstValues = { "test", "12", "16", "1" };
    final int expectedInstanceCount = 2;

    // ### schema ###
    Schema schema = readCSVSchema("/data/test1.csv", expectedTypeName, "java.lang.String,java.lang.String,java.lang.String,java.lang.String", "Name,Xcoord,Ycoord,id", null, null, null);
    TypeDefinition schemaType = schema.getType(QName.valueOf(expectedTypeName));
    // every expected property has to be present in the schema type
    for (int idx = 0; idx < propertyNames.length; idx++) {
        ChildDefinition<?> schemaChild = schemaType.getChild(QName.valueOf(propertyNames[idx]));
        assertEquals(propertyNames[idx], schemaChild.getDisplayName());
    }

    // ### instances ###
    InstanceCollection instances = readCSVInstances("/data/test1.csv", expectedTypeName, true, schema, null, null, null);
    assertEquals(expectedInstanceCount, collectionSize(instances));

    // the type of the first instance has to expose the same properties
    // (schema and instance combination)
    TypeDefinition instanceType = instances.iterator().next().getDefinition();
    assertEquals(expectedTypeName, instanceType.getDisplayName());
    for (String propertyName : propertyNames) {
        ChildDefinition<?> instanceChild = instanceType.getChild(QName.valueOf(propertyName));
        assertEquals(propertyName, instanceChild.getDisplayName());
    }

    // the values of the first instance have to match and to be strings
    Instance firstInstance = instances.iterator().next();
    for (int idx = 0; idx < expectedFirstValues.length; idx++) {
        Object[] values = firstInstance.getProperty(QName.valueOf(propertyNames[idx]));
        assertEquals(expectedFirstValues[idx], values[0]);
        assertTrue(values[0] instanceof String);
    }
}
Also used : Instance(eu.esdihumboldt.hale.common.instance.model.Instance) Schema(eu.esdihumboldt.hale.common.schema.model.Schema) InstanceCollection(eu.esdihumboldt.hale.common.instance.model.InstanceCollection) TypeDefinition(eu.esdihumboldt.hale.common.schema.model.TypeDefinition) Test(org.junit.Test)

Example 94 with TypeDefinition

use of eu.esdihumboldt.hale.common.schema.model.TypeDefinition in project hale by halestudio.

the class CSVInstanceReaderTest method testReadWithPointDecimal.

/**
 * Test - read a sample csv schema and data with point as a decimal divisor.
 * Checks the type, its properties and the (typed) property values of the
 * first instance.
 *
 * @throws Exception if an error occurs
 */
@Test
public void testReadWithPointDecimal() throws Exception {
    String typeName = "Random";
    String[] properties = { "A", "B", "C", "D", "E" };
    // valueOf instead of the deprecated boxing constructors; the explicit
    // float cast yields the same value as the former new Float(double)
    Object[] dataFirstColumn = { Integer.valueOf(1), "A", Float.valueOf((float) 32647968.61), Float.valueOf((float) 5649088.376), "Linderbacher Straße" };
    int numberOfInstances = 5;
    // read Schema ###
    Schema schema = readCSVSchema("/data/test3-pointdecimal.csv", typeName, "java.lang.Integer,java.lang.String,java.lang.Float,java.lang.Float,java.lang.String", "A,B,C,D,E", ";", null, null, ".");
    // Test properties
    TypeDefinition schemaType = schema.getType(QName.valueOf(typeName));
    // Check every property for their existence
    for (String propertyName : properties) {
        assertEquals(propertyName, schemaType.getChild(QName.valueOf(propertyName)).getDisplayName());
    }
    // read Instances ###
    InstanceCollection instances = readCSVInstances("/data/test3-pointdecimal.csv", typeName, true, schema, ";", null, null, ".");
    assertEquals(numberOfInstances, collectionSize(instances));
    // get Type to check property definition (schema and instance
    // combination)
    TypeDefinition type = instances.iterator().next().getDefinition();
    assertEquals(typeName, type.getDisplayName());
    for (String propertyName : properties) {
        ChildDefinition<?> child = type.getChild(QName.valueOf(propertyName));
        assertEquals(propertyName, child.getDisplayName());
    }
    // Check the values of the first instance
    Instance instance = instances.iterator().next();
    for (int i = 0; i < dataFirstColumn.length; i++) {
        Object[] value = instance.getProperty(QName.valueOf(properties[i]));
        assertEquals(dataFirstColumn[i], value[0]);
    }
}
Also used : Instance(eu.esdihumboldt.hale.common.instance.model.Instance) Schema(eu.esdihumboldt.hale.common.schema.model.Schema) InstanceCollection(eu.esdihumboldt.hale.common.instance.model.InstanceCollection) TypeDefinition(eu.esdihumboldt.hale.common.schema.model.TypeDefinition) Test(org.junit.Test)

Example 95 with TypeDefinition

use of eu.esdihumboldt.hale.common.schema.model.TypeDefinition in project hale by halestudio.

the class CSVSchemaReaderTest method testRead2.

/**
 * Test for no given property names and property types (using default
 * settings). Expects exactly one mapping relevant type named "TestTyp"
 * whose child names all occur in the expected property list.
 *
 * @throws Exception the Exception thrown if the test fails
 */
@Test
public void testRead2() throws Exception {
    String prop = "Name,Xcoord,Ycoord,id";
    CSVSchemaReader schemaReader = new CSVSchemaReader();
    schemaReader.setSource(new DefaultInputSupplier(getClass().getResource("/data/test1.csv").toURI()));
    schemaReader.setParameter(CommonSchemaConstants.PARAM_TYPENAME, Value.of("TestTyp"));
    // all other parameters are explicitly set to null
    schemaReader.setParameter(CSVSchemaReader.PARAM_PROPERTY, null);
    schemaReader.setParameter(CSVSchemaReader.PARAM_PROPERTYTYPE, null);
    schemaReader.setParameter(CSVSchemaReader.PARAM_SEPARATOR, null);
    schemaReader.setParameter(CSVSchemaReader.PARAM_QUOTE, null);
    schemaReader.setParameter(CSVSchemaReader.PARAM_ESCAPE, null);
    IOReport report = schemaReader.execute(new LogProgressIndicator());
    assertTrue(report.isSuccess());
    Schema schema = schemaReader.getSchema();
    assertEquals(1, schema.getMappingRelevantTypes().size());
    TypeDefinition type = schema.getMappingRelevantTypes().iterator().next();
    // assertEquals reports the actual name if the check fails
    assertEquals("TestTyp", type.getName().getLocalPart());
    // NOTE(review): this is only a substring check against the joined
    // property list, not an exact comparison of the property names
    for (ChildDefinition<?> child : type.getChildren()) {
        assertTrue(prop.contains(child.getName().getLocalPart()));
    }
}
Also used : DefaultInputSupplier(eu.esdihumboldt.hale.common.core.io.supplier.DefaultInputSupplier) CSVSchemaReader(eu.esdihumboldt.hale.io.csv.reader.internal.CSVSchemaReader) Schema(eu.esdihumboldt.hale.common.schema.model.Schema) IOReport(eu.esdihumboldt.hale.common.core.io.report.IOReport) LogProgressIndicator(eu.esdihumboldt.hale.common.core.io.impl.LogProgressIndicator) TypeDefinition(eu.esdihumboldt.hale.common.schema.model.TypeDefinition) Test(org.junit.Test)

Aggregations

TypeDefinition (eu.esdihumboldt.hale.common.schema.model.TypeDefinition)192 QName (javax.xml.namespace.QName)58 PropertyDefinition (eu.esdihumboldt.hale.common.schema.model.PropertyDefinition)38 ArrayList (java.util.ArrayList)32 TypeEntityDefinition (eu.esdihumboldt.hale.common.align.model.impl.TypeEntityDefinition)26 DefaultTypeDefinition (eu.esdihumboldt.hale.common.schema.model.impl.DefaultTypeDefinition)26 Test (org.junit.Test)24 Instance (eu.esdihumboldt.hale.common.instance.model.Instance)22 PropertyEntityDefinition (eu.esdihumboldt.hale.common.align.model.impl.PropertyEntityDefinition)21 HashSet (java.util.HashSet)21 EntityDefinition (eu.esdihumboldt.hale.common.align.model.EntityDefinition)20 Schema (eu.esdihumboldt.hale.common.schema.model.Schema)20 InstanceCollection (eu.esdihumboldt.hale.common.instance.model.InstanceCollection)16 ChildContext (eu.esdihumboldt.hale.common.align.model.ChildContext)15 XmlElement (eu.esdihumboldt.hale.io.xsd.model.XmlElement)15 Cell (eu.esdihumboldt.hale.common.align.model.Cell)14 IOMessageImpl (eu.esdihumboldt.hale.common.core.io.report.impl.IOMessageImpl)14 MutableCell (eu.esdihumboldt.hale.common.align.model.MutableCell)12 Binding (eu.esdihumboldt.hale.common.schema.model.constraint.type.Binding)12 DefaultPropertyDefinition (eu.esdihumboldt.hale.common.schema.model.impl.DefaultPropertyDefinition)12