use of eu.esdihumboldt.hale.common.schema.model.TypeDefinition in project hale by halestudio.
the class StoreInstancesJob method run.
/**
* @see Job#run(IProgressMonitor)
*/
@Override
public IStatus run(IProgressMonitor monitor) {
boolean exactProgress = instances.hasSize();
monitor.beginTask("Store instances in database", (exactProgress) ? (instances.size()) : (IProgressMonitor.UNKNOWN));
AtomicInteger count = new AtomicInteger(0);
TObjectIntHashMap<QName> typeCount = new TObjectIntHashMap<>();
if (report != null) {
// set the correct start time
report.setStartTime(new Date());
}
// get database connection
DatabaseReference<ODatabaseDocumentTx> ref = database.openWrite();
ODatabaseDocumentTx db = ref.getDatabase();
ATransaction trans = log.begin("Store instances in database");
try {
// use intent
db.declareIntent(new OIntentMassiveInsert());
// Find all the InstanceProcessors to feed them the stored Instances
final List<InstanceProcessor> processors;
if (doProcessing) {
final InstanceProcessingExtension ext = new InstanceProcessingExtension(serviceProvider);
processors = ext.getInstanceProcessors();
} else {
processors = Collections.emptyList();
}
BrowseOrientInstanceCollection browser = new BrowseOrientInstanceCollection(database, null, DataSet.SOURCE);
final InstanceIndexService indexService;
if (doProcessing) {
indexService = serviceProvider.getService(InstanceIndexService.class);
} else {
indexService = null;
}
// TODO decouple next() and save()?
SimpleLogContext.withLog(report, () -> {
if (report != null && instances instanceof LogAware) {
((LogAware) instances).setLog(report);
}
ResourceIterator<Instance> it = instances.iterator();
int size = instances.size();
try {
while (it.hasNext() && !monitor.isCanceled()) {
// last count update
long lastUpdate = 0;
if (report != null && instances instanceof LogAware) {
((LogAware) instances).setLog(report);
}
Instance instance = it.next();
// further processing before storing
processInstance(instance);
// get/create OInstance
OInstance conv = ((instance instanceof OInstance) ? ((OInstance) instance) : (new OInstance(instance)));
conv.setInserted(true);
// update the instance to store, e.g. generating
// metadata
updateInstance(conv);
ODatabaseRecordThreadLocal.INSTANCE.set(db);
// configure the document
ODocument doc = conv.configureDocument(db);
// and save it
doc.save();
// Create an InstanceReference for the saved instance
// and
// feed it to all known InstanceProcessors. The
// decoration
// with ResolvableInstanceReference allows the
// InstanceProcessors to resolve the instances if
// required.
OrientInstanceReference oRef = new OrientInstanceReference(doc.getIdentity(), conv.getDataSet(), conv.getDefinition());
IdentifiableInstanceReference idRef = new IdentifiableInstanceReference(oRef, doc.getIdentity());
ResolvableInstanceReference resolvableRef = new ResolvableInstanceReference(idRef, browser);
processors.forEach(p -> p.process(instance, resolvableRef));
if (indexService != null) {
indexService.add(instance, resolvableRef);
}
count.incrementAndGet();
TypeDefinition type = instance.getDefinition();
if (type != null) {
typeCount.adjustOrPutValue(type.getName(), 1, 1);
}
if (exactProgress) {
monitor.worked(1);
}
long now = System.currentTimeMillis();
if (now - lastUpdate > 100) {
// only update every 100
// milliseconds
monitor.subTask(MessageFormat.format("{0}{1} instances processed", String.valueOf(count.get()), size != InstanceCollection.UNKNOWN_SIZE ? "/" + String.valueOf(size) : ""));
lastUpdate = now;
}
}
} finally {
it.close();
if (report != null && instances instanceof LogAware) {
((LogAware) instances).setLog(null);
}
}
});
db.declareIntent(null);
} catch (RuntimeException e) {
if (report != null) {
reportTypeCount(report, typeCount);
report.error(new MessageImpl("Error storing instances in database", e));
report.setSuccess(false);
reportHandler.publishReport(report);
}
throw e;
} finally {
ref.dispose();
trans.end();
/*
* Reset instances to prevent memory leak. It seems Eclipse
* internally holds a reference to the job (in JobInfo and/or
* ProgressMonitorFocusJobDialog) and this results in the instance
* collection not being garbage collected. This is especially bad,
* if an in-memory instance collection is used, e.g. a
* DefaultInstanceCollection that is used when loading a Shapefile.
*/
instances = null;
}
try {
onComplete();
} catch (RuntimeException e) {
String message = "Error while post processing stored instances";
if (report != null) {
report.error(new MessageImpl(message, e));
} else {
log.error(message, e);
}
}
String message = MessageFormat.format("Stored {0} instances in the database.", count);
if (monitor.isCanceled()) {
String warn = "Loading instances was canceled, incomplete data set in the database.";
if (report != null) {
report.warn(new MessageImpl(warn, null));
} else {
log.warn(warn);
}
}
if (report != null) {
reportTypeCount(report, typeCount);
report.setSuccess(true);
report.setSummary(message);
reportHandler.publishReport(report);
} else {
log.info(message);
}
monitor.done();
return new Status((monitor.isCanceled()) ? (IStatus.CANCEL) : (IStatus.OK), "eu.esdihumboldt.hale.common.instance.orient", message);
}
use of eu.esdihumboldt.hale.common.schema.model.TypeDefinition in project hale by halestudio.
the class PropertyResolverTest method testLoadShiporder.
/**
* Test loading a simple XML file with one instance
*
* @throws Exception if an error occurs
*/
@Test
public void testLoadShiporder() throws Exception {
InstanceCollection instances = loadXMLInstances(getClass().getResource("/data/shiporder/shiporder.xsd").toURI(), getClass().getResource("/data/shiporder/shiporder.xml").toURI());
ResourceIterator<Instance> it = instances.iterator();
try {
assertTrue(it.hasNext());
Instance instance = it.next();
assertNotNull(instance);
@SuppressWarnings("unused") TypeDefinition test = instance.getDefinition().getChildren().iterator().next().asProperty().getParentType();
assertTrue(PropertyResolver.hasProperty(instance, "{http://www.example.com}orderperson"));
assertTrue(PropertyResolver.hasProperty(instance, "{http://www.example.com}shipto.{http://www.example.com}city"));
assertTrue(PropertyResolver.getQueryPath(instance, "{http://www.example.com}shipto.{http://www.example.com}city").contains("{http://www.example.com}shipto.{http://www.example.com}city"));
assertTrue(PropertyResolver.hasProperty(instance, "orderperson"));
assertTrue(PropertyResolver.hasProperty(instance, "shipto.city"));
assertTrue(PropertyResolver.hasProperty(instance, "shipto.{http://www.example.com}city"));
assertEquals(PropertyResolver.getValues(instance, "shipto.city").iterator().next(), "4000 Stavanger");
} finally {
it.close();
}
}
use of eu.esdihumboldt.hale.common.schema.model.TypeDefinition in project hale by halestudio.
the class CSVInstanceReaderTest method testReadSimple.
/**
* Test - read a sample csv schema and data.
*
* @throws Exception , if an error occurs
*/
@Test
public void testReadSimple() throws Exception {
String typeName = "location";
String[] properties = { "Name", "Xcoord", "Ycoord", "id" };
String[] dataFirstColumn = { "test", "12", "16", "1" };
int numberOfInstances = 2;
// read Schema ###
Schema schema = readCSVSchema("/data/test1.csv", typeName, "java.lang.String,java.lang.String,java.lang.String,java.lang.String", "Name,Xcoord,Ycoord,id", null, null, null);
// Test properties
TypeDefinition schemaType = schema.getType(QName.valueOf(typeName));
// Check every property for their existence
for (String propertyName : properties) {
assertEquals(propertyName, schemaType.getChild(QName.valueOf(propertyName)).getDisplayName());
}
// read Instances ###
InstanceCollection instances = readCSVInstances("/data/test1.csv", typeName, true, schema, null, null, null);
assertEquals(numberOfInstances, collectionSize(instances));
// get Type to check property definition (schema and instance
// combination)
TypeDefinition type = instances.iterator().next().getDefinition();
ChildDefinition<?> child = null;
assertEquals(typeName, type.getDisplayName());
for (int i = 0; i < properties.length; i++) {
child = type.getChild(QName.valueOf(properties[i]));
assertEquals(properties[i], child.getDisplayName());
}
// Check the values of the first (type) instance
Instance instance = instances.iterator().next();
Object[] value;
for (int i = 0; i < dataFirstColumn.length; i++) {
value = instance.getProperty(QName.valueOf(properties[i]));
assertEquals(dataFirstColumn[i], value[0]);
assertTrue(value[0] instanceof String);
}
}
use of eu.esdihumboldt.hale.common.schema.model.TypeDefinition in project hale by halestudio.
the class CSVInstanceReaderTest method testReadWithPointDecimal.
/**
* Test - read a sample csv schema and data with point as a decimal divisor
*
* @throws Exception , if an error occurs
*/
@Test
public void testReadWithPointDecimal() throws Exception {
String typeName = "Random";
String[] properties = { "A", "B", "C", "D", "E" };
Object[] dataFirstColumn = { new Integer(1), "A", new Float(32647968.61), new Float(5649088.376), "Linderbacher Straße" };
int numberOfInstances = 5;
// read Schema ###
Schema schema = readCSVSchema("/data/test3-pointdecimal.csv", typeName, "java.lang.Integer,java.lang.String,java.lang.Float,java.lang.Float,java.lang.String", "A,B,C,D,E", ";", null, null, ".");
// Test properties
TypeDefinition schemaType = schema.getType(QName.valueOf(typeName));
// Check every property for their existence
for (String propertyName : properties) {
assertEquals(propertyName, schemaType.getChild(QName.valueOf(propertyName)).getDisplayName());
}
// read Instances ###
InstanceCollection instances = readCSVInstances("/data/test3-pointdecimal.csv", typeName, true, schema, ";", null, null, ".");
assertEquals(numberOfInstances, collectionSize(instances));
// get Type to check property definition (schema and instance
// combination)
TypeDefinition type = instances.iterator().next().getDefinition();
ChildDefinition<?> child = null;
assertEquals(typeName, type.getDisplayName());
for (int i = 0; i < properties.length; i++) {
child = type.getChild(QName.valueOf(properties[i]));
assertEquals(properties[i], child.getDisplayName());
}
// Check the values of the first (type) instance
Instance instance = instances.iterator().next();
Object[] value;
for (int i = 0; i < dataFirstColumn.length; i++) {
value = instance.getProperty(QName.valueOf(properties[i]));
assertEquals(dataFirstColumn[i], value[0]);
}
}
use of eu.esdihumboldt.hale.common.schema.model.TypeDefinition in project hale by halestudio.
the class CSVSchemaReaderTest method testRead2.
/**
* Test for no given property names and property types (using default
* settings)
*
* @throws Exception the Exception thrown if the test fails
*/
@Test
public void testRead2() throws Exception {
String prop = "Name,Xcoord,Ycoord,id";
CSVSchemaReader schemaReader = new CSVSchemaReader();
schemaReader.setSource(new DefaultInputSupplier(getClass().getResource("/data/test1.csv").toURI()));
schemaReader.setParameter(CommonSchemaConstants.PARAM_TYPENAME, Value.of("TestTyp"));
schemaReader.setParameter(CSVSchemaReader.PARAM_PROPERTY, null);
schemaReader.setParameter(CSVSchemaReader.PARAM_PROPERTYTYPE, null);
schemaReader.setParameter(CSVSchemaReader.PARAM_SEPARATOR, null);
schemaReader.setParameter(CSVSchemaReader.PARAM_QUOTE, null);
schemaReader.setParameter(CSVSchemaReader.PARAM_ESCAPE, null);
IOReport report = schemaReader.execute(new LogProgressIndicator());
assertTrue(report.isSuccess());
Schema schema = schemaReader.getSchema();
assertEquals(1, schema.getMappingRelevantTypes().size());
TypeDefinition type = schema.getMappingRelevantTypes().iterator().next();
assertTrue(type.getName().getLocalPart().equals("TestTyp"));
Iterator<? extends ChildDefinition<?>> it = type.getChildren().iterator();
while (it.hasNext()) {
assertTrue(prop.contains(it.next().getName().getLocalPart()));
}
}
Aggregations