Search in sources:

Example 1 with XMLInspector

use of eu.esdihumboldt.hale.common.instance.graph.reference.impl.XMLInspector in project hale by halestudio.

The method partitionByFeatureType of the class StreamGmlWriter.

/**
 * Writes the instances as multiple parts, one part per feature type.
 * <p>
 * A first pass over the instances records every encountered type and
 * associates each instance identity (as determined by {@link XMLInspector})
 * with its type. A target URI is then derived per type, and the instances
 * are regrouped by type and handed to {@code writeParts}.
 *
 * @param progress the progress indicator passed on to {@code writeParts}
 * @param reporter the reporter passed on to {@code writeParts}
 * @throws IOException if writing the parts fails with an
 *             {@link XMLStreamException} (wrapped as the cause)
 */
private void partitionByFeatureType(ProgressIndicator progress, IOReporter reporter) throws IOException {
    // Threshold currently not supported if partitioning by feature type
    final Set<TypeDefinition> featureTypes = new HashSet<>();
    final XMLInspector inspector = new XMLInspector();
    final Map<String, TypeDefinition> typeByInstanceId = new HashMap<>();

    // Single pass: collect the distinct types and remember the type of every ID
    try (ResourceIterator<Instance> instances = getInstances().iterator()) {
        while (instances.hasNext()) {
            final Instance current = instances.next();
            featureTypes.add(current.getDefinition());
            typeByInstanceId.put(inspector.getIdentity(current), current.getDefinition());
        }
    }

    // One target file per collected type
    final Map<TypeDefinition, URI> targetByType = new HashMap<>();
    for (TypeDefinition featureType : featureTypes) {
        targetByType.put(featureType, new File(PerTypePartsHandler.getTargetFilename(featureType.getName(), getTarget().getLocation())).toURI());
    }

    final PerTypePartsHandler partsHandler = new PerTypePartsHandler(targetByType, typeByInstanceId);
    final PerTypeInstanceCollection groupedByType = PerTypeInstanceCollection.fromInstances(getInstances(), featureTypes);
    try {
        writeParts(groupedByType.collectionsIterator(), partsHandler, progress, reporter);
    } catch (XMLStreamException xmlFailure) {
        throw new IOException(xmlFailure.getMessage(), xmlFailure);
    }
}
Also used : HashMap(java.util.HashMap) Instance(eu.esdihumboldt.hale.common.instance.model.Instance) IOException(java.io.IOException) URI(java.net.URI) TypeDefinition(eu.esdihumboldt.hale.common.schema.model.TypeDefinition) XMLStreamException(javax.xml.stream.XMLStreamException) XMLInspector(eu.esdihumboldt.hale.common.instance.graph.reference.impl.XMLInspector) File(java.io.File) PerTypeInstanceCollection(eu.esdihumboldt.hale.common.instance.model.ext.impl.PerTypeInstanceCollection) HashSet(java.util.HashSet)

Example 2 with XMLInspector

use of eu.esdihumboldt.hale.common.instance.graph.reference.impl.XMLInspector in project hale by halestudio.

The method partitionByExtent of the class StreamGmlWriter.

/**
 * Writes the instances as multiple parts, partitioned spatially by means of
 * a quadtree that is built from the centroid of each instance's geometry.
 * <p>
 * Instances in which no geometry or more than one geometry is found are not
 * added to the quadtree; they are collected in a separate part registered
 * under {@link ExtentPartsHandler#KEY_NO_GEOMETRY}.
 * <p>
 * The maximum number of nodes per tile is read from
 * {@code PARAM_PARTITION_BY_EXTENT_MAX_NODES} (default 1000) and the
 * partitioning mode from {@code PARAM_PARTITION_BY_EXTENT_MODE} (default
 * {@code PARTITION_BY_EXTENT_MODE_DATASET}).
 *
 * @param progress the progress indicator; the preparation steps are reported
 *            through a subtask wrapper around it, the writing itself
 *            directly
 * @param reporter the reporter passed on to {@code writeParts}
 * @throws IOException if writing the parts fails with an
 *             {@link XMLStreamException} (wrapped as the cause)
 * @throws IllegalStateException in world mode, if a geometry cannot be
 *             transformed to WGS 84
 */
private void partitionByExtent(ProgressIndicator progress, IOReporter reporter) throws IOException {
    int maxNodes = getParameter(PARAM_PARTITION_BY_EXTENT_MAX_NODES).as(Integer.class, 1000);
    String mode = getParameter(PARAM_PARTITION_BY_EXTENT_MODE).as(String.class, PARTITION_BY_EXTENT_MODE_DATASET);
    // Subtask indicator that renders the task name as "task (subtask)"
    final SubtaskProgressIndicator qtProgress = new SubtaskProgressIndicator(progress) {

        @Override
        protected String getCombinedTaskName(String taskName, String subtaskName) {
            return taskName + " (" + subtaskName + ")";
        }
    };
    // Map for instances that either contain no or multiple geometries
    Map<String, InstanceReference> unhandledInstances = new HashMap<>();
    QuadtreeBuilder<Point, InstanceReference> builder = new QuadtreeBuilder<>();
    // NOTE(review): the source iterator stays open until the parts have been
    // written; kept as is because it is not visible here whether resolving
    // the references depends on it.
    try (ResourceIterator<Instance> it = getInstances().iterator()) {
        qtProgress.begin("Collecting geometries", getInstances().size());
        final XMLInspector gadget = new XMLInspector();
        int i = 0;
        while (it.hasNext()) {
            Instance inst = it.next();
            InstanceReference instRef = getInstances().getReference(inst);
            InstanceTraverser traverser = new DepthFirstInstanceTraverser();
            GeometryFinder finder = new GeometryFinder(getTargetCRS());
            traverser.traverse(inst, finder);
            List<GeometryProperty<?>> geoms = finder.getGeometries();
            if (geoms.size() != 1) {
                // No geometry or ambiguous geometry: cannot be placed in the quadtree
                unhandledInstances.put(gadget.getIdentity(inst), instRef);
            } else {
                GeometryProperty<?> geomProperty = geoms.get(0);
                Geometry geom = geomProperty.getGeometry();
                Point centroid;
                switch(mode) {
                    case PARTITION_BY_EXTENT_MODE_WORLD:
                        // World mode uses fixed WGS 84 bounds, so the centroid
                        // must be computed in WGS 84 coordinates
                        CoordinateReferenceSystem sourceCrs = geomProperty.getCRSDefinition().getCRS();
                        CodeDefinition wgs84 = new CodeDefinition("EPSG:4326");
                        try {
                            MathTransform toWgs84 = CRS.findMathTransform(sourceCrs, wgs84.getCRS());
                            Geometry geomWgs84 = JTS.transform(geom, toWgs84);
                            centroid = geomWgs84.getCentroid();
                        } catch (FactoryException | MismatchedDimensionException | TransformException e) {
                            log.error("Unable to transform geometry to WGS 84", e);
                            throw new IllegalStateException(e.getMessage(), e);
                        }
                        break;
                    case PARTITION_BY_EXTENT_MODE_DATASET:
                    // fall through to default
                    default:
                        centroid = geom.getCentroid();
                }
                builder.add(centroid, new IdentifiableInstanceReference(instRef, gadget.getIdentity(inst)));
            }
            qtProgress.advance(1);
            // Refresh the task label only every 100 instances
            if (++i % 100 == 0) {
                qtProgress.setCurrentTask(MessageFormat.format("{0} instances processed", i));
            }
        }
        qtProgress.setCurrentTask("Building quadtree");
        FixedBoundaryQuadtree<InstanceReference> qt;
        switch(mode) {
            case PARTITION_BY_EXTENT_MODE_DATASET:
                qt = builder.build(maxNodes);
                break;
            case PARTITION_BY_EXTENT_MODE_WORLD:
                Envelope world = new Envelope(-180, 180, -90, 90);
                qt = builder.build(maxNodes, world);
                break;
            default:
                log.error(MessageFormat.format("Unrecognized extent partitioning mode \"{0}\", using dataset boundaries", mode));
                qt = builder.build(maxNodes);
        }
        qtProgress.setCurrentTask("Performing spatial partitioning");
        final Map<String, String> idToKeyMapping = new HashMap<>();
        final Map<String, Collection<InstanceReference>> keyToRefsMapping = new HashMap<>();
        // Instances without geometry or with multiple geometries
        keyToRefsMapping.put(ExtentPartsHandler.KEY_NO_GEOMETRY, unhandledInstances.values());
        unhandledInstances.keySet().stream().forEach(id -> idToKeyMapping.put(id, ExtentPartsHandler.KEY_NO_GEOMETRY));
        // Presumably adds one key per quadtree tile to both mappings - see buildMappings
        buildMappings(qt, idToKeyMapping, keyToRefsMapping);
        // Partition source instances based on quadtree tiles
        Iterator<InstanceCollection> collIt = new Iterator<InstanceCollection>() {

            // Snapshot of the keys that still have to be served
            private final Queue<String> keySet = new LinkedList<>(keyToRefsMapping.keySet());

            @Override
            public boolean hasNext() {
                return !keySet.isEmpty();
            }

            @Override
            public InstanceCollection next() {
                // remove() - unlike poll() - throws NoSuchElementException on
                // an exhausted queue, as the Iterator contract requires,
                // instead of yielding a null key and a NullPointerException
                String key = keySet.remove();
                Collection<InstanceReference> refs = keyToRefsMapping.get(key);
                InstanceCollection instColl = new DefaultInstanceCollection(refs.stream().map(ref -> getInstances().getInstance(IdentifiableInstanceReference.getRootReference(ref))).collect(Collectors.toList()));
                return new ExtentPartsHandler.TreeKeyDecorator(instColl, key);
            }
        };
        final Map<String, URI> keyToTargetMapping = new HashMap<>();
        keyToRefsMapping.keySet().stream().forEach(k -> keyToTargetMapping.put(k, new File(ExtentPartsHandler.getTargetFilename(k, getTarget().getLocation())).toURI()));
        final ExtentPartsHandler handler = new ExtentPartsHandler(keyToTargetMapping, idToKeyMapping);
        qtProgress.end();
        try {
            writeParts(collIt, handler, progress, reporter);
        } catch (XMLStreamException e) {
            throw new IOException(e.getMessage(), e);
        }
    }
}
Also used : MathTransform(org.opengis.referencing.operation.MathTransform) HashMap(java.util.HashMap) Instance(eu.esdihumboldt.hale.common.instance.model.Instance) GeometryFinder(eu.esdihumboldt.hale.common.instance.geometry.GeometryFinder) FactoryException(org.opengis.referencing.FactoryException) IdentifiableInstanceReference(eu.esdihumboldt.hale.common.instance.model.IdentifiableInstanceReference) Envelope(org.locationtech.jts.geom.Envelope) DefaultInstanceCollection(eu.esdihumboldt.hale.common.instance.model.impl.DefaultInstanceCollection) MismatchedDimensionException(org.opengis.geometry.MismatchedDimensionException) URI(java.net.URI) CodeDefinition(eu.esdihumboldt.hale.common.instance.geometry.impl.CodeDefinition) ResourceIterator(eu.esdihumboldt.hale.common.instance.model.ResourceIterator) Iterator(java.util.Iterator) CoordinateReferenceSystem(org.opengis.referencing.crs.CoordinateReferenceSystem) Queue(java.util.Queue) DepthFirstInstanceTraverser(eu.esdihumboldt.hale.common.instance.helper.DepthFirstInstanceTraverser) InstanceTraverser(eu.esdihumboldt.hale.common.instance.helper.InstanceTraverser) GeometryProperty(eu.esdihumboldt.hale.common.schema.geometry.GeometryProperty) PerTypeInstanceCollection(eu.esdihumboldt.hale.common.instance.model.ext.impl.PerTypeInstanceCollection) DefaultInstanceCollection(eu.esdihumboldt.hale.common.instance.model.impl.DefaultInstanceCollection) InstanceCollection(eu.esdihumboldt.hale.common.instance.model.InstanceCollection) TransformException(org.opengis.referencing.operation.TransformException) SubtaskProgressIndicator(eu.esdihumboldt.hale.common.core.io.impl.SubtaskProgressIndicator) QuadtreeBuilder(eu.esdihumboldt.util.geometry.quadtree.QuadtreeBuilder) Point(org.locationtech.jts.geom.Point) IOException(java.io.IOException) Point(org.locationtech.jts.geom.Point) DepthFirstInstanceTraverser(eu.esdihumboldt.hale.common.instance.helper.DepthFirstInstanceTraverser) Geometry(org.locationtech.jts.geom.Geometry) 
XMLStreamException(javax.xml.stream.XMLStreamException) InstanceReference(eu.esdihumboldt.hale.common.instance.model.InstanceReference) IdentifiableInstanceReference(eu.esdihumboldt.hale.common.instance.model.IdentifiableInstanceReference) XMLInspector(eu.esdihumboldt.hale.common.instance.graph.reference.impl.XMLInspector) PerTypeInstanceCollection(eu.esdihumboldt.hale.common.instance.model.ext.impl.PerTypeInstanceCollection) DefaultInstanceCollection(eu.esdihumboldt.hale.common.instance.model.impl.DefaultInstanceCollection) Collection(java.util.Collection) InstanceCollection(eu.esdihumboldt.hale.common.instance.model.InstanceCollection) File(java.io.File)

Example 3 with XMLInspector

use of eu.esdihumboldt.hale.common.instance.graph.reference.impl.XMLInspector in project hale by halestudio.

The method partitionByPlan of the class XPlanGmlInstanceWriter.

/**
 * Writes the instances as multiple parts, one part per plan instance.
 * <p>
 * Instances whose type is one of the plan types each start a part of their
 * own. For every plan, a reference graph over the plan and all non-plan
 * instances is partitioned, and the partition that contains the plan
 * replaces the plan's initial single-instance collection. If no partition
 * contains the plan, the single-instance collection is kept.
 *
 * @param progress the progress indicator passed on to {@code writeParts}
 * @param reporter the reporter passed on to the reference graph
 *            partitioning and to {@code writeParts}
 * @throws IOException if writing the parts fails with an
 *             {@link XMLStreamException} (wrapped as the cause)
 */
private void partitionByPlan(ProgressIndicator progress, IOReporter reporter) throws IOException {
    final Set<TypeDefinition> planTypes = collectPlanTypes(getTargetSchema().getTypes());
    /*
     * Split instances into plan and non-plan instances. Associate the ID of
     * a plan with its plan type and the plan instance.
     */
    final XMLInspector gadget = new XMLInspector();
    final DefaultInstanceCollection nonPlanInstances = new DefaultInstanceCollection();
    final Map<String, TypeDefinition> planIdToPlanTypeMapping = new HashMap<>();
    final Map<String, InstanceCollection> planIdToInstancesMapping = new HashMap<>();
    try (ResourceIterator<Instance> it = getInstances().iterator()) {
        while (it.hasNext()) {
            Instance inst = it.next();
            if (!planTypes.contains(inst.getDefinition())) {
                nonPlanInstances.add(inst);
                continue;
            }
            String planId = gadget.getIdentity(inst);
            planIdToInstancesMapping.put(planId, new DefaultInstanceCollection(Arrays.asList(inst)));
            planIdToPlanTypeMapping.put(planId, inst.getDefinition());
        }
    }
    /*
     * Collect referenced instances for every plan instance
     */
    // Replacing the value of an existing key is not a structural
    // modification, so put() is safe while iterating over the key set.
    for (String planId : planIdToInstancesMapping.keySet()) {
        MultiInstanceCollection mic = new MultiInstanceCollection(Arrays.asList(planIdToInstancesMapping.get(planId), nonPlanInstances));
        ReferenceGraph<String> rg = new ReferenceGraph<String>(new XMLInspector(), mic);
        Iterator<InstanceCollection> p = rg.partition(1, reporter);
        boolean found = false;
        while (!found && p.hasNext()) {
            InstanceCollection c = p.next();
            // Instance collection iterators hold resources; close them even
            // when the search stops early
            try (ResourceIterator<Instance> it = c.iterator()) {
                while (it.hasNext()) {
                    Instance i = it.next();
                    if (planId.equals(gadget.getIdentity(i))) {
                        // This partition contains the plan: it becomes the plan's part
                        planIdToInstancesMapping.put(planId, c);
                        found = true;
                        break;
                    }
                }
            }
        }
    }
    final MultipartHandler handler = new MultipartHandler() {

        /**
         * Builds the file name
         * {@code <parent>/<name>.<plan type>.<sanitized plan ID>.<extension>}
         * for the given part.
         *
         * @throws RuntimeException if the part is not one of the
         *             collections registered for a plan
         */
        @Override
        public String getTargetFilename(InstanceCollection part, URI originalTarget) {
            Path origPath = Paths.get(originalTarget).normalize();
            Pair<String, String> nameAndExt = DefaultMultipartHandler.getFileNameAndExtension(origPath.toString());
            // Look the plan up by collection identity; the parts written are
            // exactly the values of the mapping
            String planId = null;
            for (Entry<String, InstanceCollection> mapping : planIdToInstancesMapping.entrySet()) {
                if (part == mapping.getValue()) {
                    planId = mapping.getKey();
                    break;
                }
            }
            if (planId == null) {
                throw new RuntimeException("Plan was not seen before");
            }
            // Replace all characters that are not allowed in XML IDs with
            // an underscore. In addition, the colon (:) is also replaced
            // to make sure that the resulting String can be used safely in
            // a file name.
            String sanitizedPlanId = planId.replaceAll("[^A-Za-z0-9-_.]", "_");
            return String.format("%s%s%s.%s.%s.%s", origPath.getParent().toString(), File.separator, nameAndExt.getFirst(), planIdToPlanTypeMapping.get(planId).getDisplayName(), sanitizedPlanId, nameAndExt.getSecond());
        }
    };
    try {
        writeParts(planIdToInstancesMapping.values().iterator(), handler, progress, reporter);
    } catch (XMLStreamException e) {
        throw new IOException(e.getMessage(), e);
    }
}
Also used : HashMap(java.util.HashMap) Instance(eu.esdihumboldt.hale.common.instance.model.Instance) MultipartHandler(eu.esdihumboldt.hale.io.gml.writer.internal.MultipartHandler) DefaultMultipartHandler(eu.esdihumboldt.hale.io.gml.writer.internal.DefaultMultipartHandler) DefaultInstanceCollection(eu.esdihumboldt.hale.common.instance.model.impl.DefaultInstanceCollection) URI(java.net.URI) TypeDefinition(eu.esdihumboldt.hale.common.schema.model.TypeDefinition) MultiInstanceCollection(eu.esdihumboldt.hale.common.instance.model.impl.MultiInstanceCollection) ReferenceGraph(eu.esdihumboldt.hale.common.instance.graph.reference.ReferenceGraph) Path(java.nio.file.Path) MultiInstanceCollection(eu.esdihumboldt.hale.common.instance.model.impl.MultiInstanceCollection) DefaultInstanceCollection(eu.esdihumboldt.hale.common.instance.model.impl.DefaultInstanceCollection) InstanceCollection(eu.esdihumboldt.hale.common.instance.model.InstanceCollection) IOException(java.io.IOException) XMLStreamException(javax.xml.stream.XMLStreamException) XMLInspector(eu.esdihumboldt.hale.common.instance.graph.reference.impl.XMLInspector)

Aggregations

XMLInspector (eu.esdihumboldt.hale.common.instance.graph.reference.impl.XMLInspector)3 Instance (eu.esdihumboldt.hale.common.instance.model.Instance)3 IOException (java.io.IOException)3 URI (java.net.URI)3 HashMap (java.util.HashMap)3 XMLStreamException (javax.xml.stream.XMLStreamException)3 InstanceCollection (eu.esdihumboldt.hale.common.instance.model.InstanceCollection)2 PerTypeInstanceCollection (eu.esdihumboldt.hale.common.instance.model.ext.impl.PerTypeInstanceCollection)2 DefaultInstanceCollection (eu.esdihumboldt.hale.common.instance.model.impl.DefaultInstanceCollection)2 TypeDefinition (eu.esdihumboldt.hale.common.schema.model.TypeDefinition)2 File (java.io.File)2 SubtaskProgressIndicator (eu.esdihumboldt.hale.common.core.io.impl.SubtaskProgressIndicator)1 GeometryFinder (eu.esdihumboldt.hale.common.instance.geometry.GeometryFinder)1 CodeDefinition (eu.esdihumboldt.hale.common.instance.geometry.impl.CodeDefinition)1 ReferenceGraph (eu.esdihumboldt.hale.common.instance.graph.reference.ReferenceGraph)1 DepthFirstInstanceTraverser (eu.esdihumboldt.hale.common.instance.helper.DepthFirstInstanceTraverser)1 InstanceTraverser (eu.esdihumboldt.hale.common.instance.helper.InstanceTraverser)1 IdentifiableInstanceReference (eu.esdihumboldt.hale.common.instance.model.IdentifiableInstanceReference)1 InstanceReference (eu.esdihumboldt.hale.common.instance.model.InstanceReference)1 ResourceIterator (eu.esdihumboldt.hale.common.instance.model.ResourceIterator)1