Search in sources:

Example 1 with PartitionData

use of org.locationtech.geowave.analytic.partitioner.Partitioner.PartitionData in project geowave by locationtech.

In the class NNProcessor, method add.

/**
 * Registers {@code itemId} under the given partition, updating both the partition-to-ids lookup
 * ({@code partitionsToIds}) and the id-to-partitions lookup ({@code idsToPartition}).
 *
 * <p>The partition is first resolved through {@code uniqueSetOfPartitions}, so the instance that
 * was registered first is the one used as key in the other lookups and returned to the caller.
 *
 * @param pd the partition the item falls into
 * @param itemId the identifier of the item being registered
 * @return the registered partition instance, or {@code null} if the partition already holds more
 *         than {@code upperBoundPerPartition} ids (in which case the item is not added)
 */
private PartitionData add(final PartitionData pd, final ByteArray itemId) {
    // Resolve to the instance registered first; register pd itself when none exists yet.
    final PartitionData registered = uniqueSetOfPartitions.get(pd);
    final PartitionData canonical;
    if (registered != null) {
        canonical = registered;
    } else {
        uniqueSetOfPartitions.put(pd, pd);
        canonical = pd;
    }

    Set<ByteArray> members = partitionsToIds.get(canonical);
    if (members == null) {
        members = new HashSet<>();
        partitionsToIds.put(canonical, members);
    }

    // Over the bound: reject. Exactly at the bound: warn, but still accept this item.
    final int memberCount = members.size();
    if (memberCount > upperBoundPerPartition) {
        return null;
    }
    if (memberCount == upperBoundPerPartition) {
        LOGGER.warn("At upper bound on partition.  Increase the bounds or condense the data.");
    }
    members.add(itemId);

    // Maintain the reverse lookup from the item to every partition containing it.
    Set<PartitionData> owners = idsToPartition.get(itemId);
    if (owners == null) {
        owners = new HashSet<>();
        idsToPartition.put(itemId, owners);
    }
    owners.add(canonical);

    return canonical;
}
Also used : PartitionData(org.locationtech.geowave.analytic.partitioner.Partitioner.PartitionData) ByteArray(org.locationtech.geowave.core.index.ByteArray)

Example 2 with PartitionData

use of org.locationtech.geowave.analytic.partitioner.Partitioner.PartitionData in project geowave by locationtech.

In the class NNProcessor, method process.

/**
 * Walks every primary item, collects its neighbors from the partitions it shares with other
 * items, and reports the resulting neighbor list through {@code notification}.
 *
 * <p>Processing starts at {@code startingPoint} when it is set, otherwise at an arbitrary primary.
 * After each primary is completed, the next one is the farthest measured neighbor that is still
 * awaiting inspection, or an arbitrary remaining primary when no such neighbor was found. The
 * loop ends when nothing is left to inspect.
 *
 * @param listFactory factory handed to the {@code NeighborIndex} created for this run
 * @param notification callback invoked once per processed primary with its neighbor list
 * @throws IOException declared by the signature; the throwing call is not visible in this method
 * @throws InterruptedException declared by the signature; the throwing call is not visible here
 */
public void process(final NeighborListFactory<STORE_VALUE> listFactory, final CompleteNotifier<STORE_VALUE> notification) throws IOException, InterruptedException {
    LOGGER.info("Processing " + parentPartition.toString() + " with primary = " + primaries.size() + " and other = " + others.size());
    LOGGER.info("Processing " + parentPartition.toString() + " with sub-partitions = " + uniqueSetOfPartitions.size());
    // A fresh index is built for every invocation.
    index = new NeighborIndex<>(listFactory);
    double farthestDistance = 0;
    ByteArray farthestNeighbor = null;
    ByteArray nextStart = startingPoint;
    // Ids of primaries that still have to be processed; shrinks as the loop progresses.
    final Set<ByteArray> inspectionSet = new HashSet<>();
    inspectionSet.addAll(primaries.keySet());
    // Without an explicit starting point, begin with whichever primary the set yields first.
    if ((inspectionSet.size() > 0) && (nextStart == null)) {
        nextStart = inspectionSet.iterator().next();
    }
    while (nextStart != null) {
        inspectionSet.remove(nextStart);
        // Reset the per-primary "farthest neighbor" tracking.
        farthestDistance = 0;
        final Set<PartitionData> partition = idsToPartition.get(nextStart);
        final STORE_VALUE primary = primaries.get(nextStart);
        final ByteArray primaryId = nextStart;
        nextStart = null;
        farthestNeighbor = null;
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("processing " + primaryId);
        }
        // The id is not (or no longer) a primary: skip it and move on to any remaining one.
        if (primary == null) {
            if (inspectionSet.size() > 0) {
                nextStart = inspectionSet.iterator().next();
            }
            continue;
        }
        final NeighborList<STORE_VALUE> primaryList = index.init(primaryId, primary);
        // Candidate neighbors are all ids sharing at least one partition with the primary.
        for (final PartitionData pd : partition) {
            for (final ByteArray neighborId : partitionsToIds.get(pd)) {
                // An item is never its own neighbor.
                if (neighborId.equals(primaryId)) {
                    continue;
                }
                boolean isAPrimary = true;
                // Look the candidate up among the primaries first, then among the others.
                STORE_VALUE neighbor = primaries.get(neighborId);
                if (neighbor == null) {
                    neighbor = others.get(neighborId);
                    isAPrimary = false;
                } else // a primary no longer in the inspection set was processed earlier (or dropped via REMOVE); skip it
                if (!inspectionSet.contains(neighborId)) {
                    continue;
                }
                // Unknown in both maps: nothing to compare against.
                if (neighbor == null) {
                    continue;
                }
                final InferType inferResult = primaryList.infer(neighborId, neighbor);
                if (inferResult == InferType.NONE) {
                    // Nothing could be inferred, so the distance has to be computed.
                    final DistanceProfile<?> distanceProfile = distanceProfileFn.computeProfile(primary, neighbor);
                    final double distance = distanceProfile.getDistance();
                    if (distance <= maxDistance) {
                        index.add(distanceProfile, primaryId, primary, neighborId, neighbor, isAPrimary);
                        if (LOGGER.isTraceEnabled()) {
                            LOGGER.trace("Neighbor " + neighborId);
                        }
                    }
                    // Track the farthest candidate that is still waiting to be processed; it
                    // becomes the next starting point. Note this is independent of maxDistance.
                    if ((distance > farthestDistance) && inspectionSet.contains(neighborId)) {
                        farthestDistance = distance;
                        farthestNeighbor = neighborId;
                    }
                } else if (inferResult == InferType.REMOVE) {
                    // The neighbor list asked for this id to be dropped from further inspection.
                    inspectionSet.remove(neighborId);
                }
            }
        }
        // Hand the finished list to the caller, then release the index entry for this primary.
        notification.complete(primaryId, primary, primaryList);
        index.empty(primaryId);
        // Continue from the farthest pending neighbor, or any remaining primary if there is none.
        if ((farthestNeighbor == null) && (inspectionSet.size() > 0)) {
            nextStart = inspectionSet.iterator().next();
        } else {
            nextStart = farthestNeighbor;
        }
    }
}
Also used : InferType(org.locationtech.geowave.analytic.nn.NeighborList.InferType) PartitionData(org.locationtech.geowave.analytic.partitioner.Partitioner.PartitionData) ByteArray(org.locationtech.geowave.core.index.ByteArray) HashSet(java.util.HashSet)

Example 3 with PartitionData

use of org.locationtech.geowave.analytic.partitioner.Partitioner.PartitionData in project geowave by locationtech.

In the class BoundaryDistancePartitionerTest, method test.

/**
 * Exercises {@code BoundaryPartitioner.getCubeIdentifiers} for three geometries: a point at the
 * origin, a point at x = -179.99999996, and a small linear ring near (88, 0).
 *
 * <p>For the two points, four partitions are expected and {@code hasNPrimary(partitions, 1)} must
 * hold; for the origin point every partition's ranges must additionally span zero in both
 * dimensions. For the ring, {@code hasNPrimary(partitions, 4)} must hold.
 */
@Test
public void test() throws IOException, ClassNotFoundException {
    final SimpleFeatureType featureType =
        AnalyticFeature.createGeometryFeatureAdapter(
            "centroid",
            new String[] { "extra1" },
            BasicFeatureTypes.DEFAULT_NAMESPACE,
            ClusteringUtils.CLUSTERING_CRS).getFeatureType();
    final GeometryFactory geometryFactory = new GeometryFactory();

    // Scenario 1 input: a point at the origin.
    final SimpleFeature originFeature =
        AnalyticFeature.createGeometryFeature(
            featureType,
            "b1",
            "123",
            "fred",
            "NA",
            20.30203,
            geometryFactory.createPoint(new Coordinate(0, 0)),
            new String[] { "extra1" },
            new double[] { 0.022 },
            1,
            1,
            0);

    // Partitioner configuration.
    final PropertyManagement properties = new PropertyManagement();
    properties.store(PartitionParameters.Partition.DISTANCE_THRESHOLDS, "10000");
    properties.store(CommonParameters.Common.INDEX_MODEL_BUILDER_CLASS, SpatialIndexModelBuilder.class);
    properties.store(ExtractParameters.Extract.DIMENSION_EXTRACT_CLASS, SimpleFeatureGeometryExtractor.class);
    properties.store(GlobalParameters.Global.CRS_ID, "EPSG:4326");
    properties.store(PartitionParameters.Partition.GEOMETRIC_DISTANCE_UNIT, "m");

    final BoundaryPartitioner partitioner = new BoundaryPartitioner();
    final Configuration jobConfiguration = new Configuration();
    final Class<?> configScope = BoundaryDistancePartitionerTest.class;
    properties.setJobConfiguration(jobConfiguration, configScope);
    partitioner.initialize(Job.getInstance(jobConfiguration), configScope);

    // Scenario 1: origin point.
    final List<PartitionData> originPartitions = partitioner.getCubeIdentifiers(originFeature);
    assertEquals(4, originPartitions.size());
    assertTrue(hasNPrimary(originPartitions, 1));
    for (final PartitionData originPartition : originPartitions) {
        final MultiDimensionalNumericData bounds = partitioner.getRangesForPartition(originPartition);
        // Every partition returned for the origin must contain zero in each dimension.
        assertTrue(bounds.getDataPerDimension()[0].getMin() < 0.0000000001);
        assertTrue(bounds.getDataPerDimension()[0].getMax() > -0.0000000001);
        assertTrue(bounds.getDataPerDimension()[1].getMin() < 0.00000000001);
        assertTrue(bounds.getDataPerDimension()[1].getMax() > -0.0000000001);
    }

    // Scenario 2: a point at x = -179.99999996.
    final SimpleFeature edgeFeature =
        AnalyticFeature.createGeometryFeature(
            featureType,
            "b1",
            "123",
            "fred",
            "NA",
            20.30203,
            geometryFactory.createPoint(new Coordinate(-179.99999996, 0)),
            new String[] { "extra1" },
            new double[] { 0.022 },
            1,
            1,
            0);
    final List<PartitionData> edgePartitions = partitioner.getCubeIdentifiers(edgeFeature);
    assertEquals(4, edgePartitions.size());
    assertTrue(hasNPrimary(edgePartitions, 1));

    // Scenario 3: a small closed ring near (88, 0).
    final SimpleFeature ringFeature =
        AnalyticFeature.createGeometryFeature(
            featureType,
            "b1",
            "123",
            "fred",
            "NA",
            20.30203,
            geometryFactory.createLinearRing(
                new Coordinate[] {
                    new Coordinate(88, 0),
                    new Coordinate(88, 0.001),
                    new Coordinate(88.001, 0.001),
                    new Coordinate(88.001, 0),
                    new Coordinate(88, 0) }),
            new String[] { "extra1" },
            new double[] { 0.022 },
            1,
            1,
            0);
    final List<PartitionData> ringPartitions = partitioner.getCubeIdentifiers(ringFeature);
    assertTrue(hasNPrimary(ringPartitions, 4));
}
Also used : MultiDimensionalNumericData(org.locationtech.geowave.core.index.numeric.MultiDimensionalNumericData) GeometryFactory(org.locationtech.jts.geom.GeometryFactory) Configuration(org.apache.hadoop.conf.Configuration) PropertyManagement(org.locationtech.geowave.analytic.PropertyManagement) SimpleFeature(org.opengis.feature.simple.SimpleFeature) SimpleFeatureType(org.opengis.feature.simple.SimpleFeatureType) Coordinate(org.locationtech.jts.geom.Coordinate) PartitionData(org.locationtech.geowave.analytic.partitioner.Partitioner.PartitionData) Test(org.junit.Test)

Example 4 with PartitionData

use of org.locationtech.geowave.analytic.partitioner.Partitioner.PartitionData in project geowave by locationtech.

In the class NNProcessor, method trimSmallPartitions.

/**
 * Drops every partition that holds fewer than {@code size} ids.
 *
 * <p>Each id of a dropped partition is detached from that partition in {@code idsToPartition};
 * an id left without any partition is also removed from {@code primaries} and {@code others}.
 *
 * @param size the minimum number of ids a partition must hold in order to be kept
 * @return {@code true} if no partitions remain after trimming
 */
public boolean trimSmallPartitions(final int size) {
    final Iterator<Map.Entry<PartitionData, Set<ByteArray>>> entries =
        partitionsToIds.entrySet().iterator();
    while (entries.hasNext()) {
        final Map.Entry<PartitionData, Set<ByteArray>> candidate = entries.next();
        final Set<ByteArray> memberIds = candidate.getValue();
        if (memberIds.size() >= size) {
            // Large enough: keep this partition untouched.
            continue;
        }
        final PartitionData droppedPartition = candidate.getKey();
        for (final ByteArray memberId : memberIds) {
            final Set<PartitionData> remainingPartitions = idsToPartition.get(memberId);
            remainingPartitions.remove(droppedPartition);
            if (remainingPartitions.isEmpty()) {
                // The id no longer belongs to any partition, so it cannot be processed.
                primaries.remove(memberId);
                others.remove(memberId);
            }
        }
        // Remove through the iterator so the map can be modified while it is being traversed.
        entries.remove();
    }
    return partitionsToIds.isEmpty();
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) PartitionData(org.locationtech.geowave.analytic.partitioner.Partitioner.PartitionData) ByteArray(org.locationtech.geowave.core.index.ByteArray) HashMap(java.util.HashMap) Map(java.util.Map)

Example 5 with PartitionData

use of org.locationtech.geowave.analytic.partitioner.Partitioner.PartitionData in project geowave by locationtech.

In the class NNProcessor, method remove.

/**
 * Removes every trace of the given id from this processor: its entry in {@code idsToPartition},
 * its membership in each partition's id set, its entries in {@code primaries} and {@code others},
 * and, when an index exists, its index entry.
 *
 * @param id the identifier of the item to remove
 */
public void remove(final ByteArray id) {
    final Set<PartitionData> owners = idsToPartition.remove(id);
    if (owners != null) {
        for (final PartitionData owner : owners) {
            final Set<ByteArray> members = partitionsToIds.get(owner);
            if (members == null) {
                // No id set is registered for this partition; nothing to detach.
                continue;
            }
            members.remove(id);
        }
    }

    primaries.remove(id);
    others.remove(id);

    // The index is only assigned by process(), so it may not exist yet.
    if (index != null) {
        index.empty(id);
    }
}
Also used : PartitionData(org.locationtech.geowave.analytic.partitioner.Partitioner.PartitionData) ByteArray(org.locationtech.geowave.core.index.ByteArray)

Aggregations

PartitionData (org.locationtech.geowave.analytic.partitioner.Partitioner.PartitionData)7 ByteArray (org.locationtech.geowave.core.index.ByteArray)5 Test (org.junit.Test)3 HashSet (java.util.HashSet)2 Configuration (org.apache.hadoop.conf.Configuration)2 PropertyManagement (org.locationtech.geowave.analytic.PropertyManagement)2 MultiDimensionalNumericData (org.locationtech.geowave.core.index.numeric.MultiDimensionalNumericData)2 Coordinate (org.locationtech.jts.geom.Coordinate)2 GeometryFactory (org.locationtech.jts.geom.GeometryFactory)2 SimpleFeature (org.opengis.feature.simple.SimpleFeature)2 SimpleFeatureType (org.opengis.feature.simple.SimpleFeatureType)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 ObjectInputStream (java.io.ObjectInputStream)1 ObjectOutputStream (java.io.ObjectOutputStream)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 Set (java.util.Set)1 DataInputByteBuffer (org.apache.hadoop.io.DataInputByteBuffer)1 DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer)1