Search in sources :

Example 1 with AnalyticItemWrapperFactory

use of org.locationtech.geowave.analytic.AnalyticItemWrapperFactory in project geowave by locationtech.

the class KMeansJumpJobRunner method run.

/**
 * Runs the parent job once per candidate centroid count and then retains the best set of groups
 * under the original batch id.
 *
 * <p>Before iterating, defaults are stored for the batch id, wrapper factory, distance function,
 * centroid extractor and dimension extractor when those properties are empty, and the adapter
 * and index are created through {@code ClusteringUtils}.
 *
 * <p>For each {@code k} starting at {@code max(2, round(min))} and stopping before
 * {@code round(max)} of the configured centroid range (default 2 to 200), the minimum, maximum
 * and actual sample sizes are all set to {@code k} and the parent job runs under the batch id
 * {@code <currentBatchId>_<k>}.
 *
 * @param configuration the configuration handed to {@code super.run}
 * @param propertyManagement the run-time properties read and updated by this runner
 * @return the first non-zero status returned by {@code super.run}; otherwise the result of
 *         {@code DistortionGroupManagement.retainBestGroups}; or {@code 1} if any exception is
 *         raised inside the iteration/retention phase
 * @throws Exception if the set-up performed before the guarded section fails
 */
@Override
@SuppressWarnings("unchecked")
public int run(final Configuration configuration, final PropertyManagement propertyManagement) throws Exception {
    propertyManagement.store(CentroidParameters.Centroid.ZOOM_LEVEL, currentZoomLevel);
    propertyManagement.storeIfEmpty(GlobalParameters.Global.BATCH_ID, UUID.randomUUID().toString());
    propertyManagement.storeIfEmpty(CentroidParameters.Centroid.WRAPPER_FACTORY_CLASS, SimpleFeatureItemWrapperFactory.class);
    propertyManagement.storeIfEmpty(CommonParameters.Common.DISTANCE_FUNCTION_CLASS, FeatureCentroidDistanceFn.class);
    propertyManagement.storeIfEmpty(CentroidParameters.Centroid.EXTRACTOR_CLASS, SimpleFeatureCentroidExtractor.class);
    propertyManagement.storeIfEmpty(CommonParameters.Common.DIMENSION_EXTRACT_CLASS, SimpleFeatureGeometryExtractor.class);
    propertyManagement.copy(CentroidParameters.Centroid.DATA_TYPE_ID, SampleParameters.Sample.DATA_TYPE_NAME);
    propertyManagement.copy(CentroidParameters.Centroid.INDEX_NAME, SampleParameters.Sample.INDEX_NAME);
    ClusteringUtils.createAdapter(propertyManagement);
    ClusteringUtils.createIndex(propertyManagement);
    final String currentBatchId = propertyManagement.getPropertyAsString(GlobalParameters.Global.BATCH_ID, UUID.randomUUID().toString());
    try {
        final NumericRange rangeOfIterations = propertyManagement.getPropertyAsRange(JumpParameters.Jump.RANGE_OF_CENTROIDS, new NumericRange(2, 200));
        propertyManagement.store(GlobalParameters.Global.PARENT_BATCH_ID, currentBatchId);
        final DataStorePluginOptions dataStoreOptions = ((PersistableStore) propertyManagement.getProperty(StoreParam.INPUT_STORE)).getDataStoreOptions();
        final DistortionGroupManagement distortionGroupManagement = new DistortionGroupManagement(dataStoreOptions);
        for (int k = (int) Math.max(2, Math.round(rangeOfIterations.getMin())); k < Math.round(rangeOfIterations.getMax()); k++) {
            // regardless of the algorithm, the sample set is fixed in size
            propertyManagement.store(SampleParameters.Sample.MIN_SAMPLE_SIZE, k);
            propertyManagement.store(SampleParameters.Sample.MAX_SAMPLE_SIZE, k);
            propertyManagement.store(SampleParameters.Sample.SAMPLE_SIZE, k);
            jumpRunner.setCentroidsCount(k);
            jumpRunner.setDataStoreOptions(dataStoreOptions);
            // each iteration gets its own batch id derived from the parent batch id
            final String iterationBatchId = currentBatchId + "_" + k;
            propertyManagement.store(GlobalParameters.Global.BATCH_ID, iterationBatchId);
            jumpRunner.setReducerCount(k);
            final int status = super.run(configuration, propertyManagement);
            if (status != 0) {
                // stop at the first failing iteration and report its status
                return status;
            }
        }
        // restore the original batch id now that the per-k iterations are done
        propertyManagement.store(GlobalParameters.Global.BATCH_ID, currentBatchId);
        @SuppressWarnings("rawtypes") final Class<AnalyticItemWrapperFactory> analyticItemWrapperFC = propertyManagement.getPropertyAsClass(CentroidParameters.Centroid.WRAPPER_FACTORY_CLASS, AnalyticItemWrapperFactory.class);
        // Class.newInstance() is deprecated and leaks checked constructor exceptions unwrapped;
        // go through the no-arg constructor explicitly instead.
        final AnalyticItemWrapperFactory<SimpleFeature> itemWrapperFactory = (AnalyticItemWrapperFactory<SimpleFeature>) analyticItemWrapperFC.getDeclaredConstructor().newInstance();
        // Associate the batch id with the best set of groups so the caller can find the clusters
        // for the given batch
        final int result = distortionGroupManagement.retainBestGroups(itemWrapperFactory, propertyManagement.getPropertyAsString(CentroidParameters.Centroid.DATA_TYPE_ID), propertyManagement.getPropertyAsString(CentroidParameters.Centroid.INDEX_NAME), currentBatchId, currentZoomLevel);
        return result;
    } catch (final Exception ex) {
        LOGGER.error("Cannot create distortions", ex);
        return 1;
    }
}
Also used : NumericRange(org.locationtech.geowave.core.index.numeric.NumericRange) DataStorePluginOptions(org.locationtech.geowave.core.store.cli.store.DataStorePluginOptions) DistortionGroupManagement(org.locationtech.geowave.analytic.clustering.DistortionGroupManagement) PersistableStore(org.locationtech.geowave.analytic.store.PersistableStore) SimpleFeature(org.opengis.feature.simple.SimpleFeature) AnalyticItemWrapperFactory(org.locationtech.geowave.analytic.AnalyticItemWrapperFactory)

Example 2 with AnalyticItemWrapperFactory

use of org.locationtech.geowave.analytic.AnalyticItemWrapperFactory in project geowave by locationtech.

the class GroupAssigmentJobRunnerTest method init.

/**
 * Prepares the runner under test.
 *
 * <p>Installs a stub {@code MapReduceIntegration} whose {@code waitForCompletion} asserts on the
 * submitted job's configuration, configures the runner's input format, zoom level and reducer
 * count, fills in the run-time properties, and registers the feature adapter with a store
 * selected through the "memory" plugin.
 */
@Before
public void init() {
    final SimpleFeatureType featureType =
        AnalyticFeature.createGeometryFeatureAdapter(
            "centroidtest",
            new String[] {"extra1"},
            BasicFeatureTypes.DEFAULT_NAMESPACE,
            ClusteringUtils.CLUSTERING_CRS).getFeatureType();

    final MapReduceIntegration stubIntegration = new MapReduceIntegration() {

        /** Copies the input store into the job configuration and runs the tool in-process. */
        @Override
        public int submit(final Configuration configuration, final PropertyManagement properties, final GeoWaveAnalyticJobRunner tool) throws Exception {
            tool.setConf(configuration);
            final ParameterHelper<Object> storeHelper = (ParameterHelper<Object>) StoreParam.INPUT_STORE.getHelper();
            final Object storeValue = StoreParam.INPUT_STORE.getHelper().getValue(properties);
            storeHelper.setValue(configuration, GroupAssignmentMapReduce.class, storeValue);
            return tool.run(new String[0]);
        }

        /** Verifies the job set-up instead of executing it, then returns empty counters. */
        @Override
        public Counters waitForCompletion(final Job job) throws ClassNotFoundException, IOException, InterruptedException {
            Assert.assertEquals(SequenceFileInputFormat.class, job.getInputFormatClass());
            Assert.assertEquals(10, job.getNumReduceTasks());

            final ScopedJobConfiguration scopedConfig = new ScopedJobConfiguration(job.getConfiguration(), GroupAssignmentMapReduce.class);
            Assert.assertEquals("file://foo/bin", job.getConfiguration().get("mapred.input.dir"));
            Assert.assertEquals(3, scopedConfig.getInt(CentroidParameters.Centroid.ZOOM_LEVEL, -1));
            Assert.assertEquals("b1234", scopedConfig.getString(GlobalParameters.Global.PARENT_BATCH_ID, ""));
            Assert.assertEquals("b12345", scopedConfig.getString(GlobalParameters.Global.BATCH_ID, ""));

            try {
                final AnalyticItemWrapperFactory<?> wrapperFactory =
                    scopedConfig.getInstance(
                        CentroidParameters.Centroid.WRAPPER_FACTORY_CLASS,
                        AnalyticItemWrapperFactory.class,
                        SimpleFeatureItemWrapperFactory.class);
                Assert.assertEquals(SimpleFeatureItemWrapperFactory.class, wrapperFactory.getClass());

                final DistanceFn<?> distanceFn =
                    scopedConfig.getInstance(
                        CommonParameters.Common.DISTANCE_FUNCTION_CLASS,
                        DistanceFn.class,
                        GeometryCentroidDistanceFn.class);
                Assert.assertEquals(FeatureCentroidDistanceFn.class, distanceFn.getClass());
            } catch (final InstantiationException | IllegalAccessException e) {
                throw new IOException("Unable to configure system", e);
            }
            return new Counters();
        }

        @Override
        public Job getJob(final Tool tool) throws IOException {
            return new Job(tool.getConf());
        }

        @Override
        public Configuration getConfiguration(final PropertyManagement properties) throws IOException {
            return new Configuration();
        }
    };
    runner.setMapReduceIntegrater(stubIntegration);

    // runner settings that waitForCompletion asserts on
    final Path inputPath = new Path("file://foo/bin");
    runner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(inputPath));
    runner.setZoomLevel(3);
    runner.setReducerCount(10);

    // run-time properties consumed by the job
    runTimeProperties.store(MRConfig.HDFS_BASE_DIR, "/");
    runTimeProperties.store(GlobalParameters.Global.BATCH_ID, "b12345");
    runTimeProperties.store(GlobalParameters.Global.PARENT_BATCH_ID, "b1234");
    runTimeProperties.store(CommonParameters.Common.DISTANCE_FUNCTION_CLASS, FeatureCentroidDistanceFn.class);

    // store selected through the "memory" plugin, namespaced per test class and method
    final DataStorePluginOptions storeOptions = new DataStorePluginOptions();
    GeoWaveStoreFinder.getRegisteredStoreFactoryFamilies().put("memory", new MemoryStoreFactoryFamily());
    storeOptions.selectPlugin("memory");
    final MemoryRequiredOptions memoryOptions = (MemoryRequiredOptions) storeOptions.getFactoryOptions();
    memoryOptions.setGeoWaveNamespace("test_" + getClass().getName() + "_" + name.getMethodName());
    runTimeProperties.store(StoreParam.INPUT_STORE, new PersistableStore(storeOptions));

    // register the feature adapter under its internal adapter id
    final FeatureDataAdapter featureAdapter = new FeatureDataAdapter(featureType);
    storeOptions.createAdapterStore().addAdapter(
        featureAdapter.asInternalAdapter(
            storeOptions.createInternalAdapterStore().addTypeName(featureAdapter.getTypeName())));
}
Also used : GeometryCentroidDistanceFn(org.locationtech.geowave.analytic.distance.GeometryCentroidDistanceFn) Configuration(org.apache.hadoop.conf.Configuration) ScopedJobConfiguration(org.locationtech.geowave.analytic.ScopedJobConfiguration) SequenceFileInputFormatConfiguration(org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration) GroupAssignmentMapReduce(org.locationtech.geowave.analytic.mapreduce.clustering.GroupAssignmentMapReduce) SimpleFeatureItemWrapperFactory(org.locationtech.geowave.analytic.SimpleFeatureItemWrapperFactory) MemoryStoreFactoryFamily(org.locationtech.geowave.core.store.memory.MemoryStoreFactoryFamily) DataStorePluginOptions(org.locationtech.geowave.core.store.cli.store.DataStorePluginOptions) Job(org.apache.hadoop.mapreduce.Job) MapReduceIntegration(org.locationtech.geowave.analytic.mapreduce.MapReduceIntegration) SequenceFileInputFormatConfiguration(org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration) MemoryRequiredOptions(org.locationtech.geowave.core.store.memory.MemoryRequiredOptions) AnalyticItemWrapperFactory(org.locationtech.geowave.analytic.AnalyticItemWrapperFactory) Tool(org.apache.hadoop.util.Tool) ScopedJobConfiguration(org.locationtech.geowave.analytic.ScopedJobConfiguration) Path(org.apache.hadoop.fs.Path) SequenceFileInputFormat(org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat) PropertyManagement(org.locationtech.geowave.analytic.PropertyManagement) IOException(java.io.IOException) FeatureCentroidDistanceFn(org.locationtech.geowave.analytic.distance.FeatureCentroidDistanceFn) GeometryCentroidDistanceFn(org.locationtech.geowave.analytic.distance.GeometryCentroidDistanceFn) DistanceFn(org.locationtech.geowave.analytic.distance.DistanceFn) IOException(java.io.IOException) GeoWaveAnalyticJobRunner(org.locationtech.geowave.analytic.mapreduce.GeoWaveAnalyticJobRunner) SimpleFeatureType(org.opengis.feature.simple.SimpleFeatureType) 
FeatureCentroidDistanceFn(org.locationtech.geowave.analytic.distance.FeatureCentroidDistanceFn) PersistableStore(org.locationtech.geowave.analytic.store.PersistableStore) Counters(org.apache.hadoop.mapreduce.Counters) FeatureDataAdapter(org.locationtech.geowave.adapter.vector.FeatureDataAdapter) Before(org.junit.Before)

Aggregations

AnalyticItemWrapperFactory (org.locationtech.geowave.analytic.AnalyticItemWrapperFactory)2 PersistableStore (org.locationtech.geowave.analytic.store.PersistableStore)2 DataStorePluginOptions (org.locationtech.geowave.core.store.cli.store.DataStorePluginOptions)2 IOException (java.io.IOException)1 Configuration (org.apache.hadoop.conf.Configuration)1 Path (org.apache.hadoop.fs.Path)1 Counters (org.apache.hadoop.mapreduce.Counters)1 Job (org.apache.hadoop.mapreduce.Job)1 SequenceFileInputFormat (org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat)1 Tool (org.apache.hadoop.util.Tool)1 Before (org.junit.Before)1 FeatureDataAdapter (org.locationtech.geowave.adapter.vector.FeatureDataAdapter)1 PropertyManagement (org.locationtech.geowave.analytic.PropertyManagement)1 ScopedJobConfiguration (org.locationtech.geowave.analytic.ScopedJobConfiguration)1 SimpleFeatureItemWrapperFactory (org.locationtech.geowave.analytic.SimpleFeatureItemWrapperFactory)1 DistortionGroupManagement (org.locationtech.geowave.analytic.clustering.DistortionGroupManagement)1 DistanceFn (org.locationtech.geowave.analytic.distance.DistanceFn)1 FeatureCentroidDistanceFn (org.locationtech.geowave.analytic.distance.FeatureCentroidDistanceFn)1 GeometryCentroidDistanceFn (org.locationtech.geowave.analytic.distance.GeometryCentroidDistanceFn)1 GeoWaveAnalyticJobRunner (org.locationtech.geowave.analytic.mapreduce.GeoWaveAnalyticJobRunner)1