Search in sources :

Example 1 with SequenceFileInputFormatConfiguration

use of org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration in project geowave by locationtech.

the class DBScanIterationsJobRunner method run.

/**
 * Runs the DBSCAN job iteratively and then hands the final output to the input load runner.
 * <p>
 * The first pass reads from the configured input format and writes a sequence file to
 * {@code <base>/level_0}. Every following pass reads the previous pass's output and writes to
 * {@code <base>/level_<iteration>} (iteration numbering starts at 2), with the partition
 * precision factor lowered by the configured decrease rate. Iteration stops when the iteration
 * budget is used up, when the precision factor is no longer positive, when the partitioner
 * rejects its configuration with an {@link IllegalArgumentException}, or when the reducer
 * output record count equals that of the previous pass.
 *
 * @param config the Hadoop configuration used to obtain the file system and to run each job
 * @param runTimeProperties the analytic properties; defaults for the batch id, partitioner
 *        class and distance thresholds are stored into it when absent
 * @return 0 on success, otherwise the first non-zero status reported by any of the jobs,
 *         including the final input load job
 * @throws Exception if a job, the partitioner initialization or a file system call fails
 */
@Override
public int run(final Configuration config, final PropertyManagement runTimeProperties) throws Exception {
    runTimeProperties.storeIfEmpty(GlobalParameters.Global.BATCH_ID, UUID.randomUUID().toString());
    FileSystem fs = null;
    try {
        fs = FileSystem.get(config);
        final String outputBaseDir = runTimeProperties.getPropertyAsString(MapReduceParameters.MRConfig.HDFS_BASE_DIR, "/tmp");
        Path startPath = new Path(outputBaseDir + "/level_0");
        if (fs.exists(startPath)) {
            // HPFortify "Path Manipulation"
            // False positive - path is internally managed
            fs.delete(startPath, true);
        }
        runTimeProperties.storeIfEmpty(Partition.PARTITIONER_CLASS, OrthodromicDistancePartitioner.class);
        final double maxDistance = runTimeProperties.getPropertyAsDouble(Partition.MAX_DISTANCE, 10);
        final double precisionDecreaseRate = runTimeProperties.getPropertyAsDouble(Partition.PARTITION_DECREASE_RATE, 0.15);
        double precisionFactor = runTimeProperties.getPropertyAsDouble(Partition.PARTITION_PRECISION, 1.0);
        runTimeProperties.storeIfEmpty(Partition.DISTANCE_THRESHOLDS, Double.toString(maxDistance));
        final boolean overrideSecondary = runTimeProperties.hasProperty(Partition.SECONDARY_PARTITIONER_CLASS);
        if (!overrideSecondary) {
            // Decide whether the primary partitioner should also act as the secondary one.
            final Serializable distances = runTimeProperties.get(Partition.DISTANCE_THRESHOLDS);
            final String dstStr = (distances == null) ? "0.000001" : distances.toString();
            final String[] distancesArray = dstStr.split(",");
            final double[] distancePerDimension = new double[distancesArray.length];
            for (int i = 0; i < distancesArray.length; i++) {
                distancePerDimension[i] = Double.parseDouble(distancesArray[i]);
            }
            // Product of the per-dimension thresholds.
            double total = 1.0;
            for (final double dist : distancePerDimension) {
                total *= dist;
            }
            // Secondary partitioning is enabled when the precision is already reduced or when
            // the threshold product is at least twice maxDistance raised to the dimension count.
            final boolean secondary = (precisionFactor < 1.0) || (total >= (Math.pow(maxDistance, distancePerDimension.length) * 2.0));
            if (secondary) {
                runTimeProperties.copy(Partition.PARTITIONER_CLASS, Partition.SECONDARY_PARTITIONER_CLASS);
            }
        }
        jobRunner.setInputFormatConfiguration(inputFormatConfiguration);
        jobRunner.setOutputFormatConfiguration(new SequenceFileOutputFormatConfiguration(startPath));
        LOGGER.info("Running with partition distance {}", maxDistance);
        // HP Fortify "Command Injection" false positive
        // What Fortify considers "externally-influenced input"
        // comes only from users with OS-level access anyway
        final int initialStatus = jobRunner.run(config, runTimeProperties);
        if (initialStatus != 0) {
            return initialStatus;
        }
        precisionFactor = precisionFactor - precisionDecreaseRate;
        int maxIterationCount = runTimeProperties.getPropertyAsInt(ClusteringParameters.Clustering.MAX_ITERATIONS, 15);
        int iteration = 2;
        long lastRecordCount = 0;
        while ((maxIterationCount > 0) && (precisionFactor > 0)) {
            try {
                final Partitioner<?> partitioner = runTimeProperties.getClassInstance(PartitionParameters.Partition.PARTITIONER_CLASS, Partitioner.class, OrthodromicDistancePartitioner.class);
                partitioner.initialize(Job.getInstance(config), partitioner.getClass());
            } catch (final IllegalArgumentException argEx) {
                // This occurs if the partitioner decides that the distance is invalid
                // (e.g. bigger than the map space). In this case, we just exit out of
                // the loop; startPath has the final data.
                LOGGER.info("Distance is invalid", argEx);
                break;
            } catch (final Exception e1) {
                throw new IOException(e1);
            }
            final PropertyManagement localScopeProperties = new PropertyManagement(runTimeProperties);
            /*
             * Re-partitioning the fat geometries can force a large number of partitions. The
             * geometries end up being represented in multiple partitions. Better to skip secondary
             * partitioning. 0.9 is a bit of a magic number. Ideally, it is based on the area of the
             * max distance cube divided by the area as defined by threshold distances. However,
             * looking up the partition dimension space or assuming only two dimensions were both
             * undesirable.
             */
            if ((precisionFactor <= 0.9) && !overrideSecondary) {
                localScopeProperties.store(Partition.SECONDARY_PARTITIONER_CLASS, PassthruPartitioner.class);
            }
            localScopeProperties.store(Partition.PARTITION_PRECISION, precisionFactor);
            jobRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(startPath));
            jobRunner.setFirstIteration(false);
            localScopeProperties.store(HullParameters.Hull.ZOOM_LEVEL, zoomLevel);
            localScopeProperties.store(HullParameters.Hull.ITERATION, iteration);
            localScopeProperties.storeIfEmpty(OutputParameters.Output.DATA_TYPE_ID, localScopeProperties.getPropertyAsString(HullParameters.Hull.DATA_TYPE_ID, "concave_hull"));
            // Set to zero to force each cluster to be moved into the next iteration
            // even if no merge occurs
            localScopeProperties.store(ClusteringParameters.Clustering.MINIMUM_SIZE, 0);
            final Path nextPath = new Path(outputBaseDir + "/level_" + iteration);
            if (fs.exists(nextPath)) {
                // HPFortify "Path Manipulation"
                // False positive - path is internally managed
                fs.delete(nextPath, true);
            }
            jobRunner.setOutputFormatConfiguration(new SequenceFileOutputFormatConfiguration(nextPath));
            // HP Fortify "Command Injection" false positive
            // What Fortify considers "externally-influenced input"
            // comes only from users with OS-level access anyway
            final int status = jobRunner.run(config, localScopeProperties);
            if (status != 0) {
                return status;
            }
            final long currentOutputCount = jobRunner.getCounterValue(TaskCounter.REDUCE_OUTPUT_RECORDS);
            if (currentOutputCount == lastRecordCount) {
                // Same record count as the previous pass: zero the budget so the loop ends.
                maxIterationCount = 0;
            }
            lastRecordCount = currentOutputCount;
            // The output of this pass becomes the input of the next one.
            startPath = nextPath;
            maxIterationCount--;
            precisionFactor -= precisionDecreaseRate;
            iteration++;
        }
        final PropertyManagement localScopeProperties = new PropertyManagement(runTimeProperties);
        localScopeProperties.storeIfEmpty(OutputParameters.Output.DATA_TYPE_ID, localScopeProperties.getPropertyAsString(HullParameters.Hull.DATA_TYPE_ID, "concave_hull"));
        localScopeProperties.storeIfEmpty(OutputParameters.Output.DATA_NAMESPACE_URI, localScopeProperties.getPropertyAsString(HullParameters.Hull.DATA_NAMESPACE_URI, BasicFeatureTypes.DEFAULT_NAMESPACE));
        localScopeProperties.storeIfEmpty(OutputParameters.Output.INDEX_ID, localScopeProperties.get(HullParameters.Hull.INDEX_NAME));
        inputLoadRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(startPath));
        // NOTE(review): localScopeProperties is populated with output defaults just above, yet
        // runTimeProperties is what is passed to the load runner (the loop passes its local
        // scope to jobRunner). Presumably localScopeProperties was intended here - confirm
        // before changing, since it may alter the output type/namespace/index used.
        // HP Fortify "Command Injection" false positive
        // What Fortify considers "externally-influenced input"
        // comes only from users with OS-level access anyway
        // Propagate the load job's status instead of unconditionally reporting success.
        return inputLoadRunner.run(config, runTimeProperties);
    } finally {
        if (fs != null) {
            fs.close();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Serializable(java.io.Serializable) PropertyManagement(org.locationtech.geowave.analytic.PropertyManagement) IOException(java.io.IOException) IOException(java.io.IOException) SequenceFileOutputFormatConfiguration(org.locationtech.geowave.analytic.mapreduce.SequenceFileOutputFormatConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) SequenceFileInputFormatConfiguration(org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration)

Example 2 with SequenceFileInputFormatConfiguration

use of org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration in project geowave by locationtech.

the class MultiLevelClusteringJobRunner method runJob.

/**
 * Extracts the input data once and then, for every zoom level, runs clustering followed by the
 * optional group assignment job and the hull job.
 * <p>
 * Processing stops at the first job that reports a non-zero status. When group assignments are
 * retained, the output of a level's group assignment becomes the input of the clustering, hull
 * and group assignment runners for the next level.
 *
 * @param config the Hadoop configuration passed to every job and used to obtain the file system
 * @param propertyManagement the analytic properties; extract, centroid and hull defaults are
 *        stored into it before the jobs run
 * @return the status of the last job that ran (0 when every job succeeded)
 * @throws Exception if a job or a file system call fails
 */
private int runJob(final Configuration config, final PropertyManagement propertyManagement) throws Exception {
    final ClusteringRunner clusterer = getClusteringRunner();
    final Integer levelCount = propertyManagement.getPropertyAsInt(Clustering.ZOOM_LEVELS, 1);
    jobExtractRunner.setConf(config);

    // Resolve the extract output type and namespace, falling back to defaults.
    final String extractTypeId = propertyManagement.getPropertyAsString(ExtractParameters.Extract.OUTPUT_DATA_TYPE_ID, "centroid");
    final String extractNamespace = propertyManagement.getPropertyAsString(ExtractParameters.Extract.DATA_NAMESPACE_URI, BasicFeatureTypes.DEFAULT_NAMESPACE);
    propertyManagement.storeIfEmpty(ExtractParameters.Extract.DATA_NAMESPACE_URI, extractNamespace);
    propertyManagement.storeIfEmpty(ExtractParameters.Extract.OUTPUT_DATA_TYPE_ID, extractTypeId);
    propertyManagement.storeIfEmpty(CentroidParameters.Centroid.EXTRACTOR_CLASS, SimpleFeatureCentroidExtractor.class);
    propertyManagement.storeIfEmpty(CommonParameters.Common.DIMENSION_EXTRACT_CLASS, SimpleFeatureGeometryExtractor.class);
    // Centroids always use the extract's type and namespace.
    propertyManagement.store(CentroidParameters.Centroid.DATA_TYPE_ID, extractTypeId);
    propertyManagement.store(CentroidParameters.Centroid.DATA_NAMESPACE_URI, extractNamespace);
    // TODO: set out index type for extracts?
    propertyManagement.storeIfEmpty(CentroidParameters.Centroid.INDEX_NAME, SpatialDimensionalityTypeProvider.createIndexFromOptions(new SpatialOptions()).getName());
    propertyManagement.storeIfEmpty(HullParameters.Hull.INDEX_NAME, SpatialDimensionalityTypeProvider.createIndexFromOptions(new SpatialOptions()).getName());

    // Step one: extract the data; all three runners initially read the extract output.
    int status = jobExtractRunner.run(config, propertyManagement);
    final Path extractedData = jobExtractRunner.getHdfsOutputPath();
    groupAssignmentRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(extractedData));
    clusterer.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(extractedData));
    hullRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(extractedData));

    final boolean keepAssignments = propertyManagement.getPropertyAsBoolean(Clustering.RETAIN_GROUP_ASSIGNMENTS, false);
    final String baseDir = propertyManagement.getPropertyAsString(MapReduceParameters.MRConfig.HDFS_BASE_DIR, "/tmp");

    // Step two: cluster each zoom level in turn (levels are numbered from 1).
    FileSystem fs = null;
    try {
        fs = FileSystem.get(config);
        int completedLevels = 0;
        while ((status == 0) && (completedLevels < levelCount)) {
            final int zoomLevel = ++completedLevels;
            clusterer.setZoomLevel(zoomLevel);
            hullRunner.setZoomLevel(zoomLevel);
            // need to get this removed at some point.
            propertyManagement.store(CentroidParameters.Centroid.ZOOM_LEVEL, zoomLevel);
            status = clusterer.run(config, propertyManagement);
            if (status != 0) {
                // Clustering failed for this level; stop and report its status.
                break;
            }
            final Path levelOutput = new Path(baseDir + "/level_" + zoomLevel);
            if (fs.exists(levelOutput)) {
                // HPFortify "Path Manipulation"
                // False positive - path is internally managed
                fs.delete(levelOutput, true);
            }
            groupAssignmentRunner.setOutputFormatConfiguration(new SequenceFileOutputFormatConfiguration(levelOutput));
            groupAssignmentRunner.setZoomLevel(zoomLevel);
            if (keepAssignments) {
                // HP Fortify "Command Injection" false positive
                // What Fortify considers "externally-influenced input"
                // comes only from users with OS-level access anyway
                status = groupAssignmentRunner.run(config, propertyManagement);
            }
            if (status == 0) {
                // HP Fortify "Command Injection" false positive
                // What Fortify considers "externally-influenced input"
                // comes only from users with OS-level access anyway
                status = hullRunner.run(config, propertyManagement);
            }
            if (keepAssignments) {
                // The next level reads this level's group assignment output.
                clusterer.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(levelOutput));
                hullRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(levelOutput));
                groupAssignmentRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(levelOutput));
            }
        }
        return status;
    } finally {
        if (fs != null) {
            fs.close();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SequenceFileOutputFormatConfiguration(org.locationtech.geowave.analytic.mapreduce.SequenceFileOutputFormatConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) SpatialOptions(org.locationtech.geowave.core.geotime.index.SpatialOptions) SequenceFileInputFormatConfiguration(org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration)

Example 3 with SequenceFileInputFormatConfiguration

use of org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration in project geowave by locationtech.

the class NNJobRunnerTest method init.

/**
 * Prepares the NN job runner under test.
 * <p>
 * Installs a stub {@link MapReduceIntegration} whose {@code waitForCompletion} asserts the
 * configuration of the submitted job instead of executing it, then sets the runner's input
 * path and reducer count and stores an in-memory data store plus the partitioning and distance
 * properties that the stub's assertions expect.
 */
@Before
public void init() {
    jjJobRunner.setMapReduceIntegrater(new MapReduceIntegration() {

        @Override
        public int submit(final Configuration configuration, final PropertyManagement runTimeProperties, final GeoWaveAnalyticJobRunner tool) throws Exception {
            tool.setConf(configuration);
            return ToolRunner.run(configuration, tool, new String[] {});
        }

        @Override
        public Counters waitForCompletion(final Job job) throws ClassNotFoundException, IOException, InterruptedException {
            // Verify the job was configured from the runner settings and runtime properties.
            Assert.assertEquals(SequenceFileInputFormat.class, job.getInputFormatClass());
            Assert.assertEquals(10, job.getNumReduceTasks());
            final ScopedJobConfiguration configWrapper = new ScopedJobConfiguration(job.getConfiguration(), NNMapReduce.class);
            Assert.assertEquals("file://foo/bin", job.getConfiguration().get("mapred.input.dir"));
            Assert.assertEquals(0.4, configWrapper.getDouble(Partition.MAX_DISTANCE, 0.0), 0.001);
            Assert.assertEquals(100, configWrapper.getInt(Partition.MAX_MEMBER_SELECTION, 1));
            try {
                final Partitioner<?> primary = configWrapper.getInstance(Partition.PARTITIONER_CLASS, Partitioner.class, null);
                Assert.assertEquals(OrthodromicDistancePartitioner.class, primary.getClass());
                final Partitioner<?> secondary = configWrapper.getInstance(Partition.SECONDARY_PARTITIONER_CLASS, Partitioner.class, null);
                Assert.assertEquals(OrthodromicDistancePartitioner.class, secondary.getClass());
                final DistanceFn<?> distanceFn = configWrapper.getInstance(CommonParameters.Common.DISTANCE_FUNCTION_CLASS, DistanceFn.class, GeometryCentroidDistanceFn.class);
                Assert.assertEquals(FeatureCentroidDistanceFn.class, distanceFn.getClass());
            } catch (final InstantiationException | IllegalAccessException e) {
                throw new IOException("Unable to configure system", e);
            }
            return new Counters();
        }

        @Override
        public Job getJob(final Tool tool) throws IOException {
            // Job.getInstance replaces the deprecated Job(Configuration) constructor.
            return Job.getInstance(tool.getConf());
        }

        @Override
        public Configuration getConfiguration(final PropertyManagement runTimeProperties) throws IOException {
            return new Configuration();
        }
    });
    jjJobRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(new Path("file://foo/bin")));
    jjJobRunner.setReducerCount(10);
    runTimeProperties.store(MRConfig.HDFS_BASE_DIR, "/");

    // Register and select the in-memory store, namespaced per test class and method.
    final DataStorePluginOptions pluginOptions = new DataStorePluginOptions();
    GeoWaveStoreFinder.getRegisteredStoreFactoryFamilies().put("memory", new MemoryStoreFactoryFamily());
    pluginOptions.selectPlugin("memory");
    final MemoryRequiredOptions opts = (MemoryRequiredOptions) pluginOptions.getFactoryOptions();
    final String namespace = "test_" + getClass().getName() + "_" + name.getMethodName();
    opts.setGeoWaveNamespace(namespace);
    final PersistableStore store = new PersistableStore(pluginOptions);
    runTimeProperties.store(StoreParam.INPUT_STORE, store);

    // Values checked by the assertions in waitForCompletion above.
    runTimeProperties.store(CommonParameters.Common.DISTANCE_FUNCTION_CLASS, FeatureCentroidDistanceFn.class);
    runTimeProperties.store(Partition.PARTITIONER_CLASS, OrthodromicDistancePartitioner.class);
    runTimeProperties.store(Partition.SECONDARY_PARTITIONER_CLASS, OrthodromicDistancePartitioner.class);
    runTimeProperties.store(Partition.MAX_DISTANCE, Double.valueOf(0.4));
    runTimeProperties.store(Partition.MAX_MEMBER_SELECTION, Integer.valueOf(100));
}
Also used : GeometryCentroidDistanceFn(org.locationtech.geowave.analytic.distance.GeometryCentroidDistanceFn) Configuration(org.apache.hadoop.conf.Configuration) ScopedJobConfiguration(org.locationtech.geowave.analytic.ScopedJobConfiguration) SequenceFileInputFormatConfiguration(org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration) OrthodromicDistancePartitioner(org.locationtech.geowave.analytic.partitioner.OrthodromicDistancePartitioner) MemoryStoreFactoryFamily(org.locationtech.geowave.core.store.memory.MemoryStoreFactoryFamily) DataStorePluginOptions(org.locationtech.geowave.core.store.cli.store.DataStorePluginOptions) Job(org.apache.hadoop.mapreduce.Job) MapReduceIntegration(org.locationtech.geowave.analytic.mapreduce.MapReduceIntegration) SequenceFileInputFormatConfiguration(org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration) MemoryRequiredOptions(org.locationtech.geowave.core.store.memory.MemoryRequiredOptions) Partitioner(org.locationtech.geowave.analytic.partitioner.Partitioner) OrthodromicDistancePartitioner(org.locationtech.geowave.analytic.partitioner.OrthodromicDistancePartitioner) Tool(org.apache.hadoop.util.Tool) ScopedJobConfiguration(org.locationtech.geowave.analytic.ScopedJobConfiguration) Path(org.apache.hadoop.fs.Path) SequenceFileInputFormat(org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat) PropertyManagement(org.locationtech.geowave.analytic.PropertyManagement) IOException(java.io.IOException) FeatureCentroidDistanceFn(org.locationtech.geowave.analytic.distance.FeatureCentroidDistanceFn) GeometryCentroidDistanceFn(org.locationtech.geowave.analytic.distance.GeometryCentroidDistanceFn) DistanceFn(org.locationtech.geowave.analytic.distance.DistanceFn) IOException(java.io.IOException) GeoWaveAnalyticJobRunner(org.locationtech.geowave.analytic.mapreduce.GeoWaveAnalyticJobRunner) FeatureCentroidDistanceFn(org.locationtech.geowave.analytic.distance.FeatureCentroidDistanceFn) 
PersistableStore(org.locationtech.geowave.analytic.store.PersistableStore) Counters(org.apache.hadoop.mapreduce.Counters) Before(org.junit.Before)

Example 4 with SequenceFileInputFormatConfiguration

use of org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration in project geowave by locationtech.

the class ConvexHullJobRunnerTest method init.

/**
 * Prepares the convex hull job runner under test.
 * <p>
 * Installs a stub {@link MapReduceIntegration} whose {@code submit} copies the input store into
 * the job configuration and whose {@code waitForCompletion} asserts the configuration of the
 * submitted job instead of executing it. It then stores the runtime properties, selects an
 * in-memory data store and registers the "centroidtest" feature adapter in it.
 */
@Before
public void init() {
    final SimpleFeatureType ftype = AnalyticFeature.createGeometryFeatureAdapter("centroidtest", new String[] { "extra1" }, BasicFeatureTypes.DEFAULT_NAMESPACE, ClusteringUtils.CLUSTERING_CRS).getFeatureType();
    hullRunner.setMapReduceIntegrater(new MapReduceIntegration() {

        @Override
        public int submit(final Configuration configuration, final PropertyManagement runTimeProperties, final GeoWaveAnalyticJobRunner tool) throws Exception {
            tool.setConf(configuration);
            // Make the input store visible to the job under the ConvexHullMapReduce scope.
            ((ParameterHelper<Object>) StoreParam.INPUT_STORE.getHelper()).setValue(configuration, ConvexHullMapReduce.class, StoreParam.INPUT_STORE.getHelper().getValue(runTimeProperties));
            return tool.run(new String[] {});
        }

        @Override
        public Counters waitForCompletion(final Job job) throws ClassNotFoundException, IOException, InterruptedException {
            // Verify the job was configured from the runtime properties stored below.
            Assert.assertEquals(SequenceFileInputFormat.class, job.getInputFormatClass());
            Assert.assertEquals(10, job.getNumReduceTasks());
            final ScopedJobConfiguration configWrapper = new ScopedJobConfiguration(job.getConfiguration(), ConvexHullMapReduce.class);
            Assert.assertEquals("file://foo/bin", job.getConfiguration().get("mapred.input.dir"));
            // One lookup of the input store serves both the index and the adapter checks.
            final PersistableStore persistableStore = (PersistableStore) StoreParam.INPUT_STORE.getHelper().getValue(job, ConvexHullMapReduce.class, null);
            final IndexStore indexStore = persistableStore.getDataStoreOptions().createIndexStore();
            try {
                Assert.assertTrue(indexStore.indexExists("spatial"));
                final PersistentAdapterStore adapterStore = persistableStore.getDataStoreOptions().createAdapterStore();
                Assert.assertTrue(adapterStore.adapterExists(persistableStore.getDataStoreOptions().createInternalAdapterStore().getAdapterId("centroidtest")));
                final Projection<?> projection = configWrapper.getInstance(HullParameters.Hull.PROJECTION_CLASS, Projection.class, SimpleFeatureProjection.class);
                Assert.assertEquals(SimpleFeatureProjection.class, projection.getClass());
            } catch (final InstantiationException | IllegalAccessException e) {
                throw new IOException("Unable to configure system", e);
            }
            Assert.assertEquals(2, configWrapper.getInt(CentroidParameters.Centroid.ZOOM_LEVEL, -1));
            return new Counters();
        }

        @Override
        public Job getJob(final Tool tool) throws IOException {
            // Job.getInstance replaces the deprecated Job(Configuration) constructor.
            return Job.getInstance(tool.getConf());
        }

        @Override
        public Configuration getConfiguration(final PropertyManagement runTimeProperties) throws IOException {
            return new Configuration();
        }
    });
    // No path is given here; the input path comes from HDFS_INPUT_PATH stored below.
    hullRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration());
    runTimeProperties.store(MRConfig.HDFS_BASE_DIR, "/");
    runTimeProperties.store(InputParameters.Input.HDFS_INPUT_PATH, new Path("file://foo/bin"));
    runTimeProperties.store(GlobalParameters.Global.BATCH_ID, "b1234");
    runTimeProperties.store(HullParameters.Hull.DATA_TYPE_ID, "hullType");
    runTimeProperties.store(HullParameters.Hull.REDUCER_COUNT, 10);
    runTimeProperties.store(HullParameters.Hull.INDEX_NAME, "spatial");

    // Register and select the in-memory store, namespaced per test class and method.
    final DataStorePluginOptions pluginOptions = new DataStorePluginOptions();
    GeoWaveStoreFinder.getRegisteredStoreFactoryFamilies().put("memory", new MemoryStoreFactoryFamily());
    pluginOptions.selectPlugin("memory");
    final MemoryRequiredOptions opts = (MemoryRequiredOptions) pluginOptions.getFactoryOptions();
    final String namespace = "test_" + getClass().getName() + "_" + name.getMethodName();
    opts.setGeoWaveNamespace(namespace);
    final PersistableStore store = new PersistableStore(pluginOptions);
    runTimeProperties.store(StoreParam.INPUT_STORE, store);

    // Register the feature adapter that waitForCompletion expects to find.
    final FeatureDataAdapter adapter = new FeatureDataAdapter(ftype);
    pluginOptions.createAdapterStore().addAdapter(adapter.asInternalAdapter(pluginOptions.createInternalAdapterStore().addTypeName(adapter.getTypeName())));
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ScopedJobConfiguration(org.locationtech.geowave.analytic.ScopedJobConfiguration) SequenceFileInputFormatConfiguration(org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration) SimpleFeatureProjection(org.locationtech.geowave.analytic.SimpleFeatureProjection) Projection(org.locationtech.geowave.analytic.Projection) MemoryStoreFactoryFamily(org.locationtech.geowave.core.store.memory.MemoryStoreFactoryFamily) DataStorePluginOptions(org.locationtech.geowave.core.store.cli.store.DataStorePluginOptions) Job(org.apache.hadoop.mapreduce.Job) MapReduceIntegration(org.locationtech.geowave.analytic.mapreduce.MapReduceIntegration) SequenceFileInputFormatConfiguration(org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration) MemoryRequiredOptions(org.locationtech.geowave.core.store.memory.MemoryRequiredOptions) Tool(org.apache.hadoop.util.Tool) ScopedJobConfiguration(org.locationtech.geowave.analytic.ScopedJobConfiguration) Path(org.apache.hadoop.fs.Path) ConvexHullMapReduce(org.locationtech.geowave.analytic.mapreduce.clustering.ConvexHullMapReduce) SequenceFileInputFormat(org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat) PropertyManagement(org.locationtech.geowave.analytic.PropertyManagement) IOException(java.io.IOException) IOException(java.io.IOException) GeoWaveAnalyticJobRunner(org.locationtech.geowave.analytic.mapreduce.GeoWaveAnalyticJobRunner) PersistentAdapterStore(org.locationtech.geowave.core.store.adapter.PersistentAdapterStore) SimpleFeatureType(org.opengis.feature.simple.SimpleFeatureType) PersistableStore(org.locationtech.geowave.analytic.store.PersistableStore) Counters(org.apache.hadoop.mapreduce.Counters) FeatureDataAdapter(org.locationtech.geowave.adapter.vector.FeatureDataAdapter) IndexStore(org.locationtech.geowave.core.store.index.IndexStore) SimpleFeatureProjection(org.locationtech.geowave.analytic.SimpleFeatureProjection) 
Before(org.junit.Before)

Example 5 with SequenceFileInputFormatConfiguration

use of org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration in project geowave by locationtech.

the class GroupAssigmentJobRunnerTest method init.

/**
 * Prepares the group assignment job runner under test.
 * <p>
 * Installs a stub {@link MapReduceIntegration} whose {@code submit} copies the input store into
 * the job configuration and whose {@code waitForCompletion} asserts the configuration of the
 * submitted job instead of executing it. It then sets the runner's input path, zoom level and
 * reducer count, stores the runtime properties, selects an in-memory data store and registers
 * the "centroidtest" feature adapter in it.
 */
@Before
public void init() {
    final SimpleFeatureType ftype = AnalyticFeature.createGeometryFeatureAdapter("centroidtest", new String[] { "extra1" }, BasicFeatureTypes.DEFAULT_NAMESPACE, ClusteringUtils.CLUSTERING_CRS).getFeatureType();
    runner.setMapReduceIntegrater(new MapReduceIntegration() {

        @Override
        public int submit(final Configuration configuration, final PropertyManagement runTimeProperties, final GeoWaveAnalyticJobRunner tool) throws Exception {
            tool.setConf(configuration);
            // Make the input store visible to the job under the GroupAssignmentMapReduce scope.
            ((ParameterHelper<Object>) StoreParam.INPUT_STORE.getHelper()).setValue(configuration, GroupAssignmentMapReduce.class, StoreParam.INPUT_STORE.getHelper().getValue(runTimeProperties));
            return tool.run(new String[] {});
        }

        @Override
        public Counters waitForCompletion(final Job job) throws ClassNotFoundException, IOException, InterruptedException {
            // Verify the job was configured from the runner settings and runtime properties.
            Assert.assertEquals(SequenceFileInputFormat.class, job.getInputFormatClass());
            Assert.assertEquals(10, job.getNumReduceTasks());
            final ScopedJobConfiguration configWrapper = new ScopedJobConfiguration(job.getConfiguration(), GroupAssignmentMapReduce.class);
            Assert.assertEquals("file://foo/bin", job.getConfiguration().get("mapred.input.dir"));
            Assert.assertEquals(3, configWrapper.getInt(CentroidParameters.Centroid.ZOOM_LEVEL, -1));
            Assert.assertEquals("b1234", configWrapper.getString(GlobalParameters.Global.PARENT_BATCH_ID, ""));
            Assert.assertEquals("b12345", configWrapper.getString(GlobalParameters.Global.BATCH_ID, ""));
            try {
                final AnalyticItemWrapperFactory<?> wrapperFactory = configWrapper.getInstance(CentroidParameters.Centroid.WRAPPER_FACTORY_CLASS, AnalyticItemWrapperFactory.class, SimpleFeatureItemWrapperFactory.class);
                Assert.assertEquals(SimpleFeatureItemWrapperFactory.class, wrapperFactory.getClass());
                final DistanceFn<?> distanceFn = configWrapper.getInstance(CommonParameters.Common.DISTANCE_FUNCTION_CLASS, DistanceFn.class, GeometryCentroidDistanceFn.class);
                Assert.assertEquals(FeatureCentroidDistanceFn.class, distanceFn.getClass());
            } catch (final InstantiationException | IllegalAccessException e) {
                throw new IOException("Unable to configure system", e);
            }
            return new Counters();
        }

        @Override
        public Job getJob(final Tool tool) throws IOException {
            // Job.getInstance replaces the deprecated Job(Configuration) constructor.
            return Job.getInstance(tool.getConf());
        }

        @Override
        public Configuration getConfiguration(final PropertyManagement runTimeProperties) throws IOException {
            return new Configuration();
        }
    });
    runner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(new Path("file://foo/bin")));
    runner.setZoomLevel(3);
    runner.setReducerCount(10);
    runTimeProperties.store(MRConfig.HDFS_BASE_DIR, "/");
    runTimeProperties.store(GlobalParameters.Global.BATCH_ID, "b12345");
    runTimeProperties.store(GlobalParameters.Global.PARENT_BATCH_ID, "b1234");
    runTimeProperties.store(CommonParameters.Common.DISTANCE_FUNCTION_CLASS, FeatureCentroidDistanceFn.class);

    // Register and select the in-memory store, namespaced per test class and method.
    final DataStorePluginOptions pluginOptions = new DataStorePluginOptions();
    GeoWaveStoreFinder.getRegisteredStoreFactoryFamilies().put("memory", new MemoryStoreFactoryFamily());
    pluginOptions.selectPlugin("memory");
    final MemoryRequiredOptions opts = (MemoryRequiredOptions) pluginOptions.getFactoryOptions();
    final String namespace = "test_" + getClass().getName() + "_" + name.getMethodName();
    opts.setGeoWaveNamespace(namespace);
    final PersistableStore store = new PersistableStore(pluginOptions);
    runTimeProperties.store(StoreParam.INPUT_STORE, store);

    // Register the "centroidtest" feature adapter in the in-memory store.
    final FeatureDataAdapter adapter = new FeatureDataAdapter(ftype);
    pluginOptions.createAdapterStore().addAdapter(adapter.asInternalAdapter(pluginOptions.createInternalAdapterStore().addTypeName(adapter.getTypeName())));
}
Also used : GeometryCentroidDistanceFn(org.locationtech.geowave.analytic.distance.GeometryCentroidDistanceFn) Configuration(org.apache.hadoop.conf.Configuration) ScopedJobConfiguration(org.locationtech.geowave.analytic.ScopedJobConfiguration) SequenceFileInputFormatConfiguration(org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration) GroupAssignmentMapReduce(org.locationtech.geowave.analytic.mapreduce.clustering.GroupAssignmentMapReduce) SimpleFeatureItemWrapperFactory(org.locationtech.geowave.analytic.SimpleFeatureItemWrapperFactory) MemoryStoreFactoryFamily(org.locationtech.geowave.core.store.memory.MemoryStoreFactoryFamily) DataStorePluginOptions(org.locationtech.geowave.core.store.cli.store.DataStorePluginOptions) Job(org.apache.hadoop.mapreduce.Job) MapReduceIntegration(org.locationtech.geowave.analytic.mapreduce.MapReduceIntegration) SequenceFileInputFormatConfiguration(org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration) MemoryRequiredOptions(org.locationtech.geowave.core.store.memory.MemoryRequiredOptions) AnalyticItemWrapperFactory(org.locationtech.geowave.analytic.AnalyticItemWrapperFactory) Tool(org.apache.hadoop.util.Tool) ScopedJobConfiguration(org.locationtech.geowave.analytic.ScopedJobConfiguration) Path(org.apache.hadoop.fs.Path) SequenceFileInputFormat(org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat) PropertyManagement(org.locationtech.geowave.analytic.PropertyManagement) IOException(java.io.IOException) FeatureCentroidDistanceFn(org.locationtech.geowave.analytic.distance.FeatureCentroidDistanceFn) GeometryCentroidDistanceFn(org.locationtech.geowave.analytic.distance.GeometryCentroidDistanceFn) DistanceFn(org.locationtech.geowave.analytic.distance.DistanceFn) IOException(java.io.IOException) GeoWaveAnalyticJobRunner(org.locationtech.geowave.analytic.mapreduce.GeoWaveAnalyticJobRunner) SimpleFeatureType(org.opengis.feature.simple.SimpleFeatureType) 
FeatureCentroidDistanceFn(org.locationtech.geowave.analytic.distance.FeatureCentroidDistanceFn) PersistableStore(org.locationtech.geowave.analytic.store.PersistableStore) Counters(org.apache.hadoop.mapreduce.Counters) FeatureDataAdapter(org.locationtech.geowave.adapter.vector.FeatureDataAdapter) Before(org.junit.Before)

Aggregations

Path (org.apache.hadoop.fs.Path)5 SequenceFileInputFormatConfiguration (org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration)5 IOException (java.io.IOException)4 PropertyManagement (org.locationtech.geowave.analytic.PropertyManagement)4 Configuration (org.apache.hadoop.conf.Configuration)3 Counters (org.apache.hadoop.mapreduce.Counters)3 Job (org.apache.hadoop.mapreduce.Job)3 SequenceFileInputFormat (org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat)3 Tool (org.apache.hadoop.util.Tool)3 Before (org.junit.Before)3 ScopedJobConfiguration (org.locationtech.geowave.analytic.ScopedJobConfiguration)3 GeoWaveAnalyticJobRunner (org.locationtech.geowave.analytic.mapreduce.GeoWaveAnalyticJobRunner)3 MapReduceIntegration (org.locationtech.geowave.analytic.mapreduce.MapReduceIntegration)3 PersistableStore (org.locationtech.geowave.analytic.store.PersistableStore)3 DataStorePluginOptions (org.locationtech.geowave.core.store.cli.store.DataStorePluginOptions)3 MemoryRequiredOptions (org.locationtech.geowave.core.store.memory.MemoryRequiredOptions)3 MemoryStoreFactoryFamily (org.locationtech.geowave.core.store.memory.MemoryStoreFactoryFamily)3 FileSystem (org.apache.hadoop.fs.FileSystem)2 FeatureDataAdapter (org.locationtech.geowave.adapter.vector.FeatureDataAdapter)2 DistanceFn (org.locationtech.geowave.analytic.distance.DistanceFn)2