use of org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration in project geowave by locationtech.
the class DBScanIterationsJobRunner method run.
/**
 * Runs the initial clustering job, then re-runs it over its own output with a decreasing partition
 * precision, and finally runs the input-load job over the last output produced.
 *
 * <p>The first job writes to {@code <HDFS_BASE_DIR>/level_0}. Each following iteration reads the
 * previous output and writes to {@code <HDFS_BASE_DIR>/level_<iteration>}, with iteration numbering
 * starting at 2. Iterating stops when the iteration budget is used up, the precision factor is no
 * longer positive, the partitioner rejects its configuration with an
 * {@link IllegalArgumentException}, or the reduce output record count equals that of the previous
 * iteration.
 *
 * @param config the Hadoop configuration used for the file system and every job
 * @param runTimeProperties the run properties; defaults are stored into it for any unset values
 * @return 0 on success, otherwise the non-zero status of the first job that failed, including the
 *         final input-load job
 * @throws Exception if a job, the file system, or partitioner initialization fails
 */
@Override
public int run(final Configuration config, final PropertyManagement runTimeProperties)
    throws Exception {
  runTimeProperties.storeIfEmpty(GlobalParameters.Global.BATCH_ID, UUID.randomUUID().toString());

  FileSystem fs = null;
  try {
    fs = FileSystem.get(config);
    final String outputBaseDir =
        runTimeProperties.getPropertyAsString(MapReduceParameters.MRConfig.HDFS_BASE_DIR, "/tmp");

    Path startPath = new Path(outputBaseDir + "/level_0");
    if (fs.exists(startPath)) {
      // HPFortify "Path Manipulation"
      // False positive - path is internally managed
      fs.delete(startPath, true);
    }

    runTimeProperties.storeIfEmpty(
        Partition.PARTITIONER_CLASS,
        OrthodromicDistancePartitioner.class);
    final double maxDistance = runTimeProperties.getPropertyAsDouble(Partition.MAX_DISTANCE, 10);
    final double precisionDecreaseRate =
        runTimeProperties.getPropertyAsDouble(Partition.PARTITION_DECREASE_RATE, 0.15);
    double precisionFactor =
        runTimeProperties.getPropertyAsDouble(Partition.PARTITION_PRECISION, 1.0);
    runTimeProperties.storeIfEmpty(Partition.DISTANCE_THRESHOLDS, Double.toString(maxDistance));

    final boolean overrideSecondary =
        runTimeProperties.hasProperty(Partition.SECONDARY_PARTITIONER_CLASS);
    if (!overrideSecondary) {
      // No secondary partitioner was configured explicitly: decide whether the primary
      // partitioner should also be used as the secondary one.
      final Serializable distances = runTimeProperties.get(Partition.DISTANCE_THRESHOLDS);
      String dstStr;
      if (distances == null) {
        dstStr = "0.000001";
      } else {
        dstStr = distances.toString();
      }
      final String[] distancesArray = dstStr.split(",");
      final double[] distancePerDimension = new double[distancesArray.length];
      for (int i = 0; i < distancesArray.length; i++) {
        distancePerDimension[i] = Double.parseDouble(distancesArray[i]);
      }

      // Product of the per-dimension thresholds, compared against twice maxDistance^dimensions.
      double total = 1.0;
      for (final double dist : distancePerDimension) {
        total *= dist;
      }
      boolean secondary = precisionFactor < 1.0;
      secondary |= (total >= (Math.pow(maxDistance, distancePerDimension.length) * 2.0));
      if (secondary) {
        runTimeProperties.copy(
            Partition.PARTITIONER_CLASS,
            Partition.SECONDARY_PARTITIONER_CLASS);
      }
    }

    jobRunner.setInputFormatConfiguration(inputFormatConfiguration);
    jobRunner.setOutputFormatConfiguration(new SequenceFileOutputFormatConfiguration(startPath));
    LOGGER.info("Running with partition distance {}", maxDistance);
    // HP Fortify "Command Injection" false positive
    // What Fortify considers "externally-influenced input"
    // comes only from users with OS-level access anyway
    final int initialStatus = jobRunner.run(config, runTimeProperties);
    if (initialStatus != 0) {
      return initialStatus;
    }

    precisionFactor = precisionFactor - precisionDecreaseRate;
    int maxIterationCount =
        runTimeProperties.getPropertyAsInt(ClusteringParameters.Clustering.MAX_ITERATIONS, 15);
    int iteration = 2;
    long lastRecordCount = 0;

    while ((maxIterationCount > 0) && (precisionFactor > 0)) {
      try {
        final Partitioner<?> partitioner =
            runTimeProperties.getClassInstance(
                PartitionParameters.Partition.PARTITIONER_CLASS,
                Partitioner.class,
                OrthodromicDistancePartitioner.class);
        partitioner.initialize(Job.getInstance(config), partitioner.getClass());
      } catch (final IllegalArgumentException argEx) {
        // This occurs if the partitioner decides that the distance is invalid
        // (e.g. bigger than the map space).
        // In this case, we just exit out of the loop.
        // startPath has the final data.
        LOGGER.info("Distance is invalid", argEx);
        break;
      } catch (final Exception e1) {
        throw new IOException(e1);
      }

      final PropertyManagement localScopeProperties = new PropertyManagement(runTimeProperties);

      /*
       * Re-partitioning the fat geometries can force a large number of partitions. The geometries
       * end up being represented in multiple partitions. Better to skip secondary partitioning.
       * 0.9 is a bit of a magic number. Ideally, it is based on the area of the max distance cube
       * divided by the area as defined by threshold distances. However, looking up the partition
       * dimension space or assuming only two dimensions were both undesirable.
       */
      if ((precisionFactor <= 0.9) && !overrideSecondary) {
        localScopeProperties.store(
            Partition.SECONDARY_PARTITIONER_CLASS,
            PassthruPartitioner.class);
      }
      localScopeProperties.store(Partition.PARTITION_PRECISION, precisionFactor);

      jobRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(startPath));
      jobRunner.setFirstIteration(false);

      localScopeProperties.store(HullParameters.Hull.ZOOM_LEVEL, zoomLevel);
      localScopeProperties.store(HullParameters.Hull.ITERATION, iteration);
      localScopeProperties.storeIfEmpty(
          OutputParameters.Output.DATA_TYPE_ID,
          localScopeProperties.getPropertyAsString(
              HullParameters.Hull.DATA_TYPE_ID,
              "concave_hull"));
      // Set to zero to force each cluster to be moved into the next iteration
      // even if no merge occurs
      localScopeProperties.store(ClusteringParameters.Clustering.MINIMUM_SIZE, 0);

      final Path nextPath = new Path(outputBaseDir + "/level_" + iteration);
      if (fs.exists(nextPath)) {
        // HPFortify "Path Manipulation"
        // False positive - path is internally managed
        fs.delete(nextPath, true);
      }
      jobRunner.setOutputFormatConfiguration(new SequenceFileOutputFormatConfiguration(nextPath));

      // HP Fortify "Command Injection" false positive
      // What Fortify considers "externally-influenced input"
      // comes only from users with OS-level access anyway
      final int status = jobRunner.run(config, localScopeProperties);
      if (status != 0) {
        return status;
      }

      final long currentOutputCount =
          jobRunner.getCounterValue(TaskCounter.REDUCE_OUTPUT_RECORDS);
      if (currentOutputCount == lastRecordCount) {
        // Same record count as the previous iteration: zero the budget so the loop ends.
        maxIterationCount = 0;
      }
      lastRecordCount = currentOutputCount;
      startPath = nextPath;
      maxIterationCount--;
      precisionFactor -= precisionDecreaseRate;
      iteration++;
    }

    // NOTE(review): the output defaults stored in localScopeProperties below are not handed to
    // inputLoadRunner, which receives runTimeProperties instead - confirm whether that is
    // intended before changing which properties object is passed.
    final PropertyManagement localScopeProperties = new PropertyManagement(runTimeProperties);
    localScopeProperties.storeIfEmpty(
        OutputParameters.Output.DATA_TYPE_ID,
        localScopeProperties.getPropertyAsString(
            HullParameters.Hull.DATA_TYPE_ID,
            "concave_hull"));
    localScopeProperties.storeIfEmpty(
        OutputParameters.Output.DATA_NAMESPACE_URI,
        localScopeProperties.getPropertyAsString(
            HullParameters.Hull.DATA_NAMESPACE_URI,
            BasicFeatureTypes.DEFAULT_NAMESPACE));
    localScopeProperties.storeIfEmpty(
        OutputParameters.Output.INDEX_ID,
        localScopeProperties.get(HullParameters.Hull.INDEX_NAME));

    inputLoadRunner.setInputFormatConfiguration(
        new SequenceFileInputFormatConfiguration(startPath));
    // HP Fortify "Command Injection" false positive
    // What Fortify considers "externally-influenced input"
    // comes only from users with OS-level access anyway
    // Propagate the load job's status so a failed load is not reported as success.
    return inputLoadRunner.run(config, runTimeProperties);
  } finally {
    if (fs != null) {
      fs.close();
    }
  }
}
use of org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration in project geowave by locationtech.
the class MultiLevelClusteringJobRunner method runJob.
/**
 * Extracts the input data once, then for each zoom level runs clustering, optionally group
 * assignment, and hull generation.
 *
 * <p>Zoom levels are numbered from 1 up to the configured {@code Clustering.ZOOM_LEVELS} value
 * (default 1). Processing stops at the first job that returns a non-zero status.
 *
 * @param config the Hadoop configuration used for the file system and every job
 * @param propertyManagement the run properties; defaults are stored into it for any unset values
 * @return 0 if every job succeeded, otherwise the status of the first job that failed
 * @throws Exception if a job or the file system fails
 */
private int runJob(final Configuration config, final PropertyManagement propertyManagement)
    throws Exception {
  final ClusteringRunner clusterer = getClusteringRunner();
  final Integer levelCount = propertyManagement.getPropertyAsInt(Clustering.ZOOM_LEVELS, 1);

  jobExtractRunner.setConf(config);

  final String extractTypeId =
      propertyManagement.getPropertyAsString(
          ExtractParameters.Extract.OUTPUT_DATA_TYPE_ID,
          "centroid");
  final String extractNamespace =
      propertyManagement.getPropertyAsString(
          ExtractParameters.Extract.DATA_NAMESPACE_URI,
          BasicFeatureTypes.DEFAULT_NAMESPACE);

  // Fill in defaults for anything the caller left unset.
  propertyManagement.storeIfEmpty(ExtractParameters.Extract.DATA_NAMESPACE_URI, extractNamespace);
  propertyManagement.storeIfEmpty(ExtractParameters.Extract.OUTPUT_DATA_TYPE_ID, extractTypeId);
  propertyManagement.storeIfEmpty(
      CentroidParameters.Centroid.EXTRACTOR_CLASS,
      SimpleFeatureCentroidExtractor.class);
  propertyManagement.storeIfEmpty(
      CommonParameters.Common.DIMENSION_EXTRACT_CLASS,
      SimpleFeatureGeometryExtractor.class);

  // The centroid type and namespace always follow the extract output.
  propertyManagement.store(CentroidParameters.Centroid.DATA_TYPE_ID, extractTypeId);
  propertyManagement.store(CentroidParameters.Centroid.DATA_NAMESPACE_URI, extractNamespace);

  // TODO: set out index type for extracts?
  propertyManagement.storeIfEmpty(
      CentroidParameters.Centroid.INDEX_NAME,
      SpatialDimensionalityTypeProvider.createIndexFromOptions(new SpatialOptions()).getName());
  propertyManagement.storeIfEmpty(
      HullParameters.Hull.INDEX_NAME,
      SpatialDimensionalityTypeProvider.createIndexFromOptions(new SpatialOptions()).getName());

  // Step one: extract the data; every runner initially reads the extract output.
  int status = jobExtractRunner.run(config, propertyManagement);
  final Path extractPath = jobExtractRunner.getHdfsOutputPath();
  groupAssignmentRunner.setInputFormatConfiguration(
      new SequenceFileInputFormatConfiguration(extractPath));
  clusterer.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(extractPath));
  hullRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(extractPath));

  final boolean retainGroupAssigments =
      propertyManagement.getPropertyAsBoolean(Clustering.RETAIN_GROUP_ASSIGNMENTS, false);

  // Step two: cluster level by level.
  final String outputBaseDir =
      propertyManagement.getPropertyAsString(MapReduceParameters.MRConfig.HDFS_BASE_DIR, "/tmp");
  FileSystem fs = null;
  try {
    fs = FileSystem.get(config);
    for (int levelIdx = 0; (status == 0) && (levelIdx < levelCount); levelIdx++) {
      final int zoomLevel = levelIdx + 1;
      clusterer.setZoomLevel(zoomLevel);
      hullRunner.setZoomLevel(zoomLevel);
      // need to get this removed at some point.
      propertyManagement.store(CentroidParameters.Centroid.ZOOM_LEVEL, zoomLevel);

      status = clusterer.run(config, propertyManagement);
      if (status != 0) {
        break;
      }

      final Path levelPath = new Path(outputBaseDir + "/" + "level_" + zoomLevel);
      if (fs.exists(levelPath)) {
        // HPFortify "Path Manipulation"
        // False positive - path is internally managed
        fs.delete(levelPath, true);
      }
      groupAssignmentRunner.setOutputFormatConfiguration(
          new SequenceFileOutputFormatConfiguration(levelPath));
      groupAssignmentRunner.setZoomLevel(zoomLevel);

      if (retainGroupAssigments) {
        // HP Fortify "Command Injection" false positive
        // What Fortify considers "externally-influenced input"
        // comes only from users with OS-level access anyway
        status = groupAssignmentRunner.run(config, propertyManagement);
      }
      if (status == 0) {
        // HP Fortify "Command Injection" false positive
        // What Fortify considers "externally-influenced input"
        // comes only from users with OS-level access anyway
        status = hullRunner.run(config, propertyManagement);
      }
      if (retainGroupAssigments) {
        // The next level reads the group assignments written for this level.
        clusterer.setInputFormatConfiguration(
            new SequenceFileInputFormatConfiguration(levelPath));
        hullRunner.setInputFormatConfiguration(
            new SequenceFileInputFormatConfiguration(levelPath));
        groupAssignmentRunner.setInputFormatConfiguration(
            new SequenceFileInputFormatConfiguration(levelPath));
      }
    }
    return status;
  } finally {
    if (fs != null) {
      fs.close();
    }
  }
}
use of org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration in project geowave by locationtech.
the class NNJobRunnerTest method init.
/**
 * Installs a stub {@link MapReduceIntegration} on the runner under test and seeds the run
 * properties.
 *
 * <p>The stub never submits a real job: its {@code waitForCompletion} asserts that the configured
 * job carries the expected input format, reducer count, input directory, partitioner classes,
 * distance function, maximum distance, and member selection limit, then returns empty counters.
 */
@Before
public void init() {
  jjJobRunner.setMapReduceIntegrater(new MapReduceIntegration() {
    @Override
    public int submit(
        final Configuration configuration,
        final PropertyManagement runTimeProperties,
        final GeoWaveAnalyticJobRunner tool) throws Exception {
      tool.setConf(configuration);
      return ToolRunner.run(configuration, tool, new String[] {});
    }

    @Override
    public Counters waitForCompletion(final Job job)
        throws ClassNotFoundException, IOException, InterruptedException {
      Assert.assertEquals(SequenceFileInputFormat.class, job.getInputFormatClass());
      Assert.assertEquals(10, job.getNumReduceTasks());
      final ScopedJobConfiguration configWrapper =
          new ScopedJobConfiguration(job.getConfiguration(), NNMapReduce.class);
      Assert.assertEquals("file://foo/bin", job.getConfiguration().get("mapred.input.dir"));
      Assert.assertEquals(0.4, configWrapper.getDouble(Partition.MAX_DISTANCE, 0.0), 0.001);
      Assert.assertEquals(100, configWrapper.getInt(Partition.MAX_MEMBER_SELECTION, 1));
      try {
        final Partitioner<?> wrapper =
            configWrapper.getInstance(Partition.PARTITIONER_CLASS, Partitioner.class, null);
        Assert.assertEquals(OrthodromicDistancePartitioner.class, wrapper.getClass());
        final Partitioner<?> secondary =
            configWrapper.getInstance(
                Partition.SECONDARY_PARTITIONER_CLASS,
                Partitioner.class,
                null);
        Assert.assertEquals(OrthodromicDistancePartitioner.class, secondary.getClass());
        final DistanceFn<?> distancFn =
            configWrapper.getInstance(
                CommonParameters.Common.DISTANCE_FUNCTION_CLASS,
                DistanceFn.class,
                GeometryCentroidDistanceFn.class);
        Assert.assertEquals(FeatureCentroidDistanceFn.class, distancFn.getClass());
      } catch (final InstantiationException | IllegalAccessException e) {
        throw new IOException("Unable to configure system", e);
      }
      Assert.assertEquals(10, job.getNumReduceTasks());
      return new Counters();
    }

    @Override
    public Job getJob(final Tool tool) throws IOException {
      // Factory method replaces the deprecated Job(Configuration) constructor.
      return Job.getInstance(tool.getConf());
    }

    @Override
    public Configuration getConfiguration(final PropertyManagement runTimeProperties)
        throws IOException {
      return new Configuration();
    }
  });

  jjJobRunner.setInputFormatConfiguration(
      new SequenceFileInputFormatConfiguration(new Path("file://foo/bin")));
  jjJobRunner.setReducerCount(10);

  runTimeProperties.store(MRConfig.HDFS_BASE_DIR, "/");

  // Back the input store with the in-memory plugin, namespaced per test class and method.
  final DataStorePluginOptions pluginOptions = new DataStorePluginOptions();
  GeoWaveStoreFinder.getRegisteredStoreFactoryFamilies().put(
      "memory",
      new MemoryStoreFactoryFamily());
  pluginOptions.selectPlugin("memory");
  final MemoryRequiredOptions opts = (MemoryRequiredOptions) pluginOptions.getFactoryOptions();
  final String namespace = "test_" + getClass().getName() + "_" + name.getMethodName();
  opts.setGeoWaveNamespace(namespace);
  final PersistableStore store = new PersistableStore(pluginOptions);
  runTimeProperties.store(StoreParam.INPUT_STORE, store);

  runTimeProperties.store(
      CommonParameters.Common.DISTANCE_FUNCTION_CLASS,
      FeatureCentroidDistanceFn.class);
  runTimeProperties.store(Partition.PARTITIONER_CLASS, OrthodromicDistancePartitioner.class);
  runTimeProperties.store(
      Partition.SECONDARY_PARTITIONER_CLASS,
      OrthodromicDistancePartitioner.class);
  runTimeProperties.store(Partition.MAX_DISTANCE, Double.valueOf(0.4));
  runTimeProperties.store(Partition.MAX_MEMBER_SELECTION, Integer.valueOf(100));
}
use of org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration in project geowave by locationtech.
the class ConvexHullJobRunnerTest method init.
/**
 * Installs a stub {@link MapReduceIntegration} on the hull runner and seeds the run properties
 * and the in-memory adapter store.
 *
 * <p>The stub never submits a real job: its {@code waitForCompletion} asserts that the configured
 * job carries the expected input format, reducer count, input directory, index, adapter,
 * projection class, and zoom level, then returns empty counters.
 */
@Before
public void init() {
  final SimpleFeatureType ftype =
      AnalyticFeature.createGeometryFeatureAdapter(
          "centroidtest",
          new String[] {"extra1"},
          BasicFeatureTypes.DEFAULT_NAMESPACE,
          ClusteringUtils.CLUSTERING_CRS).getFeatureType();

  hullRunner.setMapReduceIntegrater(new MapReduceIntegration() {
    @Override
    public int submit(
        final Configuration configuration,
        final PropertyManagement runTimeProperties,
        final GeoWaveAnalyticJobRunner tool) throws Exception {
      tool.setConf(configuration);
      // Copy the input store from the run properties into the job configuration.
      ((ParameterHelper<Object>) StoreParam.INPUT_STORE.getHelper()).setValue(
          configuration,
          ConvexHullMapReduce.class,
          StoreParam.INPUT_STORE.getHelper().getValue(runTimeProperties));
      return tool.run(new String[] {});
    }

    @Override
    public Counters waitForCompletion(final Job job)
        throws ClassNotFoundException, IOException, InterruptedException {
      Assert.assertEquals(SequenceFileInputFormat.class, job.getInputFormatClass());
      Assert.assertEquals(10, job.getNumReduceTasks());
      final ScopedJobConfiguration configWrapper =
          new ScopedJobConfiguration(job.getConfiguration(), ConvexHullMapReduce.class);
      Assert.assertEquals("file://foo/bin", job.getConfiguration().get("mapred.input.dir"));
      final PersistableStore persistableStore =
          (PersistableStore) StoreParam.INPUT_STORE.getHelper().getValue(
              job,
              ConvexHullMapReduce.class,
              null);
      final IndexStore indexStore = persistableStore.getDataStoreOptions().createIndexStore();
      try {
        Assert.assertTrue(indexStore.indexExists("spatial"));
        final PersistableStore persistableAdapterStore =
            (PersistableStore) StoreParam.INPUT_STORE.getHelper().getValue(
                job,
                ConvexHullMapReduce.class,
                null);
        final PersistentAdapterStore adapterStore =
            persistableAdapterStore.getDataStoreOptions().createAdapterStore();
        Assert.assertTrue(
            adapterStore.adapterExists(
                persistableAdapterStore.getDataStoreOptions().createInternalAdapterStore().getAdapterId(
                    "centroidtest")));
        final Projection<?> projection =
            configWrapper.getInstance(
                HullParameters.Hull.PROJECTION_CLASS,
                Projection.class,
                SimpleFeatureProjection.class);
        Assert.assertEquals(SimpleFeatureProjection.class, projection.getClass());
      } catch (final InstantiationException | IllegalAccessException e) {
        throw new IOException("Unable to configure system", e);
      }
      Assert.assertEquals(10, job.getNumReduceTasks());
      Assert.assertEquals(2, configWrapper.getInt(CentroidParameters.Centroid.ZOOM_LEVEL, -1));
      return new Counters();
    }

    @Override
    public Job getJob(final Tool tool) throws IOException {
      // Factory method replaces the deprecated Job(Configuration) constructor.
      return Job.getInstance(tool.getConf());
    }

    @Override
    public Configuration getConfiguration(final PropertyManagement runTimeProperties)
        throws IOException {
      return new Configuration();
    }
  });

  // No path is given here; the input path is supplied through the run properties below.
  hullRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration());

  runTimeProperties.store(MRConfig.HDFS_BASE_DIR, "/");
  runTimeProperties.store(InputParameters.Input.HDFS_INPUT_PATH, new Path("file://foo/bin"));
  runTimeProperties.store(GlobalParameters.Global.BATCH_ID, "b1234");
  runTimeProperties.store(HullParameters.Hull.DATA_TYPE_ID, "hullType");
  runTimeProperties.store(HullParameters.Hull.REDUCER_COUNT, 10);
  runTimeProperties.store(HullParameters.Hull.INDEX_NAME, "spatial");

  // Back the input store with the in-memory plugin, namespaced per test class and method.
  final DataStorePluginOptions pluginOptions = new DataStorePluginOptions();
  GeoWaveStoreFinder.getRegisteredStoreFactoryFamilies().put(
      "memory",
      new MemoryStoreFactoryFamily());
  pluginOptions.selectPlugin("memory");
  final MemoryRequiredOptions opts = (MemoryRequiredOptions) pluginOptions.getFactoryOptions();
  final String namespace = "test_" + getClass().getName() + "_" + name.getMethodName();
  opts.setGeoWaveNamespace(namespace);
  final PersistableStore store = new PersistableStore(pluginOptions);
  runTimeProperties.store(StoreParam.INPUT_STORE, store);

  // Register the "centroidtest" adapter that waitForCompletion checks for.
  final FeatureDataAdapter adapter = new FeatureDataAdapter(ftype);
  pluginOptions.createAdapterStore().addAdapter(
      adapter.asInternalAdapter(
          pluginOptions.createInternalAdapterStore().addTypeName(adapter.getTypeName())));
}
use of org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration in project geowave by locationtech.
the class GroupAssigmentJobRunnerTest method init.
/**
 * Installs a stub {@link MapReduceIntegration} on the group assignment runner and seeds the run
 * properties and the in-memory adapter store.
 *
 * <p>The stub never submits a real job: its {@code waitForCompletion} asserts that the configured
 * job carries the expected input format, reducer count, input directory, zoom level, batch ids,
 * wrapper factory, and distance function, then returns empty counters.
 */
@Before
public void init() {
  final SimpleFeatureType ftype =
      AnalyticFeature.createGeometryFeatureAdapter(
          "centroidtest",
          new String[] {"extra1"},
          BasicFeatureTypes.DEFAULT_NAMESPACE,
          ClusteringUtils.CLUSTERING_CRS).getFeatureType();

  runner.setMapReduceIntegrater(new MapReduceIntegration() {
    @Override
    public int submit(
        final Configuration configuration,
        final PropertyManagement runTimeProperties,
        final GeoWaveAnalyticJobRunner tool) throws Exception {
      tool.setConf(configuration);
      // Copy the input store from the run properties into the job configuration.
      ((ParameterHelper<Object>) StoreParam.INPUT_STORE.getHelper()).setValue(
          configuration,
          GroupAssignmentMapReduce.class,
          StoreParam.INPUT_STORE.getHelper().getValue(runTimeProperties));
      return tool.run(new String[] {});
    }

    @Override
    public Counters waitForCompletion(final Job job)
        throws ClassNotFoundException, IOException, InterruptedException {
      Assert.assertEquals(SequenceFileInputFormat.class, job.getInputFormatClass());
      Assert.assertEquals(10, job.getNumReduceTasks());
      final ScopedJobConfiguration configWrapper =
          new ScopedJobConfiguration(job.getConfiguration(), GroupAssignmentMapReduce.class);
      Assert.assertEquals("file://foo/bin", job.getConfiguration().get("mapred.input.dir"));
      Assert.assertEquals(3, configWrapper.getInt(CentroidParameters.Centroid.ZOOM_LEVEL, -1));
      Assert.assertEquals(
          "b1234",
          configWrapper.getString(GlobalParameters.Global.PARENT_BATCH_ID, ""));
      Assert.assertEquals(
          "b12345",
          configWrapper.getString(GlobalParameters.Global.BATCH_ID, ""));
      try {
        final AnalyticItemWrapperFactory<?> wrapper =
            configWrapper.getInstance(
                CentroidParameters.Centroid.WRAPPER_FACTORY_CLASS,
                AnalyticItemWrapperFactory.class,
                SimpleFeatureItemWrapperFactory.class);
        Assert.assertEquals(SimpleFeatureItemWrapperFactory.class, wrapper.getClass());
        final DistanceFn<?> distancFn =
            configWrapper.getInstance(
                CommonParameters.Common.DISTANCE_FUNCTION_CLASS,
                DistanceFn.class,
                GeometryCentroidDistanceFn.class);
        Assert.assertEquals(FeatureCentroidDistanceFn.class, distancFn.getClass());
      } catch (final InstantiationException | IllegalAccessException e) {
        throw new IOException("Unable to configure system", e);
      }
      return new Counters();
    }

    @Override
    public Job getJob(final Tool tool) throws IOException {
      // Factory method replaces the deprecated Job(Configuration) constructor.
      return Job.getInstance(tool.getConf());
    }

    @Override
    public Configuration getConfiguration(final PropertyManagement runTimeProperties)
        throws IOException {
      return new Configuration();
    }
  });

  runner.setInputFormatConfiguration(
      new SequenceFileInputFormatConfiguration(new Path("file://foo/bin")));
  runner.setZoomLevel(3);
  runner.setReducerCount(10);

  runTimeProperties.store(MRConfig.HDFS_BASE_DIR, "/");
  runTimeProperties.store(GlobalParameters.Global.BATCH_ID, "b12345");
  runTimeProperties.store(GlobalParameters.Global.PARENT_BATCH_ID, "b1234");
  runTimeProperties.store(
      CommonParameters.Common.DISTANCE_FUNCTION_CLASS,
      FeatureCentroidDistanceFn.class);

  // Back the input store with the in-memory plugin, namespaced per test class and method.
  final DataStorePluginOptions pluginOptions = new DataStorePluginOptions();
  GeoWaveStoreFinder.getRegisteredStoreFactoryFamilies().put(
      "memory",
      new MemoryStoreFactoryFamily());
  pluginOptions.selectPlugin("memory");
  final MemoryRequiredOptions opts = (MemoryRequiredOptions) pluginOptions.getFactoryOptions();
  final String namespace = "test_" + getClass().getName() + "_" + name.getMethodName();
  opts.setGeoWaveNamespace(namespace);
  final PersistableStore store = new PersistableStore(pluginOptions);
  runTimeProperties.store(StoreParam.INPUT_STORE, store);

  // Register the "centroidtest" adapter in the in-memory adapter store.
  final FeatureDataAdapter adapter = new FeatureDataAdapter(ftype);
  pluginOptions.createAdapterStore().addAdapter(
      adapter.asInternalAdapter(
          pluginOptions.createInternalAdapterStore().addTypeName(adapter.getTypeName())));
}
Aggregations