Search in sources :

Example 1 with SequenceFileOutputFormatConfiguration

use of org.locationtech.geowave.analytic.mapreduce.SequenceFileOutputFormatConfiguration in project geowave by locationtech.

In the class DBScanIterationsJobRunner, the method run.

/**
 * Runs the iterative clustering: one initial pass over the configured input format, then
 * repeated passes that each read the previous pass's sequence-file output with a reduced
 * partition precision, and finally hands the last output to {@code inputLoadRunner}.
 *
 * <p>Each pass writes to {@code <HDFS_BASE_DIR>/level_N} (base directory defaults to
 * {@code /tmp}); a directory that already exists under that name is deleted recursively first.
 *
 * @param config Hadoop configuration used to obtain the file system and to run every job
 * @param runTimeProperties analytic properties; missing defaults (batch id, partitioner class,
 *        distance thresholds and, conditionally, the secondary partitioner) are stored into it
 * @return the non-zero status of the first clustering pass that fails, otherwise the status
 *         returned by {@code inputLoadRunner}
 * @throws Exception if a job throws; a partitioner initialization failure other than
 *         {@link IllegalArgumentException} is rethrown wrapped in an {@link IOException}
 */
@Override
public int run(final Configuration config, final PropertyManagement runTimeProperties) throws Exception {
    runTimeProperties.storeIfEmpty(GlobalParameters.Global.BATCH_ID, UUID.randomUUID().toString());
    FileSystem fs = null;
    try {
        fs = FileSystem.get(config);
        final String outputBaseDir = runTimeProperties.getPropertyAsString(MapReduceParameters.MRConfig.HDFS_BASE_DIR, "/tmp");
        // startPath always points at the most recent successfully written output.
        Path startPath = new Path(outputBaseDir + "/level_0");
        if (fs.exists(startPath)) {
            // HPFortify "Path Manipulation"
            // False positive - path is internally managed
            fs.delete(startPath, true);
        }
        runTimeProperties.storeIfEmpty(Partition.PARTITIONER_CLASS, OrthodromicDistancePartitioner.class);
        final double maxDistance = runTimeProperties.getPropertyAsDouble(Partition.MAX_DISTANCE, 10);
        final double precisionDecreaseRate = runTimeProperties.getPropertyAsDouble(Partition.PARTITION_DECREASE_RATE, 0.15);
        double precisionFactor = runTimeProperties.getPropertyAsDouble(Partition.PARTITION_PRECISION, 1.0);
        runTimeProperties.storeIfEmpty(Partition.DISTANCE_THRESHOLDS, Double.toString(maxDistance));
        final boolean overrideSecondary = runTimeProperties.hasProperty(Partition.SECONDARY_PARTITIONER_CLASS);
        if (!overrideSecondary) {
            // No secondary partitioner was supplied: decide whether to reuse the primary one.
            final Serializable distances = runTimeProperties.get(Partition.DISTANCE_THRESHOLDS);
            final String dstStr = (distances == null) ? "0.000001" : distances.toString();
            final String[] distancesArray = dstStr.split(",");
            // Product of the per-dimension thresholds, multiplied in list order.
            double total = 1.0;
            for (final String eachDistance : distancesArray) {
                total *= Double.parseDouble(eachDistance);
            }
            // Secondary partitioning is enabled when the precision is already reduced, or when
            // the threshold product is at least twice maxDistance raised to the dimension count.
            final boolean secondary = (precisionFactor < 1.0) || (total >= (Math.pow(maxDistance, distancesArray.length) * 2.0));
            if (secondary) {
                runTimeProperties.copy(Partition.PARTITIONER_CLASS, Partition.SECONDARY_PARTITIONER_CLASS);
            }
        }
        jobRunner.setInputFormatConfiguration(inputFormatConfiguration);
        jobRunner.setOutputFormatConfiguration(new SequenceFileOutputFormatConfiguration(startPath));
        LOGGER.info("Running with partition distance {}", maxDistance);
        // HP Fortify "Command Injection" false positive
        // What Fortify considers "externally-influenced input"
        // comes only from users with OS-level access anyway
        final int initialStatus = jobRunner.run(config, runTimeProperties);
        if (initialStatus != 0) {
            return initialStatus;
        }
        precisionFactor = precisionFactor - precisionDecreaseRate;
        int maxIterationCount = runTimeProperties.getPropertyAsInt(ClusteringParameters.Clustering.MAX_ITERATIONS, 15);
        // The initial pass wrote level_0; follow-up passes are numbered from 2.
        int iteration = 2;
        long lastRecordCount = 0;
        while ((maxIterationCount > 0) && (precisionFactor > 0)) {
            try {
                // Initialize a partitioner up front purely to validate the current settings.
                final Partitioner<?> partitioner = runTimeProperties.getClassInstance(PartitionParameters.Partition.PARTITIONER_CLASS, Partitioner.class, OrthodromicDistancePartitioner.class);
                partitioner.initialize(Job.getInstance(config), partitioner.getClass());
            } catch (final IllegalArgumentException argEx) {
                // this occurs if the partitioner decides that the distance
                // is invalid (e.g. bigger than the map space).
                // In this case, we just exit out of the loop.
                // startPath has the final data
                LOGGER.info("Distance is invalid", argEx);
                break;
            } catch (final Exception e1) {
                throw new IOException(e1);
            }
            // Per-iteration overrides are stored on a scoped PropertyManagement built from
            // runTimeProperties rather than on runTimeProperties itself.
            final PropertyManagement localScopeProperties = new PropertyManagement(runTimeProperties);
            /*
             * Re-partitioning the fat geometries can force a large number of partitions. The geometries
             * end up being represented in multiple partitions. Better to skip secondary partitioning.
             * 0.9 is a bit of a magic number. Ideally, it is based on the area of the max distance cube
             * divided by the area as defined by threshold distances. However, looking up the partition
             * dimension space or assuming only two dimensions were both undesirable.
             */
            if ((precisionFactor <= 0.9) && !overrideSecondary) {
                localScopeProperties.store(Partition.SECONDARY_PARTITIONER_CLASS, PassthruPartitioner.class);
            }
            localScopeProperties.store(Partition.PARTITION_PRECISION, precisionFactor);
            jobRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(startPath));
            jobRunner.setFirstIteration(false);
            localScopeProperties.store(HullParameters.Hull.ZOOM_LEVEL, zoomLevel);
            localScopeProperties.store(HullParameters.Hull.ITERATION, iteration);
            localScopeProperties.storeIfEmpty(OutputParameters.Output.DATA_TYPE_ID, localScopeProperties.getPropertyAsString(HullParameters.Hull.DATA_TYPE_ID, "concave_hull"));
            // Set to zero to force each cluster to be moved into the next
            // iteration
            // even if no merge occurs
            localScopeProperties.store(ClusteringParameters.Clustering.MINIMUM_SIZE, 0);
            final Path nextPath = new Path(outputBaseDir + "/level_" + iteration);
            if (fs.exists(nextPath)) {
                // HPFortify "Path Manipulation"
                // False positive - path is internally managed
                fs.delete(nextPath, true);
            }
            jobRunner.setOutputFormatConfiguration(new SequenceFileOutputFormatConfiguration(nextPath));
            // HP Fortify "Command Injection" false positive
            // What Fortify considers "externally-influenced input"
            // comes only from users with OS-level access anyway
            final int status = jobRunner.run(config, localScopeProperties);
            if (status != 0) {
                return status;
            }
            final long currentOutputCount = jobRunner.getCounterValue(TaskCounter.REDUCE_OUTPUT_RECORDS);
            if (currentOutputCount == lastRecordCount) {
                // Same reduce output count as the previous pass: zero the budget so the loop
                // ends after the bookkeeping below (startPath must still advance to nextPath).
                maxIterationCount = 0;
            }
            lastRecordCount = currentOutputCount;
            startPath = nextPath;
            maxIterationCount--;
            precisionFactor -= precisionDecreaseRate;
            iteration++;
        }
        final PropertyManagement localScopeProperties = new PropertyManagement(runTimeProperties);
        localScopeProperties.storeIfEmpty(OutputParameters.Output.DATA_TYPE_ID, localScopeProperties.getPropertyAsString(HullParameters.Hull.DATA_TYPE_ID, "concave_hull"));
        localScopeProperties.storeIfEmpty(OutputParameters.Output.DATA_NAMESPACE_URI, localScopeProperties.getPropertyAsString(HullParameters.Hull.DATA_NAMESPACE_URI, BasicFeatureTypes.DEFAULT_NAMESPACE));
        localScopeProperties.storeIfEmpty(OutputParameters.Output.INDEX_ID, localScopeProperties.get(HullParameters.Hull.INDEX_NAME));
        inputLoadRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(startPath));
        // The output defaults stored just above live on localScopeProperties, so that is what
        // the load job receives; its status is returned so a failed load is not reported as
        // success.
        // HP Fortify "Command Injection" false positive
        // What Fortify considers "externally-influenced input"
        // comes only from users with OS-level access anyway
        return inputLoadRunner.run(config, localScopeProperties);
    } finally {
        if (fs != null) {
            // NOTE(review): FileSystem.get may hand back a cached, shared instance; closing it
            // here could affect other holders of the same instance - confirm this is intended.
            fs.close();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Serializable(java.io.Serializable) PropertyManagement(org.locationtech.geowave.analytic.PropertyManagement) IOException(java.io.IOException) IOException(java.io.IOException) SequenceFileOutputFormatConfiguration(org.locationtech.geowave.analytic.mapreduce.SequenceFileOutputFormatConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) SequenceFileInputFormatConfiguration(org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration)

Example 2 with SequenceFileOutputFormatConfiguration

use of org.locationtech.geowave.analytic.mapreduce.SequenceFileOutputFormatConfiguration in project geowave by locationtech.

In the class MultiLevelClusteringJobRunner, the method runJob.

/**
 * Extracts the input data once, then for each zoom level (1 up to the configured
 * {@code ZOOM_LEVELS}, default 1) runs the clustering job, optionally the group assignment
 * job, and the hull job. Processing stops at the first non-zero status.
 *
 * @param config Hadoop configuration passed to every job and used to obtain the file system
 * @param propertyManagement analytic properties; extract, centroid and hull defaults are
 *        stored into it before any job runs
 * @return 0 when every job succeeded, otherwise the status of the first job that failed
 * @throws Exception if any of the underlying jobs or file system calls throws
 */
private int runJob(final Configuration config, final PropertyManagement propertyManagement) throws Exception {
    final ClusteringRunner clusteringRunner = getClusteringRunner();
    final Integer zoomLevels = propertyManagement.getPropertyAsInt(Clustering.ZOOM_LEVELS, 1);
    jobExtractRunner.setConf(config);

    // Resolve the extract output type and namespace, then publish them as defaults.
    final String extractTypeId = propertyManagement.getPropertyAsString(ExtractParameters.Extract.OUTPUT_DATA_TYPE_ID, "centroid");
    final String extractNamespace = propertyManagement.getPropertyAsString(ExtractParameters.Extract.DATA_NAMESPACE_URI, BasicFeatureTypes.DEFAULT_NAMESPACE);
    propertyManagement.storeIfEmpty(ExtractParameters.Extract.DATA_NAMESPACE_URI, extractNamespace);
    propertyManagement.storeIfEmpty(ExtractParameters.Extract.OUTPUT_DATA_TYPE_ID, extractTypeId);
    propertyManagement.storeIfEmpty(CentroidParameters.Centroid.EXTRACTOR_CLASS, SimpleFeatureCentroidExtractor.class);
    propertyManagement.storeIfEmpty(CommonParameters.Common.DIMENSION_EXTRACT_CLASS, SimpleFeatureGeometryExtractor.class);
    // The centroid settings always mirror the extract output (store, not storeIfEmpty).
    propertyManagement.store(CentroidParameters.Centroid.DATA_TYPE_ID, extractTypeId);
    propertyManagement.store(CentroidParameters.Centroid.DATA_NAMESPACE_URI, extractNamespace);
    // TODO: set out index type for extracts?
    propertyManagement.storeIfEmpty(CentroidParameters.Centroid.INDEX_NAME, SpatialDimensionalityTypeProvider.createIndexFromOptions(new SpatialOptions()).getName());
    propertyManagement.storeIfEmpty(HullParameters.Hull.INDEX_NAME, SpatialDimensionalityTypeProvider.createIndexFromOptions(new SpatialOptions()).getName());

    // Step one: extract the data; every downstream runner initially reads the extract output.
    int status = jobExtractRunner.run(config, propertyManagement);
    final Path extractPath = jobExtractRunner.getHdfsOutputPath();
    groupAssignmentRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(extractPath));
    clusteringRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(extractPath));
    hullRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(extractPath));

    final boolean retainGroupAssigments = propertyManagement.getPropertyAsBoolean(Clustering.RETAIN_GROUP_ASSIGNMENTS, false);
    final String outputBaseDir = propertyManagement.getPropertyAsString(MapReduceParameters.MRConfig.HDFS_BASE_DIR, "/tmp");

    // Step two: cluster level by level for as long as everything keeps succeeding.
    FileSystem fs = null;
    try {
        fs = FileSystem.get(config);
        for (int zoomLevel = 1; (status == 0) && (zoomLevel <= zoomLevels); zoomLevel++) {
            clusteringRunner.setZoomLevel(zoomLevel);
            hullRunner.setZoomLevel(zoomLevel);
            // need to get this removed at some point.
            propertyManagement.store(CentroidParameters.Centroid.ZOOM_LEVEL, zoomLevel);

            status = clusteringRunner.run(config, propertyManagement);
            if (status != 0) {
                // Clustering failed for this level; nothing further to run.
                break;
            }

            final Path levelPath = new Path(outputBaseDir + "/level_" + zoomLevel);
            if (fs.exists(levelPath)) {
                // HPFortify "Path Manipulation"
                // False positive - path is internally managed
                fs.delete(levelPath, true);
            }
            groupAssignmentRunner.setOutputFormatConfiguration(new SequenceFileOutputFormatConfiguration(levelPath));
            groupAssignmentRunner.setZoomLevel(zoomLevel);

            if (retainGroupAssigments) {
                // HP Fortify "Command Injection" false positive
                // What Fortify considers "externally-influenced input"
                // comes only from users with OS-level access anyway
                status = groupAssignmentRunner.run(config, propertyManagement);
            }
            if (status == 0) {
                // HP Fortify "Command Injection" false positive
                // What Fortify considers "externally-influenced input"
                // comes only from users with OS-level access anyway
                status = hullRunner.run(config, propertyManagement);
            }
            if (retainGroupAssigments) {
                // With retained assignments, every runner reads this level's output next.
                clusteringRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(levelPath));
                hullRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(levelPath));
                groupAssignmentRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(levelPath));
            }
        }
    } finally {
        if (fs != null) {
            fs.close();
        }
    }
    return status;
}
Also used : Path(org.apache.hadoop.fs.Path) SequenceFileOutputFormatConfiguration(org.locationtech.geowave.analytic.mapreduce.SequenceFileOutputFormatConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) SpatialOptions(org.locationtech.geowave.core.geotime.index.SpatialOptions) SequenceFileInputFormatConfiguration(org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration)

Aggregations

FileSystem (org.apache.hadoop.fs.FileSystem)2 Path (org.apache.hadoop.fs.Path)2 SequenceFileInputFormatConfiguration (org.locationtech.geowave.analytic.mapreduce.SequenceFileInputFormatConfiguration)2 SequenceFileOutputFormatConfiguration (org.locationtech.geowave.analytic.mapreduce.SequenceFileOutputFormatConfiguration)2 IOException (java.io.IOException)1 Serializable (java.io.Serializable)1 PropertyManagement (org.locationtech.geowave.analytic.PropertyManagement)1 SpatialOptions (org.locationtech.geowave.core.geotime.index.SpatialOptions)1