Search in sources :

Example 1 with ExtractGeometryFilterVisitor

use of org.locationtech.geowave.core.geotime.util.ExtractGeometryFilterVisitor in project geowave by locationtech.

The example below is the run method of the class KMeansRunner.

/**
 * Runs the KMeans analytic: validates the inputs, builds a vector query over the requested
 * feature type(s), optionally constrains it with the geometry extracted from the CQL filter,
 * loads the matching data as an RDD, trains the KMeans model and writes the results to the
 * output store.
 *
 * @throws IOException if no input data store was supplied
 * @throws ParameterException if time is enabled but the time parameters could not be set
 */
public void run() throws IOException {
    initContext();
    // Validate inputs
    if (inputDataStore == null) {
        LOGGER.error("You must supply an input datastore!");
        throw new IOException("You must supply an input datastore!");
    }
    if (isUseTime()) {
        scaledRange = KMeansUtils.setRunnerTimeParams(this, inputDataStore, typeName);
        if (scaledRange == null) {
            LOGGER.error("Failed to set time params for kmeans. Please specify a valid feature type.");
            throw new ParameterException("--useTime option: Failed to set time params");
        }
    }
    // Retrieve the feature adapters
    final VectorQueryBuilder bldr = VectorQueryBuilder.newBuilder();
    final List<String> featureTypeNames;
    if (typeName != null) {
        // If provided, just use the one
        featureTypeNames = new ArrayList<>();
        featureTypeNames.add(typeName);
    } else {
        // otherwise, grab all the feature adapters
        featureTypeNames = FeatureDataUtils.getFeatureTypeNames(inputDataStore);
    }
    bldr.setTypeNames(featureTypeNames.toArray(new String[0]));
    // This is required due to some funkiness in GeoWaveInputFormat
    final PersistentAdapterStore adapterStore = inputDataStore.createAdapterStore();
    final InternalAdapterStore internalAdapterStore = inputDataStore.createInternalAdapterStore();
    // Add a spatial filter if requested
    try {
        if (cqlFilter != null) {
            Geometry bbox = null;
            // Without an explicit type name, the first known feature type supplies the
            // geometry attribute used to interpret the filter.
            final String cqlTypeName = (typeName == null) ? featureTypeNames.get(0) : typeName;
            final short adapterId = internalAdapterStore.getAdapterId(cqlTypeName);
            final DataTypeAdapter<?> adapter = adapterStore.getAdapter(adapterId).getAdapter();
            if (adapter instanceof GeotoolsFeatureDataAdapter) {
                final String geometryAttribute = ((GeotoolsFeatureDataAdapter) adapter).getFeatureType().getGeometryDescriptor().getLocalName();
                final Filter filter = ECQL.toFilter(cqlFilter);
                final ExtractGeometryFilterVisitorResult geoAndCompareOpData = (ExtractGeometryFilterVisitorResult) filter.accept(new ExtractGeometryFilterVisitor(GeometryUtils.getDefaultCRS(), geometryAttribute), null);
                bbox = geoAndCompareOpData.getGeometry();
            }
            // Only constrain the query when the filter yielded a bounded geometry
            if ((bbox != null) && !bbox.equals(GeometryUtils.infinity())) {
                bldr.constraints(bldr.constraintsFactory().spatialTemporalConstraints().spatialConstraints(bbox).build());
            }
        }
    } catch (final CQLException e) {
        // Best effort: continue without a spatial constraint, but keep the parse failure
        // (cause and position) in the log instead of discarding it.
        LOGGER.error("Unable to parse CQL: " + cqlFilter, e);
    }
    // Load RDD from datastore
    final RDDOptions kmeansOpts = new RDDOptions();
    kmeansOpts.setMinSplits(minSplits);
    kmeansOpts.setMaxSplits(maxSplits);
    kmeansOpts.setQuery(bldr.build());
    final GeoWaveRDD kmeansRDD = GeoWaveRDDLoader.loadRDD(session.sparkContext(), inputDataStore, kmeansOpts);
    // Retrieve the input centroids
    LOGGER.debug("Retrieving input centroids from RDD...");
    centroidVectors = RDDUtils.rddFeatureVectors(kmeansRDD, timeField, scaledTimeRange);
    // Cached before the KMeans run below
    centroidVectors.cache();
    // Init the algorithm
    final KMeans kmeans = new KMeans();
    kmeans.setInitializationMode("kmeans||");
    kmeans.setK(numClusters);
    kmeans.setMaxIterations(numIterations);
    // Epsilon is only applied when greater than -1.0
    if (epsilon > -1.0) {
        kmeans.setEpsilon(epsilon);
    }
    // Run KMeans
    LOGGER.debug("Running KMeans algorithm...");
    outputModel = kmeans.run(centroidVectors.rdd());
    LOGGER.debug("Writing results to output store...");
    writeToOutputStore();
    LOGGER.debug("Results successfully written!");
}
Also used : VectorQueryBuilder(org.locationtech.geowave.core.geotime.store.query.api.VectorQueryBuilder) InternalAdapterStore(org.locationtech.geowave.core.store.adapter.InternalAdapterStore) KMeans(org.apache.spark.mllib.clustering.KMeans) IOException(java.io.IOException) RDDOptions(org.locationtech.geowave.analytic.spark.RDDOptions) Geometry(org.locationtech.jts.geom.Geometry) PersistentAdapterStore(org.locationtech.geowave.core.store.adapter.PersistentAdapterStore) GeotoolsFeatureDataAdapter(org.locationtech.geowave.core.geotime.store.GeotoolsFeatureDataAdapter) Filter(org.opengis.filter.Filter) ExtractGeometryFilterVisitorResult(org.locationtech.geowave.core.geotime.util.ExtractGeometryFilterVisitorResult) ParameterException(com.beust.jcommander.ParameterException) GeoWaveRDD(org.locationtech.geowave.analytic.spark.GeoWaveRDD) CQLException(org.geotools.filter.text.cql2.CQLException) ExtractGeometryFilterVisitor(org.locationtech.geowave.core.geotime.util.ExtractGeometryFilterVisitor)

Example 2 with ExtractGeometryFilterVisitor

use of org.locationtech.geowave.core.geotime.util.ExtractGeometryFilterVisitor in project geowave by locationtech.

The example below is the runJob method of the class KDEJobRunner.

/**
 * Main method to execute the MapReduce analytic.
 *
 * <p>Runs job 1 over the input data store and, if it succeeds, chains the stats reducer job
 * (job 2) followed by the post-job-2 actions. When a tile size larger than 1 is requested, the
 * KDE output is written to a temporary namespace and a resize command then produces the final
 * coverage in the originally requested output store.
 *
 * @return 0 if job 1, job 2 and the post-job-2 actions all succeeded, 1 otherwise
 * @throws Exception if configuration, job submission or any delegated command fails
 */
@SuppressWarnings("deprecation")
public int runJob() throws Exception {
    Configuration conf = super.getConf();
    if (conf == null) {
        conf = new Configuration();
        setConf(conf);
    }
    // Pick the first non-null index, or the one matching the requested index name
    Index inputPrimaryIndex = null;
    final Index[] idxArray = inputDataStoreOptions.createDataStore().getIndices();
    for (final Index idx : idxArray) {
        if ((idx != null) && ((kdeCommandLineOptions.getIndexName() == null) || kdeCommandLineOptions.getIndexName().equals(idx.getName()))) {
            inputPrimaryIndex = idx;
            break;
        }
    }
    // NOTE(review): inputPrimaryIndex is still null here if no index matched; confirm that
    // GeometryUtils.getIndexCrs tolerates a null index.
    final CoordinateReferenceSystem inputIndexCrs = GeometryUtils.getIndexCrs(inputPrimaryIndex);
    final String inputCrsCode = GeometryUtils.getCrsCode(inputIndexCrs);
    Index outputPrimaryIndex = outputIndex;
    CoordinateReferenceSystem outputIndexCrs = null;
    String outputCrsCode = null;
    if (outputPrimaryIndex != null) {
        outputIndexCrs = GeometryUtils.getIndexCrs(outputPrimaryIndex);
        outputCrsCode = GeometryUtils.getCrsCode(outputIndexCrs);
    } else {
        // No output index supplied: create a spatial index using the input CRS
        final SpatialDimensionalityTypeProvider sdp = new SpatialDimensionalityTypeProvider();
        final SpatialOptions so = sdp.createOptions();
        so.setCrs(inputCrsCode);
        outputPrimaryIndex = SpatialDimensionalityTypeProvider.createIndexFromOptions(so);
        outputIndexCrs = inputIndexCrs;
        outputCrsCode = inputCrsCode;
    }
    final CoordinateSystem cs = outputIndexCrs.getCoordinateSystem();
    final CoordinateSystemAxis csx = cs.getAxis(0);
    final CoordinateSystemAxis csy = cs.getAxis(1);
    final double xMax = csx.getMaximumValue();
    final double xMin = csx.getMinimumValue();
    final double yMax = csy.getMaximumValue();
    final double yMin = csy.getMinimumValue();
    if ((xMax == Double.POSITIVE_INFINITY) || (xMin == Double.NEGATIVE_INFINITY) || (yMax == Double.POSITIVE_INFINITY) || (yMin == Double.NEGATIVE_INFINITY)) {
        final String unboundedCrsMessage = "Raster KDE resize with raster primary index CRS dimensions min/max equal to positive infinity or negative infinity is not supported";
        LOGGER.error(unboundedCrsMessage);
        throw new RuntimeException(unboundedCrsMessage);
    }
    DataStorePluginOptions rasterResizeOutputDataStoreOptions;
    String kdeCoverageName;
    // When a tile size larger than 1 is requested, write the KDE output to a temporary
    // namespace and then run a resize operation
    if ((kdeCommandLineOptions.getTileSize() > 1)) {
        // this is the ending data store options after resize, the KDE will
        // need to output to a temporary namespace, a resize operation
        // will use the outputDataStoreOptions
        rasterResizeOutputDataStoreOptions = outputDataStoreOptions;
        // first clone the outputDataStoreOptions, then set it to a tmp
        // namespace
        final Map<String, String> configOptions = outputDataStoreOptions.getOptionsAsMap();
        final StoreFactoryOptions options = ConfigUtils.populateOptionsFromList(outputDataStoreOptions.getFactoryFamily().getDataStoreFactory().createOptionsInstance(), configOptions);
        options.setGeoWaveNamespace(outputDataStoreOptions.getGeoWaveNamespace() + "_tmp");
        outputDataStoreOptions = new DataStorePluginOptions(options);
        kdeCoverageName = kdeCommandLineOptions.getCoverageName() + TMP_COVERAGE_SUFFIX;
    } else {
        rasterResizeOutputDataStoreOptions = null;
        kdeCoverageName = kdeCommandLineOptions.getCoverageName();
    }
    // Fall back to the HDFS URL from the config file when none was given on the command line
    if (kdeCommandLineOptions.getHdfsHostPort() == null) {
        final Properties configProperties = ConfigOptions.loadProperties(configFile);
        final String hdfsFSUrl = ConfigHDFSCommand.getHdfsUrl(configProperties);
        kdeCommandLineOptions.setHdfsHostPort(hdfsFSUrl);
    }
    GeoWaveConfiguratorBase.setRemoteInvocationParams(kdeCommandLineOptions.getHdfsHostPort(), kdeCommandLineOptions.getJobTrackerOrResourceManHostPort(), conf);
    conf.setInt(MAX_LEVEL_KEY, kdeCommandLineOptions.getMaxLevel());
    conf.setInt(MIN_LEVEL_KEY, kdeCommandLineOptions.getMinLevel());
    conf.set(COVERAGE_NAME_KEY, kdeCoverageName);
    if (kdeCommandLineOptions.getCqlFilter() != null) {
        conf.set(GaussianCellMapper.CQL_FILTER_KEY, kdeCommandLineOptions.getCqlFilter());
    }
    conf.setDouble(X_MIN_KEY, xMin);
    conf.setDouble(X_MAX_KEY, xMax);
    conf.setDouble(Y_MIN_KEY, yMin);
    conf.setDouble(Y_MAX_KEY, yMax);
    conf.set(INPUT_CRSCODE_KEY, inputCrsCode);
    conf.set(OUTPUT_CRSCODE_KEY, outputCrsCode);
    preJob1Setup(conf);
    // Job copies the Configuration it is given, so every job-1 setting from here on must go
    // through job.getConfiguration() rather than conf.
    final Job job = new Job(conf);
    job.setJarByClass(this.getClass());
    addJobClasspathDependencies(job, conf);
    job.setJobName(getJob1Name());
    job.setMapperClass(getJob1Mapper());
    job.setCombinerClass(CellSummationCombiner.class);
    job.setReducerClass(getJob1Reducer());
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(DoubleWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(GeoWaveInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(8);
    job.setSpeculativeExecution(false);
    final PersistentAdapterStore adapterStore = inputDataStoreOptions.createAdapterStore();
    final IndexStore indexStore = inputDataStoreOptions.createIndexStore();
    final InternalAdapterStore internalAdapterStore = inputDataStoreOptions.createInternalAdapterStore();
    final short internalAdapterId = internalAdapterStore.getAdapterId(kdeCommandLineOptions.getFeatureType());
    final DataTypeAdapter<?> adapter = adapterStore.getAdapter(internalAdapterId).getAdapter();
    VectorQueryBuilder bldr = VectorQueryBuilder.newBuilder().addTypeName(adapter.getTypeName());
    if (kdeCommandLineOptions.getIndexName() != null) {
        bldr = bldr.indexName(kdeCommandLineOptions.getIndexName());
    }
    GeoWaveInputFormat.setMinimumSplitCount(job.getConfiguration(), kdeCommandLineOptions.getMinSplits());
    GeoWaveInputFormat.setMaximumSplitCount(job.getConfiguration(), kdeCommandLineOptions.getMaxSplits());
    GeoWaveInputFormat.setStoreOptions(job.getConfiguration(), inputDataStoreOptions);
    if (kdeCommandLineOptions.getCqlFilter() != null) {
        Geometry bbox = null;
        if (adapter instanceof GeotoolsFeatureDataAdapter) {
            final String geometryAttribute = ((GeotoolsFeatureDataAdapter) adapter).getFeatureType().getGeometryDescriptor().getLocalName();
            final Filter filter = ECQL.toFilter(kdeCommandLineOptions.getCqlFilter());
            final ExtractGeometryFilterVisitorResult geoAndCompareOpData = (ExtractGeometryFilterVisitorResult) filter.accept(new ExtractGeometryFilterVisitor(GeometryUtils.getDefaultCRS(), geometryAttribute), null);
            bbox = geoAndCompareOpData.getGeometry();
        }
        // Only constrain the query when the filter yielded a bounded geometry
        if ((bbox != null) && !bbox.equals(GeometryUtils.infinity())) {
            bldr = bldr.constraints(bldr.constraintsFactory().spatialTemporalConstraints().spatialConstraints(bbox).build());
        }
    }
    // Set the query on the job's own configuration (consistent with the other
    // GeoWaveInputFormat settings above); setting it on conf would not reach job 1.
    GeoWaveInputFormat.setQuery(job.getConfiguration(), bldr.build(), adapterStore, internalAdapterStore, indexStore);
    // Scratch directory shared by both jobs; job 1 writes to its "basic" sub-directory
    final String statsTmpDir = "/tmp/" + inputDataStoreOptions.getGeoWaveNamespace() + "_stats_" + kdeCommandLineOptions.getMinLevel() + "_" + kdeCommandLineOptions.getMaxLevel() + "_" + kdeCommandLineOptions.getCoverageName();
    final String job1OutputDir = statsTmpDir + "/basic";
    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
        // Clear any leftovers from a previous run
        fs.delete(new Path(statsTmpDir), true);
        FileOutputFormat.setOutputPath(job, new Path(job1OutputDir));
        final boolean job1Success = job.waitForCompletion(true);
        boolean job2Success = false;
        boolean postJob2Success = false;
        // Linear MapReduce job chaining
        if (job1Success) {
            setupEntriesPerLevel(job, conf);
            // Stats Reducer Job configuration parameters
            final Job statsReducer = new Job(conf);
            statsReducer.setJarByClass(this.getClass());
            addJobClasspathDependencies(statsReducer, conf);
            statsReducer.setJobName(getJob2Name());
            statsReducer.setMapperClass(IdentityMapper.class);
            statsReducer.setPartitionerClass(getJob2Partitioner());
            statsReducer.setReducerClass(getJob2Reducer());
            statsReducer.setNumReduceTasks(getJob2NumReducers((kdeCommandLineOptions.getMaxLevel() - kdeCommandLineOptions.getMinLevel()) + 1));
            statsReducer.setMapOutputKeyClass(DoubleWritable.class);
            statsReducer.setMapOutputValueClass(LongWritable.class);
            statsReducer.setOutputKeyClass(getJob2OutputKeyClass());
            statsReducer.setOutputValueClass(getJob2OutputValueClass());
            statsReducer.setInputFormatClass(SequenceFileInputFormat.class);
            statsReducer.setOutputFormatClass(getJob2OutputFormatClass());
            FileInputFormat.setInputPaths(statsReducer, new Path(job1OutputDir));
            setupJob2Output(conf, statsReducer, outputDataStoreOptions.getGeoWaveNamespace(), kdeCoverageName, outputPrimaryIndex);
            job2Success = statsReducer.waitForCompletion(true);
            if (job2Success) {
                postJob2Success = postJob2Actions(conf, outputDataStoreOptions.getGeoWaveNamespace(), kdeCoverageName);
            }
        }
        if (rasterResizeOutputDataStoreOptions != null) {
            // delegate to resize command to wrap it up with the correctly
            // requested tile size
            final ResizeMRCommand resizeCommand = new ResizeMRCommand();
            // Named distinctly so it no longer shadows the configFile field used above
            final File tempConfigFile = File.createTempFile("temp-config", null);
            try {
                final ManualOperationParams params = new ManualOperationParams();
                params.getContext().put(ConfigOptions.PROPERTIES_FILE_CONTEXT, tempConfigFile);
                // Register the temporary and final stores under throwaway names
                final AddStoreCommand addStore = new AddStoreCommand();
                addStore.setParameters("temp-out");
                addStore.setPluginOptions(outputDataStoreOptions);
                addStore.execute(params);
                addStore.setParameters("temp-raster-out");
                addStore.setPluginOptions(rasterResizeOutputDataStoreOptions);
                addStore.execute(params);
                // We're going to override these anyway.
                resizeCommand.setParameters("temp-out", "temp-raster-out");
                resizeCommand.getOptions().setInputCoverageName(kdeCoverageName);
                resizeCommand.getOptions().setMinSplits(kdeCommandLineOptions.getMinSplits());
                resizeCommand.getOptions().setMaxSplits(kdeCommandLineOptions.getMaxSplits());
                resizeCommand.setHdfsHostPort(kdeCommandLineOptions.getHdfsHostPort());
                resizeCommand.setJobTrackerOrResourceManHostPort(kdeCommandLineOptions.getJobTrackerOrResourceManHostPort());
                resizeCommand.getOptions().setOutputCoverageName(kdeCommandLineOptions.getCoverageName());
                resizeCommand.getOptions().setOutputTileSize(kdeCommandLineOptions.getTileSize());
                final int resizeStatus = ToolRunner.run(resizeCommand.createRunner(params), new String[] {});
                if (resizeStatus == 0) {
                    // delegate to clear command to clean up with tmp namespace
                    // after successful resize
                    final ClearStoreCommand clearCommand = new ClearStoreCommand();
                    clearCommand.setParameters("temp-out");
                    clearCommand.execute(params);
                } else {
                    LOGGER.warn("Resize command error code '" + resizeStatus + "'.  Retaining temporary namespace '" + outputDataStoreOptions.getGeoWaveNamespace() + "' with tile size of 1.");
                }
            } finally {
                // The temporary config file is only needed by the commands above
                if (!tempConfigFile.delete()) {
                    LOGGER.warn("Unable to delete temporary config file '" + tempConfigFile.getAbsolutePath() + "'");
                }
            }
        }
        fs.delete(new Path(statsTmpDir), true);
        return (job1Success && job2Success && postJob2Success) ? 0 : 1;
    } finally {
        if (fs != null) {
            try {
                fs.close();
            } catch (final IOException e) {
                // Attempt to close, but don't throw an error if it is
                // already closed.
                // Log message, so find bugs does not complain.
                LOGGER.info(e.getMessage());
            }
        }
    }
}
Also used : VectorQueryBuilder(org.locationtech.geowave.core.geotime.store.query.api.VectorQueryBuilder) Configuration(org.apache.hadoop.conf.Configuration) ClearStoreCommand(org.locationtech.geowave.core.store.cli.store.ClearStoreCommand) CoordinateSystem(org.opengis.referencing.cs.CoordinateSystem) CoordinateSystemAxis(org.opengis.referencing.cs.CoordinateSystemAxis) Index(org.locationtech.geowave.core.store.api.Index) Properties(java.util.Properties) AddStoreCommand(org.locationtech.geowave.core.store.cli.store.AddStoreCommand) SpatialDimensionalityTypeProvider(org.locationtech.geowave.core.geotime.index.SpatialDimensionalityTypeProvider) ManualOperationParams(org.locationtech.geowave.core.cli.parser.ManualOperationParams) DataStorePluginOptions(org.locationtech.geowave.core.store.cli.store.DataStorePluginOptions) GeotoolsFeatureDataAdapter(org.locationtech.geowave.core.geotime.store.GeotoolsFeatureDataAdapter) FileSystem(org.apache.hadoop.fs.FileSystem) ExtractGeometryFilterVisitorResult(org.locationtech.geowave.core.geotime.util.ExtractGeometryFilterVisitorResult) CoordinateReferenceSystem(org.opengis.referencing.crs.CoordinateReferenceSystem) Job(org.apache.hadoop.mapreduce.Job) ResizeMRCommand(org.locationtech.geowave.adapter.raster.operations.ResizeMRCommand) Path(org.apache.hadoop.fs.Path) InternalAdapterStore(org.locationtech.geowave.core.store.adapter.InternalAdapterStore) IOException(java.io.IOException) SpatialOptions(org.locationtech.geowave.core.geotime.index.SpatialOptions) Geometry(org.locationtech.jts.geom.Geometry) PersistentAdapterStore(org.locationtech.geowave.core.store.adapter.PersistentAdapterStore) Filter(org.opengis.filter.Filter) StoreFactoryOptions(org.locationtech.geowave.core.store.StoreFactoryOptions) ExtractGeometryFilterVisitor(org.locationtech.geowave.core.geotime.util.ExtractGeometryFilterVisitor) File(java.io.File) IndexStore(org.locationtech.geowave.core.store.index.IndexStore)

Aggregations

IOException (java.io.IOException)2 GeotoolsFeatureDataAdapter (org.locationtech.geowave.core.geotime.store.GeotoolsFeatureDataAdapter)2 VectorQueryBuilder (org.locationtech.geowave.core.geotime.store.query.api.VectorQueryBuilder)2 ExtractGeometryFilterVisitor (org.locationtech.geowave.core.geotime.util.ExtractGeometryFilterVisitor)2 ExtractGeometryFilterVisitorResult (org.locationtech.geowave.core.geotime.util.ExtractGeometryFilterVisitorResult)2 InternalAdapterStore (org.locationtech.geowave.core.store.adapter.InternalAdapterStore)2 PersistentAdapterStore (org.locationtech.geowave.core.store.adapter.PersistentAdapterStore)2 Geometry (org.locationtech.jts.geom.Geometry)2 Filter (org.opengis.filter.Filter)2 ParameterException (com.beust.jcommander.ParameterException)1 File (java.io.File)1 Properties (java.util.Properties)1 Configuration (org.apache.hadoop.conf.Configuration)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Path (org.apache.hadoop.fs.Path)1 Job (org.apache.hadoop.mapreduce.Job)1 KMeans (org.apache.spark.mllib.clustering.KMeans)1 CQLException (org.geotools.filter.text.cql2.CQLException)1 ResizeMRCommand (org.locationtech.geowave.adapter.raster.operations.ResizeMRCommand)1 GeoWaveRDD (org.locationtech.geowave.analytic.spark.GeoWaveRDD)1