Use of org.locationtech.geowave.core.geotime.util.ExtractGeometryFilterVisitor in the geowave project by locationtech.
Class KMeansRunner, method run.
/**
 * Runs the KMeans clustering end to end: validates the inputs, builds a vector query over the
 * requested feature type (or every feature type in the input datastore), optionally narrows it
 * with the geometry extracted from the CQL filter, loads the matching data as an RDD, trains the
 * model and writes the result to the output store.
 *
 * @throws IOException if no input datastore was supplied, or if a CQL filter was supplied but no
 *         feature type could be resolved in the input datastore to apply it to
 */
public void run() throws IOException {
  initContext();

  // Validate inputs
  if (inputDataStore == null) {
    LOGGER.error("You must supply an input datastore!");
    throw new IOException("You must supply an input datastore!");
  }

  if (isUseTime()) {
    scaledRange = KMeansUtils.setRunnerTimeParams(this, inputDataStore, typeName);
    if (scaledRange == null) {
      LOGGER.error("Failed to set time params for kmeans. Please specify a valid feature type.");
      throw new ParameterException("--useTime option: Failed to set time params");
    }
  }

  // Retrieve the feature adapters
  final VectorQueryBuilder bldr = VectorQueryBuilder.newBuilder();
  List<String> featureTypeNames;
  if (typeName != null) {
    // If provided, just use the one
    featureTypeNames = new ArrayList<>();
    featureTypeNames.add(typeName);
  } else {
    // otherwise, grab all the feature adapters
    featureTypeNames = FeatureDataUtils.getFeatureTypeNames(inputDataStore);
  }
  bldr.setTypeNames(featureTypeNames.toArray(new String[0]));

  // This is required due to some funkiness in GeoWaveInputFormat
  final PersistentAdapterStore adapterStore = inputDataStore.createAdapterStore();
  final InternalAdapterStore internalAdapterStore = inputDataStore.createInternalAdapterStore();

  // Add a spatial filter if requested
  try {
    if (cqlFilter != null) {
      Geometry bbox = null;

      // Without an explicit type name the filter is interpreted against the first feature type.
      final String cqlTypeName;
      if (typeName == null) {
        if (featureTypeNames.isEmpty()) {
          throw new IOException(
              "Unable to apply CQL filter: no feature types found in the input datastore");
        }
        cqlTypeName = featureTypeNames.get(0);
      } else {
        cqlTypeName = typeName;
      }

      // Keep the id boxed so that a failed lookup is reported with a useful message instead of
      // a NullPointerException raised by implicit unboxing.
      final Short adapterId = internalAdapterStore.getAdapterId(cqlTypeName);
      if (adapterId == null) {
        throw new IOException(
            "Unable to apply CQL filter: type '"
                + cqlTypeName
                + "' was not found in the input datastore");
      }
      final DataTypeAdapter<?> adapter = adapterStore.getAdapter(adapterId).getAdapter();

      if (adapter instanceof GeotoolsFeatureDataAdapter) {
        final String geometryAttribute =
            ((GeotoolsFeatureDataAdapter) adapter).getFeatureType().getGeometryDescriptor().getLocalName();
        final Filter filter = ECQL.toFilter(cqlFilter);
        final ExtractGeometryFilterVisitorResult geoAndCompareOpData =
            (ExtractGeometryFilterVisitorResult) filter.accept(
                new ExtractGeometryFilterVisitor(GeometryUtils.getDefaultCRS(), geometryAttribute),
                null);
        bbox = geoAndCompareOpData.getGeometry();
      }

      // Only constrain the query when the filter yielded a bounded geometry.
      if ((bbox != null) && !bbox.equals(GeometryUtils.infinity())) {
        bldr.constraints(
            bldr.constraintsFactory().spatialTemporalConstraints().spatialConstraints(bbox).build());
      }
    }
  } catch (final CQLException e) {
    // A filter that cannot be parsed is logged (with its cause) and the run continues without a
    // spatial constraint.
    LOGGER.error("Unable to parse CQL: " + cqlFilter, e);
  }

  // Load RDD from datastore
  final RDDOptions kmeansOpts = new RDDOptions();
  kmeansOpts.setMinSplits(minSplits);
  kmeansOpts.setMaxSplits(maxSplits);
  kmeansOpts.setQuery(bldr.build());
  final GeoWaveRDD kmeansRDD =
      GeoWaveRDDLoader.loadRDD(session.sparkContext(), inputDataStore, kmeansOpts);

  // Retrieve the input centroids
  LOGGER.debug("Retrieving input centroids from RDD...");
  centroidVectors = RDDUtils.rddFeatureVectors(kmeansRDD, timeField, scaledTimeRange);
  // Cached because the vectors are handed to the training run below.
  centroidVectors.cache();

  // Init the algorithm
  final KMeans kmeans = new KMeans();
  kmeans.setInitializationMode("kmeans||");
  kmeans.setK(numClusters);
  kmeans.setMaxIterations(numIterations);
  // An epsilon of -1.0 or less leaves the KMeans default in place.
  if (epsilon > -1.0) {
    kmeans.setEpsilon(epsilon);
  }

  // Run KMeans
  LOGGER.debug("Running KMeans algorithm...");
  outputModel = kmeans.run(centroidVectors.rdd());

  LOGGER.debug("Writing results to output store...");
  writeToOutputStore();
  LOGGER.debug("Results successfully written!");
}
Use of org.locationtech.geowave.core.geotime.util.ExtractGeometryFilterVisitor in the geowave project by locationtech.
Class KDEJobRunner, method runJob.
/**
 * Main method to execute the MapReduce analytic.
 *
 * <p>Two chained MapReduce jobs are run: job 1 reads the input store through
 * {@code GeoWaveInputFormat} and writes its output as sequence files to a scratch directory under
 * {@code /tmp}; job 2 reads that directory and writes the coverage to the output store. When the
 * requested tile size is greater than 1, the coverage is first written to a temporary namespace
 * and then handed to the resize command, which produces the final coverage.
 *
 * @return {@code 0} if job 1, job 2 and the post-job-2 actions all succeeded, {@code 1} otherwise
 * @throws IllegalArgumentException if the configured feature type is not present in the input
 *         data store
 * @throws Exception if configuring or running any of the jobs fails
 */
@SuppressWarnings("deprecation")
public int runJob() throws Exception {
  Configuration conf = super.getConf();
  if (conf == null) {
    conf = new Configuration();
    setConf(conf);
  }

  // Use the index with the requested name, or simply the first one if no name was given.
  Index inputPrimaryIndex = null;
  final Index[] idxArray = inputDataStoreOptions.createDataStore().getIndices();
  for (final Index idx : idxArray) {
    if ((idx != null)
        && ((kdeCommandLineOptions.getIndexName() == null)
            || kdeCommandLineOptions.getIndexName().equals(idx.getName()))) {
      inputPrimaryIndex = idx;
      break;
    }
  }

  final CoordinateReferenceSystem inputIndexCrs = GeometryUtils.getIndexCrs(inputPrimaryIndex);
  final String inputCrsCode = GeometryUtils.getCrsCode(inputIndexCrs);

  Index outputPrimaryIndex = outputIndex;
  CoordinateReferenceSystem outputIndexCrs = null;
  String outputCrsCode = null;
  if (outputPrimaryIndex != null) {
    outputIndexCrs = GeometryUtils.getIndexCrs(outputPrimaryIndex);
    outputCrsCode = GeometryUtils.getCrsCode(outputIndexCrs);
  } else {
    // No output index supplied: create a spatial index that shares the input CRS.
    final SpatialDimensionalityTypeProvider sdp = new SpatialDimensionalityTypeProvider();
    final SpatialOptions so = sdp.createOptions();
    so.setCrs(inputCrsCode);
    outputPrimaryIndex = SpatialDimensionalityTypeProvider.createIndexFromOptions(so);
    outputIndexCrs = inputIndexCrs;
    outputCrsCode = inputCrsCode;
  }

  final CoordinateSystem cs = outputIndexCrs.getCoordinateSystem();
  final CoordinateSystemAxis csx = cs.getAxis(0);
  final CoordinateSystemAxis csy = cs.getAxis(1);
  final double xMax = csx.getMaximumValue();
  final double xMin = csx.getMinimumValue();
  final double yMax = csy.getMaximumValue();
  final double yMin = csy.getMinimumValue();
  if ((xMax == Double.POSITIVE_INFINITY)
      || (xMin == Double.NEGATIVE_INFINITY)
      || (yMax == Double.POSITIVE_INFINITY)
      || (yMin == Double.NEGATIVE_INFINITY)) {
    LOGGER.error(
        "Raster KDE resize with raster primary index CRS dimensions min/max equal to positive infinity or negative infinity is not supported");
    throw new RuntimeException(
        "Raster KDE resize with raster primary index CRS dimensions min/max equal to positive infinity or negative infinity is not supported");
  }

  DataStorePluginOptions rasterResizeOutputDataStoreOptions;
  String kdeCoverageName;
  // when a tile size greater than 1 is requested, write the KDE output to a temporary
  // namespace and then run a resize operation
  if ((kdeCommandLineOptions.getTileSize() > 1)) {
    // this is the ending data store options after resize, the KDE will
    // need to output to a temporary namespace, a resize operation
    // will use the outputDataStoreOptions
    rasterResizeOutputDataStoreOptions = outputDataStoreOptions;

    // first clone the outputDataStoreOptions, then set it to a tmp
    // namespace
    final Map<String, String> configOptions = outputDataStoreOptions.getOptionsAsMap();
    final StoreFactoryOptions options =
        ConfigUtils.populateOptionsFromList(
            outputDataStoreOptions.getFactoryFamily().getDataStoreFactory().createOptionsInstance(),
            configOptions);
    options.setGeoWaveNamespace(outputDataStoreOptions.getGeoWaveNamespace() + "_tmp");
    outputDataStoreOptions = new DataStorePluginOptions(options);
    kdeCoverageName = kdeCommandLineOptions.getCoverageName() + TMP_COVERAGE_SUFFIX;
  } else {
    rasterResizeOutputDataStoreOptions = null;
    kdeCoverageName = kdeCommandLineOptions.getCoverageName();
  }

  // Fall back to the HDFS URL from the properties file when none was given on the command line.
  if (kdeCommandLineOptions.getHdfsHostPort() == null) {
    final Properties configProperties = ConfigOptions.loadProperties(configFile);
    final String hdfsFSUrl = ConfigHDFSCommand.getHdfsUrl(configProperties);
    kdeCommandLineOptions.setHdfsHostPort(hdfsFSUrl);
  }

  GeoWaveConfiguratorBase.setRemoteInvocationParams(
      kdeCommandLineOptions.getHdfsHostPort(),
      kdeCommandLineOptions.getJobTrackerOrResourceManHostPort(),
      conf);

  conf.setInt(MAX_LEVEL_KEY, kdeCommandLineOptions.getMaxLevel());
  conf.setInt(MIN_LEVEL_KEY, kdeCommandLineOptions.getMinLevel());
  conf.set(COVERAGE_NAME_KEY, kdeCoverageName);
  if (kdeCommandLineOptions.getCqlFilter() != null) {
    conf.set(GaussianCellMapper.CQL_FILTER_KEY, kdeCommandLineOptions.getCqlFilter());
  }
  conf.setDouble(X_MIN_KEY, xMin);
  conf.setDouble(X_MAX_KEY, xMax);
  conf.setDouble(Y_MIN_KEY, yMin);
  conf.setDouble(Y_MAX_KEY, yMax);
  conf.set(INPUT_CRSCODE_KEY, inputCrsCode);
  conf.set(OUTPUT_CRSCODE_KEY, outputCrsCode);

  preJob1Setup(conf);
  final Job job = new Job(conf);

  job.setJarByClass(this.getClass());
  addJobClasspathDependencies(job, conf);

  job.setJobName(getJob1Name());

  job.setMapperClass(getJob1Mapper());
  job.setCombinerClass(CellSummationCombiner.class);
  job.setReducerClass(getJob1Reducer());
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(DoubleWritable.class);
  job.setOutputKeyClass(DoubleWritable.class);
  job.setOutputValueClass(LongWritable.class);

  job.setInputFormatClass(GeoWaveInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setNumReduceTasks(8);
  job.setSpeculativeExecution(false);

  final PersistentAdapterStore adapterStore = inputDataStoreOptions.createAdapterStore();
  final IndexStore indexStore = inputDataStoreOptions.createIndexStore();
  final InternalAdapterStore internalAdapterStore =
      inputDataStoreOptions.createInternalAdapterStore();

  // Keep the id boxed so that an unknown feature type is reported by name instead of surfacing
  // as a NullPointerException raised by implicit unboxing.
  final String featureTypeName = kdeCommandLineOptions.getFeatureType();
  final Short internalAdapterId = internalAdapterStore.getAdapterId(featureTypeName);
  if (internalAdapterId == null) {
    throw new IllegalArgumentException(
        "Feature type '" + featureTypeName + "' was not found in the input data store");
  }
  final DataTypeAdapter<?> adapter = adapterStore.getAdapter(internalAdapterId).getAdapter();

  VectorQueryBuilder bldr = VectorQueryBuilder.newBuilder().addTypeName(adapter.getTypeName());
  if (kdeCommandLineOptions.getIndexName() != null) {
    bldr = bldr.indexName(kdeCommandLineOptions.getIndexName());
  }

  GeoWaveInputFormat.setMinimumSplitCount(
      job.getConfiguration(),
      kdeCommandLineOptions.getMinSplits());
  GeoWaveInputFormat.setMaximumSplitCount(
      job.getConfiguration(),
      kdeCommandLineOptions.getMaxSplits());
  GeoWaveInputFormat.setStoreOptions(job.getConfiguration(), inputDataStoreOptions);

  if (kdeCommandLineOptions.getCqlFilter() != null) {
    Geometry bbox = null;
    if (adapter instanceof GeotoolsFeatureDataAdapter) {
      final String geometryAttribute =
          ((GeotoolsFeatureDataAdapter) adapter).getFeatureType().getGeometryDescriptor().getLocalName();
      final Filter filter = ECQL.toFilter(kdeCommandLineOptions.getCqlFilter());
      final ExtractGeometryFilterVisitorResult geoAndCompareOpData =
          (ExtractGeometryFilterVisitorResult) filter.accept(
              new ExtractGeometryFilterVisitor(GeometryUtils.getDefaultCRS(), geometryAttribute),
              null);
      bbox = geoAndCompareOpData.getGeometry();
    }

    // Only constrain the query when the filter yielded a bounded geometry.
    if ((bbox != null) && !bbox.equals(GeometryUtils.infinity())) {
      bldr =
          bldr.constraints(
              bldr.constraintsFactory().spatialTemporalConstraints().spatialConstraints(
                  bbox).build());
    }
  }

  // A Hadoop Job works on its own copy of the Configuration it was created from, so the query
  // must be written to the job's configuration (like the split counts and store options above)
  // for job 1's input format to see it.
  GeoWaveInputFormat.setQuery(
      job.getConfiguration(),
      bldr.build(),
      adapterStore,
      internalAdapterStore,
      indexStore);
  // The template configuration is still updated as before, since it is reused to create job 2
  // and is passed to the setup/post-processing hooks below.
  GeoWaveInputFormat.setQuery(conf, bldr.build(), adapterStore, internalAdapterStore, indexStore);

  // Scratch directory shared by both jobs: job 1 writes "<statsDir>/basic", job 2 reads it.
  final String statsDir =
      "/tmp/"
          + inputDataStoreOptions.getGeoWaveNamespace()
          + "_stats_"
          + kdeCommandLineOptions.getMinLevel()
          + "_"
          + kdeCommandLineOptions.getMaxLevel()
          + "_"
          + kdeCommandLineOptions.getCoverageName();
  final String job1OutputDir = statsDir + "/basic";

  FileSystem fs = null;
  try {
    fs = FileSystem.get(conf);
    // Remove leftovers of a previous run before job 1 writes its output.
    fs.delete(new Path(statsDir), true);

    FileOutputFormat.setOutputPath(job, new Path(job1OutputDir));

    final boolean job1Success = job.waitForCompletion(true);
    boolean job2Success = false;
    boolean postJob2Success = false;

    // Linear MapReduce job chaining
    if (job1Success) {
      setupEntriesPerLevel(job, conf);

      // Stats Reducer Job configuration parameters
      final Job statsReducer = new Job(conf);
      statsReducer.setJarByClass(this.getClass());
      addJobClasspathDependencies(statsReducer, conf);

      statsReducer.setJobName(getJob2Name());
      statsReducer.setMapperClass(IdentityMapper.class);
      statsReducer.setPartitionerClass(getJob2Partitioner());
      statsReducer.setReducerClass(getJob2Reducer());
      statsReducer.setNumReduceTasks(
          getJob2NumReducers(
              (kdeCommandLineOptions.getMaxLevel() - kdeCommandLineOptions.getMinLevel()) + 1));
      statsReducer.setMapOutputKeyClass(DoubleWritable.class);
      statsReducer.setMapOutputValueClass(LongWritable.class);
      statsReducer.setOutputKeyClass(getJob2OutputKeyClass());
      statsReducer.setOutputValueClass(getJob2OutputValueClass());
      statsReducer.setInputFormatClass(SequenceFileInputFormat.class);
      statsReducer.setOutputFormatClass(getJob2OutputFormatClass());
      FileInputFormat.setInputPaths(statsReducer, new Path(job1OutputDir));
      setupJob2Output(
          conf,
          statsReducer,
          outputDataStoreOptions.getGeoWaveNamespace(),
          kdeCoverageName,
          outputPrimaryIndex);

      job2Success = statsReducer.waitForCompletion(true);
      if (job2Success) {
        postJob2Success =
            postJob2Actions(conf, outputDataStoreOptions.getGeoWaveNamespace(), kdeCoverageName);
      }
    }

    if (rasterResizeOutputDataStoreOptions != null) {
      // delegate to resize command to wrap it up with the correctly
      // requested tile size
      final ResizeMRCommand resizeCommand = new ResizeMRCommand();

      // Throwaway properties file that holds the two store definitions for the commands below.
      // Named so that it does not shadow the configFile field, and removed once they are done.
      final File tmpConfigFile = File.createTempFile("temp-config", null);
      try {
        final ManualOperationParams params = new ManualOperationParams();
        params.getContext().put(ConfigOptions.PROPERTIES_FILE_CONTEXT, tmpConfigFile);

        final AddStoreCommand addStore = new AddStoreCommand();
        addStore.setParameters("temp-out");
        addStore.setPluginOptions(outputDataStoreOptions);
        addStore.execute(params);
        addStore.setParameters("temp-raster-out");
        addStore.setPluginOptions(rasterResizeOutputDataStoreOptions);
        addStore.execute(params);

        // We're going to override these anyway.
        resizeCommand.setParameters("temp-out", "temp-raster-out");

        resizeCommand.getOptions().setInputCoverageName(kdeCoverageName);
        resizeCommand.getOptions().setMinSplits(kdeCommandLineOptions.getMinSplits());
        resizeCommand.getOptions().setMaxSplits(kdeCommandLineOptions.getMaxSplits());
        resizeCommand.setHdfsHostPort(kdeCommandLineOptions.getHdfsHostPort());
        resizeCommand.setJobTrackerOrResourceManHostPort(
            kdeCommandLineOptions.getJobTrackerOrResourceManHostPort());
        resizeCommand.getOptions().setOutputCoverageName(kdeCommandLineOptions.getCoverageName());
        resizeCommand.getOptions().setOutputTileSize(kdeCommandLineOptions.getTileSize());

        final int resizeStatus =
            ToolRunner.run(resizeCommand.createRunner(params), new String[] {});
        if (resizeStatus == 0) {
          // delegate to clear command to clean up with tmp namespace
          // after successful resize
          final ClearStoreCommand clearCommand = new ClearStoreCommand();
          clearCommand.setParameters("temp-out");
          clearCommand.execute(params);
        } else {
          LOGGER.warn(
              "Resize command error code '"
                  + resizeStatus
                  + "'. Retaining temporary namespace '"
                  + outputDataStoreOptions.getGeoWaveNamespace()
                  + "' with tile size of 1.");
        }
      } finally {
        if (!tmpConfigFile.delete()) {
          LOGGER.warn(
              "Unable to delete temporary config file '" + tmpConfigFile.getAbsolutePath() + "'");
        }
      }
    }

    fs.delete(new Path(statsDir), true);
    return (job1Success && job2Success && postJob2Success) ? 0 : 1;
  } finally {
    if (fs != null) {
      try {
        fs.close();
      } catch (final IOException e) {
        // Attempt to close, but don't throw an error if it is
        // already closed.
        // Log message, so find bugs does not complain.
        LOGGER.info(e.getMessage(), e);
      }
    }
  }
}
Aggregations