Search in sources :

Example 1 with KMeans

Use of org.apache.spark.mllib.clustering.KMeans in project geowave by locationtech.

The run method of the class KMeansRunner.

/**
 * Runs the KMeans job end to end: validates the configured input, builds a vector query
 * (optionally constrained by the bounding geometry extracted from the CQL filter), loads the
 * matching features into an RDD, trains a Spark MLlib {@code KMeans} model on the derived
 * feature vectors and finally calls {@code writeToOutputStore()}.
 *
 * <p>A CQL filter that cannot be parsed does not abort the run; the failure is logged (with its
 * cause) and the job continues without a spatial constraint.
 *
 * @throws IOException if no input data store has been supplied
 * @throws ParameterException if time usage is enabled but the time parameters could not be set
 */
public void run() throws IOException {
    initContext();
    // Validate inputs
    if (inputDataStore == null) {
        LOGGER.error("You must supply an input datastore!");
        throw new IOException("You must supply an input datastore!");
    }
    if (isUseTime()) {
        scaledRange = KMeansUtils.setRunnerTimeParams(this, inputDataStore, typeName);
        if (scaledRange == null) {
            LOGGER.error("Failed to set time params for kmeans. Please specify a valid feature type.");
            throw new ParameterException("--useTime option: Failed to set time params");
        }
    }
    // Retrieve the feature adapters
    final VectorQueryBuilder bldr = VectorQueryBuilder.newBuilder();
    final List<String> featureTypeNames;
    if (typeName != null) {
        // If provided, just use the one
        featureTypeNames = new ArrayList<>();
        featureTypeNames.add(typeName);
    } else {
        // otherwise, grab all the feature adapters
        featureTypeNames = FeatureDataUtils.getFeatureTypeNames(inputDataStore);
    }
    bldr.setTypeNames(featureTypeNames.toArray(new String[0]));
    // This is required due to some funkiness in GeoWaveInputFormat
    final PersistentAdapterStore adapterStore = inputDataStore.createAdapterStore();
    final InternalAdapterStore internalAdapterStore = inputDataStore.createInternalAdapterStore();
    // Add a spatial filter if requested
    try {
        if (cqlFilter != null) {
            Geometry bbox = null;
            // When no explicit type was requested, the geometry attribute is resolved from the
            // first discovered feature type only.
            // NOTE(review): get(0) assumes the data store reported at least one feature type -
            // confirm an empty list cannot reach this point.
            final String cqlTypeName = (typeName == null) ? featureTypeNames.get(0) : typeName;
            final short adapterId = internalAdapterStore.getAdapterId(cqlTypeName);
            final DataTypeAdapter<?> adapter = adapterStore.getAdapter(adapterId).getAdapter();
            if (adapter instanceof GeotoolsFeatureDataAdapter) {
                final String geometryAttribute = ((GeotoolsFeatureDataAdapter) adapter).getFeatureType().getGeometryDescriptor().getLocalName();
                final Filter filter = ECQL.toFilter(cqlFilter);
                final ExtractGeometryFilterVisitorResult geoAndCompareOpData = (ExtractGeometryFilterVisitorResult) filter.accept(new ExtractGeometryFilterVisitor(GeometryUtils.getDefaultCRS(), geometryAttribute), null);
                bbox = geoAndCompareOpData.getGeometry();
            }
            // Only constrain the query when the filter produced a real (non-infinite) geometry
            if ((bbox != null) && !bbox.equals(GeometryUtils.infinity())) {
                bldr.constraints(bldr.constraintsFactory().spatialTemporalConstraints().spatialConstraints(bbox).build());
            }
        }
    } catch (final CQLException e) {
        // Best effort: keep running unfiltered, but pass the exception along so the parser's
        // diagnostics are not lost.
        LOGGER.error("Unable to parse CQL: " + cqlFilter, e);
    }
    // Load RDD from datastore
    final RDDOptions kmeansOpts = new RDDOptions();
    kmeansOpts.setMinSplits(minSplits);
    kmeansOpts.setMaxSplits(maxSplits);
    kmeansOpts.setQuery(bldr.build());
    final GeoWaveRDD kmeansRDD = GeoWaveRDDLoader.loadRDD(session.sparkContext(), inputDataStore, kmeansOpts);
    // Retrieve the input centroids
    LOGGER.debug("Retrieving input centroids from RDD...");
    // NOTE(review): this reads scaledTimeRange while the time setup above assigns scaledRange;
    // presumably setRunnerTimeParams populates scaledTimeRange on this runner - confirm.
    centroidVectors = RDDUtils.rddFeatureVectors(kmeansRDD, timeField, scaledTimeRange);
    centroidVectors.cache();
    // Init the algorithm
    final KMeans kmeans = new KMeans();
    kmeans.setInitializationMode("kmeans||");
    kmeans.setK(numClusters);
    kmeans.setMaxIterations(numIterations);
    // Epsilon is only applied when it is greater than -1.0; otherwise the KMeans instance is
    // left with whatever value it already has.
    if (epsilon > -1.0) {
        kmeans.setEpsilon(epsilon);
    }
    // Run KMeans
    LOGGER.debug("Running KMeans algorithm...");
    outputModel = kmeans.run(centroidVectors.rdd());
    LOGGER.debug("Writing results to output store...");
    writeToOutputStore();
    LOGGER.debug("Results successfully written!");
}
Also used : VectorQueryBuilder(org.locationtech.geowave.core.geotime.store.query.api.VectorQueryBuilder) InternalAdapterStore(org.locationtech.geowave.core.store.adapter.InternalAdapterStore) KMeans(org.apache.spark.mllib.clustering.KMeans) IOException(java.io.IOException) RDDOptions(org.locationtech.geowave.analytic.spark.RDDOptions) Geometry(org.locationtech.jts.geom.Geometry) PersistentAdapterStore(org.locationtech.geowave.core.store.adapter.PersistentAdapterStore) GeotoolsFeatureDataAdapter(org.locationtech.geowave.core.geotime.store.GeotoolsFeatureDataAdapter) Filter(org.opengis.filter.Filter) ExtractGeometryFilterVisitorResult(org.locationtech.geowave.core.geotime.util.ExtractGeometryFilterVisitorResult) ParameterException(com.beust.jcommander.ParameterException) GeoWaveRDD(org.locationtech.geowave.analytic.spark.GeoWaveRDD) CQLException(org.geotools.filter.text.cql2.CQLException) ExtractGeometryFilterVisitor(org.locationtech.geowave.core.geotime.util.ExtractGeometryFilterVisitor)

Aggregations

ParameterException (com.beust.jcommander.ParameterException)1 IOException (java.io.IOException)1 KMeans (org.apache.spark.mllib.clustering.KMeans)1 CQLException (org.geotools.filter.text.cql2.CQLException)1 GeoWaveRDD (org.locationtech.geowave.analytic.spark.GeoWaveRDD)1 RDDOptions (org.locationtech.geowave.analytic.spark.RDDOptions)1 GeotoolsFeatureDataAdapter (org.locationtech.geowave.core.geotime.store.GeotoolsFeatureDataAdapter)1 VectorQueryBuilder (org.locationtech.geowave.core.geotime.store.query.api.VectorQueryBuilder)1 ExtractGeometryFilterVisitor (org.locationtech.geowave.core.geotime.util.ExtractGeometryFilterVisitor)1 ExtractGeometryFilterVisitorResult (org.locationtech.geowave.core.geotime.util.ExtractGeometryFilterVisitorResult)1 InternalAdapterStore (org.locationtech.geowave.core.store.adapter.InternalAdapterStore)1 PersistentAdapterStore (org.locationtech.geowave.core.store.adapter.PersistentAdapterStore)1 Geometry (org.locationtech.jts.geom.Geometry)1 Filter (org.opengis.filter.Filter)1