Search in sources :

Example 1 with UDFNameAndConstructor

use of org.locationtech.geowave.analytic.spark.sparksql.udf.UDFRegistrySPI.UDFNameAndConstructor in project geowave by locationtech.

In the class SqlQueryRunner, method run:

/**
 * Executes the configured SQL statement and returns the resulting data frame.
 *
 * <p>Before the statement is handed to Spark, its unresolved logical plan is inspected for
 * functions known to {@code UDFRegistrySPI} inside {@code Filter} nodes. When exactly one such
 * predicate has been collected, the WHERE clause is a single (non AND/OR) condition, and both
 * function arguments resolve to loaded input stores, a {@code SpatialJoinRunner} join is executed
 * and the two temp views are replaced with the join results before the statement is run. Whenever
 * one of those preconditions cannot be established the method returns {@code runDefaultSQL()}
 * instead.
 *
 * @return the data frame produced by running the SQL statement
 */
public Dataset<Row> run() throws IOException, InterruptedException, ExecutionException, ParseException {
    initContext();
    // Load stores and create views.
    loadStoresAndViews();
    // Create a version of the sql without string literals to check for
    // subquery syntax in sql statement.
    final Pattern stringLit = Pattern.compile("(?:\\'|\\\").*?(?:\\'|\\\")");
    final Matcher m = stringLit.matcher(sql);
    final String cleanedSql = m.replaceAll("");
    LOGGER.debug("cleaned SQL statement: " + cleanedSql);
    // The detection pattern is a pure zero-width lookahead, so it must be evaluated with find():
    // String.matches() requires the whole input to be consumed and could never succeed here.
    // DOTALL lets the ".*" between keywords span line breaks in multi-line statements.
    final Pattern nestedStatementDetect = Pattern.compile("(?i)^(?=(?:.*(?:\\b(?:INSERT INTO|UPDATE|SELECT|WITH|DELETE|CREATE TABLE|ALTER TABLE|DROP TABLE)\\b)){2})", Pattern.DOTALL);
    // injecting a optimized join into the process
    if (!nestedStatementDetect.matcher(cleanedSql).find()) {
        // Parse sparks logical plan for query and determine if spatial join
        // is present
        final LogicalPlan plan = session.sessionState().sqlParser().parsePlan(sql);
        final JsonParser gsonParser = new JsonParser();
        final JsonElement jElement = gsonParser.parse(plan.prettyJson());
        if (jElement.isJsonArray()) {
            for (final JsonElement childElement : jElement.getAsJsonArray()) {
                if (!childElement.isJsonObject()) {
                    continue;
                }
                final JsonObject jObj = childElement.getAsJsonObject();
                final String objClass = jObj.get("class").getAsString();
                if (!Objects.equals(objClass, "org.apache.spark.sql.catalyst.plans.logical.Filter")) {
                    continue;
                }
                // Search through filter Object to determine if
                // GeomPredicate function present in condition.
                final JsonElement conditionElements = jObj.get("condition");
                if (!conditionElements.isJsonArray()) {
                    continue;
                }
                for (final JsonElement childCond : conditionElements.getAsJsonArray()) {
                    if (!childCond.isJsonObject()) {
                        continue;
                    }
                    final JsonObject condObj = childCond.getAsJsonObject();
                    final String condClass = condObj.get("class").getAsString();
                    if (!Objects.equals(condClass, "org.apache.spark.sql.catalyst.analysis.UnresolvedFunction")) {
                        continue;
                    }
                    final String udfName = condObj.get("name").getAsJsonObject().get("funcName").getAsString();
                    final UDFNameAndConstructor geomUDF = UDFRegistrySPI.findFunctionByName(udfName);
                    if (geomUDF != null) {
                        final ExtractedGeomPredicate relevantPredicate = new ExtractedGeomPredicate();
                        relevantPredicate.predicate = geomUDF.getPredicateConstructor().get();
                        relevantPredicate.predicateName = udfName;
                        extractedPredicates.add(relevantPredicate);
                    }
                }
            }
        }
    }
    // would indicate a spatial join
    if (extractedPredicates.size() == 1) {
        // These patterns detect a keyword outside of quoted areas and
        // capture it in group 2. The alternations are wrapped in a non-capturing
        // group so that the word boundaries apply to every alternative.
        final Pattern whereDetect = Pattern.compile("(?i)(\"[^\"]*\"|'[^']*')|(\\bWHERE\\b)");
        final Pattern andOrDetect = Pattern.compile("(?i)(\"[^\"]*\"|'[^']*')|(\\b(?:AND|OR)\\b)");
        final Pattern orderGroupDetect = Pattern.compile("(?i)(\"[^\"]*\"|'[^']*')|(\\b(?:ORDER BY|GROUP BY)\\b)");
        final Matcher filterStart = getFirstPositiveMatcher(whereDetect, sql);
        if (filterStart == null) {
            LOGGER.error("There should be a where clause matching the pattern. Running default SQL");
            return runDefaultSQL();
        }
        final int whereStart = filterStart.start(2);
        int whereEnd = sql.length();
        final Matcher filterEnd = getFirstPositiveMatcher(orderGroupDetect, sql.substring(whereStart));
        if (filterEnd != null) {
            // The matcher ran on the substring beginning at whereStart, so its offset
            // has to be shifted back into the coordinates of the full statement.
            whereEnd = whereStart + filterEnd.start(2);
        }
        final String filterClause = sql.substring(whereStart, whereEnd);
        LOGGER.warn("Extracted Filter Clause: " + filterClause);
        final Matcher compoundFilter = getFirstPositiveMatcher(andOrDetect, filterClause);
        if (compoundFilter != null) {
            LOGGER.warn("Compound conditional detected can result in multiple joins. Too complex to plan in current context. Running default sql");
            return runDefaultSQL();
        }
        final ExtractedGeomPredicate pred = extractedPredicates.get(0);
        // Parse filter string for predicate location
        final int functionPos = filterClause.indexOf(pred.predicateName);
        final int funcArgStart = filterClause.indexOf("(", functionPos);
        final int funcArgEnd = (funcArgStart < 0) ? -1 : filterClause.indexOf(")", funcArgStart);
        if ((funcArgStart < 0) || (funcArgEnd < 0)) {
            // Without a parenthesised argument list the substring below would throw.
            LOGGER.warn("Could not locate function arguments in filter clause. Running default SQL");
            return runDefaultSQL();
        }
        String funcArgs = filterClause.substring(funcArgStart + 1, funcArgEnd);
        funcArgs = funcArgs.replaceAll("\\s", "");
        LOGGER.warn("Function Args: " + funcArgs);
        final String[] args = funcArgs.split(Pattern.quote(","));
        if (args.length == 2) {
            // Determine valid table relations that map to input stores
            final String[] tableRelations = getTableRelations(args);
            pred.leftTableRelation = tableRelations[0];
            pred.rightTableRelation = tableRelations[1];
        }
        if ((pred.leftTableRelation == null) || (pred.rightTableRelation == null)) {
            LOGGER.warn("Cannot translate table identifier to geowave rdd for join.");
            return runDefaultSQL();
        }
        // Extract radius for distance join from condition
        boolean negativePredicate = false;
        if (Objects.equals(pred.predicateName, "GeomDistance")) {
            // Look ahead two tokens for logical operand and scalar|boolean
            final String afterFunc = filterClause.substring(funcArgEnd + 1);
            // Trim first and split on whitespace runs: splitting " < 5" on a single space
            // would produce an empty leading token and shift operand and radius by one.
            final String[] tokens = afterFunc.trim().split("\\s+");
            if (tokens.length < 2) {
                LOGGER.warn("Could not extract radius for distance join. Running default SQL");
                return runDefaultSQL();
            }
            final String logicalOperand = tokens[0].trim();
            if ((logicalOperand.equals(">")) || (logicalOperand.equals(">="))) {
                negativePredicate = true;
            }
            final String radiusStr = tokens[1].trim();
            if (!org.apache.commons.lang3.math.NumberUtils.isNumber(radiusStr)) {
                LOGGER.warn("Could not extract radius for distance join. Running default SQL");
                return runDefaultSQL();
            }
            final Double r = org.apache.commons.lang3.math.NumberUtils.createDouble(radiusStr);
            if (r == null) {
                LOGGER.warn("Could not extract radius for distance join. Running default SQL");
                return runDefaultSQL();
            }
            ((GeomWithinDistance) pred.predicate).setRadius(r.doubleValue());
        }
        // At this point we are performing a join
        final SpatialJoinRunner joinRunner = new SpatialJoinRunner(session);
        // Collect input store info for join
        final InputStoreInfo leftStore = inputStores.get(pred.leftTableRelation);
        final InputStoreInfo rightStore = inputStores.get(pred.rightTableRelation);
        joinRunner.setNegativeTest(negativePredicate);
        // Setup store info for runner
        final AdapterToIndexMapping[] leftMappings = leftStore.getOrCreateAdapterIndexMappingStore().getIndicesForAdapter(leftStore.getOrCreateInternalAdapterStore().getAdapterId(leftStore.typeName));
        final AdapterToIndexMapping[] rightMappings = rightStore.getOrCreateAdapterIndexMappingStore().getIndicesForAdapter(rightStore.getOrCreateInternalAdapterStore().getAdapterId(rightStore.typeName));
        // Use the strategy of the first mapped index, if any; null is passed through otherwise.
        NumericIndexStrategy leftStrat = null;
        if (leftMappings.length > 0) {
            leftStrat = leftMappings[0].getIndex(leftStore.getOrCreateIndexStore()).getIndexStrategy();
        }
        NumericIndexStrategy rightStrat = null;
        if (rightMappings.length > 0) {
            rightStrat = rightMappings[0].getIndex(rightStore.getOrCreateIndexStore()).getIndexStrategy();
        }
        joinRunner.setLeftRDD(GeoWaveRDDLoader.loadIndexedRDD(session.sparkContext(), leftStore.rdd, leftStrat));
        joinRunner.setRightRDD(GeoWaveRDDLoader.loadIndexedRDD(session.sparkContext(), rightStore.rdd, rightStrat));
        joinRunner.setPredicate(pred.predicate);
        joinRunner.setLeftStore(leftStore.storeOptions);
        joinRunner.setRightStore(rightStore.storeOptions);
        // Execute the join
        joinRunner.run();
        // Load results into dataframes and replace original views with
        // joined views
        final SimpleFeatureDataFrame leftResultFrame = new SimpleFeatureDataFrame(session);
        final SimpleFeatureDataFrame rightResultFrame = new SimpleFeatureDataFrame(session);
        leftResultFrame.init(leftStore.storeOptions, leftStore.typeName);
        rightResultFrame.init(rightStore.storeOptions, rightStore.typeName);
        final Dataset<Row> leftFrame = leftResultFrame.getDataFrame(joinRunner.getLeftResults());
        final Dataset<Row> rightFrame = rightResultFrame.getDataFrame(joinRunner.getRightResults());
        leftFrame.createOrReplaceTempView(leftStore.viewName);
        rightFrame.createOrReplaceTempView(rightStore.viewName);
    }
    // Run the remaining query through the session sql runner.
    // This will likely attempt to regenerate the join, but should reuse the
    // pairs generated from optimized join beforehand
    final Dataset<Row> results = session.sql(sql);
    return results;
}
Also used : Pattern(java.util.regex.Pattern) GeomWithinDistance(org.locationtech.geowave.analytic.spark.sparksql.udf.GeomWithinDistance) Matcher(java.util.regex.Matcher) JsonObject(com.google.gson.JsonObject) AdapterToIndexMapping(org.locationtech.geowave.core.store.AdapterToIndexMapping) SpatialJoinRunner(org.locationtech.geowave.analytic.spark.spatial.SpatialJoinRunner) JsonArray(com.google.gson.JsonArray) UDFNameAndConstructor(org.locationtech.geowave.analytic.spark.sparksql.udf.UDFRegistrySPI.UDFNameAndConstructor) JsonElement(com.google.gson.JsonElement) LogicalPlan(org.apache.spark.sql.catalyst.plans.logical.LogicalPlan) Row(org.apache.spark.sql.Row) JsonParser(com.google.gson.JsonParser) NumericIndexStrategy(org.locationtech.geowave.core.index.NumericIndexStrategy)

Example 2 with UDFNameAndConstructor

use of org.locationtech.geowave.analytic.spark.sparksql.udf.UDFRegistrySPI.UDFNameAndConstructor in project geowave by locationtech.

In the class GeomFunctionRegistry, method registerGeometryFunctions:

/**
 * Registers the geometry UDFs with the given Spark session.
 *
 * <p>{@code GeomDistance} and {@code GeomFromWKT} are registered explicitly with their own return
 * types; every function reported by {@code UDFRegistrySPI.getSupportedUDFs()} is then registered
 * under its own register name with a boolean return type.
 *
 * @param spark the session whose UDF registry receives the functions
 */
public static void registerGeometryFunctions(final SparkSession spark) {
    // The distance function does not follow the GeomFunction contract because it yields a
    // Double, so it is registered by hand together with the WKT constructor function.
    spark.udf().register("GeomDistance", geomDistanceInstance, DataTypes.DoubleType);
    spark.udf().register("GeomFromWKT", geomWKTInstance, GeoWaveSpatialEncoders.geometryUDT);
    // Everything supplied by the registry SPI is registered as a boolean function.
    for (final UDFNameAndConstructor supported : UDFRegistrySPI.getSupportedUDFs()) {
        final GeomFunction instance = supported.getPredicateConstructor().get();
        spark.udf().register(instance.getRegisterName(), instance, DataTypes.BooleanType);
    }
}
Also used : UDFNameAndConstructor(org.locationtech.geowave.analytic.spark.sparksql.udf.UDFRegistrySPI.UDFNameAndConstructor)

Example 3 with UDFNameAndConstructor

use of org.locationtech.geowave.analytic.spark.sparksql.udf.UDFRegistrySPI.UDFNameAndConstructor in project geowave by locationtech.

In the class SpatialJoinCommand, method computeResults:

/**
 * Configures a {@code SpatialJoinRunner} from the command's parameters and options and runs it.
 *
 * <p>The three positional parameters are read as the left, right and output store names. Stores
 * that have not already been set on this command are loaded from the GeoWave config file.
 *
 * @param params operation parameters used to resolve the config file and console
 * @return always {@code null}
 * @throws ParameterException if fewer than three store names were supplied or no UDF matches the
 *         configured predicate name
 * @throws Exception anything raised while loading stores or running the join
 */
@Override
public Void computeResults(final OperationParams params) throws Exception {
    // Fail with a descriptive message instead of an IndexOutOfBoundsException from get().
    if (parameters.size() < 3) {
        throw new ParameterException("Requires arguments: <left storename> <right storename> <output storename>");
    }
    final String leftStoreName = parameters.get(0);
    final String rightStoreName = parameters.get(1);
    final String outputStoreName = parameters.get(2);
    // Config file
    final File configFile = getGeoWaveConfigFile(params);
    // Attempt to load stores.
    if (leftDataStore == null) {
        leftDataStore = CLIUtils.loadStore(leftStoreName, configFile, params.getConsole());
    }
    if (rightDataStore == null) {
        rightDataStore = CLIUtils.loadStore(rightStoreName, configFile, params.getConsole());
    }
    if (outputDataStore == null) {
        outputDataStore = CLIUtils.loadStore(outputStoreName, configFile, params.getConsole());
    }
    // Save a reference to the output store in the property management.
    final PersistableStore persistedStore = new PersistableStore(outputDataStore);
    final PropertyManagement properties = new PropertyManagement();
    properties.store(StoreParameters.StoreParam.OUTPUT_STORE, persistedStore);
    // Convert properties from the spatial join options
    final PropertyManagementConverter converter = new PropertyManagementConverter(properties);
    converter.readProperties(spatialJoinOptions);
    // Resolve the predicate function from its configured name
    final UDFNameAndConstructor udfFunc = UDFRegistrySPI.findFunctionByName(spatialJoinOptions.getPredicate());
    if (udfFunc == null) {
        throw new ParameterException("UDF function matching " + spatialJoinOptions.getPredicate() + " not found.");
    }
    final GeomFunction predicate = udfFunc.getPredicateConstructor().get();
    // Special case for distance function since it takes a scalar radius.
    if (predicate instanceof GeomWithinDistance) {
        ((GeomWithinDistance) predicate).setRadius(spatialJoinOptions.getRadius());
    }
    final SpatialJoinRunner runner = new SpatialJoinRunner();
    runner.setAppName(spatialJoinOptions.getAppName());
    runner.setMaster(spatialJoinOptions.getMaster());
    runner.setHost(spatialJoinOptions.getHost());
    runner.setPartCount(spatialJoinOptions.getPartCount());
    runner.setPredicate(predicate);
    // set DataStore options for runner
    runner.setLeftStore(leftDataStore);
    if (spatialJoinOptions.getLeftAdapterTypeName() != null) {
        runner.setLeftAdapterTypeName(spatialJoinOptions.getLeftAdapterTypeName());
    }
    runner.setRightStore(rightDataStore);
    if (spatialJoinOptions.getRightAdapterTypeName() != null) {
        runner.setRightAdapterTypeName(spatialJoinOptions.getRightAdapterTypeName());
    }
    runner.setOutputStore(outputDataStore);
    if (spatialJoinOptions.getOutputLeftAdapterTypeName() != null) {
        runner.setOutputLeftAdapterTypeName(spatialJoinOptions.getOutputLeftAdapterTypeName());
    }
    if (spatialJoinOptions.getOutputRightAdapterTypeName() != null) {
        runner.setOutputRightAdapterTypeName(spatialJoinOptions.getOutputRightAdapterTypeName());
    }
    runner.setNegativeTest(spatialJoinOptions.isNegativeTest());
    // Finally call run to execute the join; close the runner even when the join fails so
    // whatever it holds is released.
    try {
        runner.run();
    } finally {
        runner.close();
    }
    return null;
}
Also used : GeomFunction(org.locationtech.geowave.analytic.spark.sparksql.udf.GeomFunction) GeomWithinDistance(org.locationtech.geowave.analytic.spark.sparksql.udf.GeomWithinDistance) UDFNameAndConstructor(org.locationtech.geowave.analytic.spark.sparksql.udf.UDFRegistrySPI.UDFNameAndConstructor) PropertyManagementConverter(org.locationtech.geowave.analytic.mapreduce.operations.options.PropertyManagementConverter) PersistableStore(org.locationtech.geowave.analytic.store.PersistableStore) PropertyManagement(org.locationtech.geowave.analytic.PropertyManagement) ParameterException(com.beust.jcommander.ParameterException) File(java.io.File) SpatialJoinRunner(org.locationtech.geowave.analytic.spark.spatial.SpatialJoinRunner)

Aggregations

UDFNameAndConstructor (org.locationtech.geowave.analytic.spark.sparksql.udf.UDFRegistrySPI.UDFNameAndConstructor)3 GeomWithinDistance (org.locationtech.geowave.analytic.spark.sparksql.udf.GeomWithinDistance)2 SpatialJoinRunner (org.locationtech.geowave.analytic.spark.spatial.SpatialJoinRunner)2 ParameterException (com.beust.jcommander.ParameterException)1 JsonArray (com.google.gson.JsonArray)1 JsonElement (com.google.gson.JsonElement)1 JsonObject (com.google.gson.JsonObject)1 JsonParser (com.google.gson.JsonParser)1 File (java.io.File)1 Matcher (java.util.regex.Matcher)1 Pattern (java.util.regex.Pattern)1 Row (org.apache.spark.sql.Row)1 LogicalPlan (org.apache.spark.sql.catalyst.plans.logical.LogicalPlan)1 PropertyManagement (org.locationtech.geowave.analytic.PropertyManagement)1 PropertyManagementConverter (org.locationtech.geowave.analytic.mapreduce.operations.options.PropertyManagementConverter)1 GeomFunction (org.locationtech.geowave.analytic.spark.sparksql.udf.GeomFunction)1 PersistableStore (org.locationtech.geowave.analytic.store.PersistableStore)1 NumericIndexStrategy (org.locationtech.geowave.core.index.NumericIndexStrategy)1 AdapterToIndexMapping (org.locationtech.geowave.core.store.AdapterToIndexMapping)1