use of org.locationtech.geowave.analytic.spark.sparksql.udf.UDFRegistrySPI.UDFNameAndConstructor in project geowave by locationtech.
the class SqlQueryRunner method run.
/**
 * Runs the configured SQL statement, first attempting to pre-compute an optimized spatial join.
 *
 * <p>After the context and views are initialized, the statement is parsed into a Spark logical
 * plan and its Filter conditions are scanned for a function registered in {@link UDFRegistrySPI}.
 * When exactly one such predicate is found, the WHERE clause is not compound, and both function
 * arguments resolve to table relations, a {@link SpatialJoinRunner} is executed and the original
 * temp views are replaced with views over the join results. In every other case (or when any
 * parsing step fails) the statement is executed unchanged.
 *
 * @return the result of executing the SQL statement through the Spark session
 * @throws IOException declared by the method signature
 * @throws InterruptedException declared by the method signature
 * @throws ExecutionException declared by the method signature
 * @throws ParseException declared by the method signature
 */
public Dataset<Row> run() throws IOException, InterruptedException, ExecutionException, ParseException {
  initContext();
  // Load stores and create views.
  loadStoresAndViews();

  // Create a version of the sql without string literals to check for
  // subquery syntax in sql statement.
  final Pattern stringLit = Pattern.compile("(?:\\'|\\\").*?(?:\\'|\\\")");
  final Matcher m = stringLit.matcher(sql);
  final String cleanedSql = m.replaceAll("");
  LOGGER.debug("cleaned SQL statement: " + cleanedSql);

  // Detects two or more statement keywords, which indicates a subquery or compound statement.
  // The expression is a zero-width lookahead, so it must be evaluated with find():
  // String.matches() requires the whole input to be consumed and therefore could never succeed.
  // DOTALL lets the lookahead span statements that are formatted over several lines.
  final Pattern multiStatementDetect =
      Pattern.compile(
          "(?is)^(?=(?:.*(?:\\b(?:INSERT INTO|UPDATE|SELECT|WITH|DELETE|CREATE TABLE|ALTER TABLE|DROP TABLE)\\b)){2})");

  // injecting a optimized join into the process
  if (!multiStatementDetect.matcher(cleanedSql).find()) {
    // Parse sparks logical plan for query and determine if spatial join
    // is present
    final LogicalPlan plan = session.sessionState().sqlParser().parsePlan(sql);
    final JsonParser gsonParser = new JsonParser();
    final JsonElement jElement = gsonParser.parse(plan.prettyJson());
    if (jElement.isJsonArray()) {
      for (final JsonElement childElement : jElement.getAsJsonArray()) {
        if (!childElement.isJsonObject()) {
          continue;
        }
        final JsonObject jObj = childElement.getAsJsonObject();
        final String objClass = jObj.get("class").getAsString();
        if (!Objects.equals(objClass, "org.apache.spark.sql.catalyst.plans.logical.Filter")) {
          continue;
        }
        // Search through filter Object to determine if
        // GeomPredicate function present in condition.
        final JsonElement conditionElements = jObj.get("condition");
        if ((conditionElements == null) || !conditionElements.isJsonArray()) {
          continue;
        }
        for (final JsonElement childCond : conditionElements.getAsJsonArray()) {
          if (!childCond.isJsonObject()) {
            continue;
          }
          final JsonObject condObj = childCond.getAsJsonObject();
          final String condClass = condObj.get("class").getAsString();
          if (!Objects.equals(
              condClass,
              "org.apache.spark.sql.catalyst.analysis.UnresolvedFunction")) {
            continue;
          }
          final String udfName =
              condObj.get("name").getAsJsonObject().get("funcName").getAsString();
          final UDFNameAndConstructor geomUDF = UDFRegistrySPI.findFunctionByName(udfName);
          if (geomUDF != null) {
            final ExtractedGeomPredicate relevantPredicate = new ExtractedGeomPredicate();
            relevantPredicate.predicate = geomUDF.getPredicateConstructor().get();
            relevantPredicate.predicateName = udfName;
            extractedPredicates.add(relevantPredicate);
          }
        }
      }
    }
  }

  // would indicate a spatial join
  if (extractedPredicates.size() == 1) {
    // These patterns detect keywords outside of quoted areas and capture them in group 2.
    // The alternatives are grouped so the word boundaries apply to every keyword; without the
    // group, "\\bAND|OR\\b" also matches fragments of identifiers such as "floor".
    final Pattern whereDetect = Pattern.compile("(?i)(\"[^\"]*\"|'[^']*')|(\\bWHERE\\b)");
    final Pattern andOrDetect = Pattern.compile("(?i)(\"[^\"]*\"|'[^']*')|(\\b(?:AND|OR)\\b)");
    final Pattern orderGroupDetect =
        Pattern.compile("(?i)(\"[^\"]*\"|'[^']*')|(\\b(?:ORDER|GROUP)\\s+BY\\b)");

    final Matcher filterStart = getFirstPositiveMatcher(whereDetect, sql);
    if (filterStart == null) {
      LOGGER.error("There should be a where clause matching the pattern. Running default SQL");
      return runDefaultSQL();
    }
    final int whereStart = filterStart.start(2);
    int whereEnd = sql.length();
    final Matcher filterEnd =
        getFirstPositiveMatcher(orderGroupDetect, sql.substring(whereStart));
    if (filterEnd != null) {
      // The matcher ran against the substring starting at whereStart, so its offset has to be
      // translated back into an index of the full statement.
      whereEnd = whereStart + filterEnd.start(2);
    }
    final String filterClause = sql.substring(whereStart, whereEnd);
    LOGGER.warn("Extracted Filter Clause: " + filterClause);

    final Matcher compoundFilter = getFirstPositiveMatcher(andOrDetect, filterClause);
    if (compoundFilter != null) {
      LOGGER.warn("Compound conditional detected can result in multiple joins. Too complex to plan in current context. Running default sql");
      return runDefaultSQL();
    }

    final ExtractedGeomPredicate pred = extractedPredicates.get(0);
    // Parse filter string for predicate location
    final int functionPos = filterClause.indexOf(pred.predicateName);
    final int funcArgStart = (functionPos < 0) ? -1 : filterClause.indexOf("(", functionPos);
    final int funcArgEnd = (funcArgStart < 0) ? -1 : filterClause.indexOf(")", funcArgStart);
    if (funcArgEnd < 0) {
      // The predicate call could not be located in the extracted clause; fall back rather than
      // failing on an invalid substring range.
      LOGGER.warn(
          "Could not locate arguments of "
              + pred.predicateName
              + " in filter clause. Running default SQL");
      return runDefaultSQL();
    }
    String funcArgs = filterClause.substring(funcArgStart + 1, funcArgEnd);
    funcArgs = funcArgs.replaceAll("\\s", "");
    LOGGER.warn("Function Args: " + funcArgs);
    final String[] args = funcArgs.split(Pattern.quote(","));
    if (args.length == 2) {
      // Determine valid table relations that map to input stores
      final String[] tableRelations = getTableRelations(args);
      pred.leftTableRelation = tableRelations[0];
      pred.rightTableRelation = tableRelations[1];
    }
    if ((pred.leftTableRelation == null) || (pred.rightTableRelation == null)) {
      LOGGER.warn("Cannot translate table identifier to geowave rdd for join.");
      return runDefaultSQL();
    }

    // Extract radius for distance join from condition
    boolean negativePredicate = false;
    if (Objects.equals(pred.predicateName, "GeomDistance")) {
      // Look ahead two tokens for logical operand and scalar|boolean.
      // Trim before splitting on whitespace runs: the text after the closing parenthesis
      // normally starts with a space, which would otherwise yield an empty first token.
      final String afterFunc = filterClause.substring(funcArgEnd + 1).trim();
      final String[] tokens = afterFunc.split("\\s+");
      if (tokens.length < 2) {
        LOGGER.warn("Could not extract radius for distance join. Running default SQL");
        return runDefaultSQL();
      }
      final String logicalOperand = tokens[0].trim();
      if ((logicalOperand.equals(">")) || (logicalOperand.equals(">="))) {
        negativePredicate = true;
      }
      final String radiusStr = tokens[1].trim();
      if (!org.apache.commons.lang3.math.NumberUtils.isNumber(radiusStr)) {
        LOGGER.warn("Could not extract radius for distance join. Running default SQL");
        return runDefaultSQL();
      }
      final Double r = org.apache.commons.lang3.math.NumberUtils.createDouble(radiusStr);
      if (r == null) {
        LOGGER.warn("Could not extract radius for distance join. Running default SQL");
        return runDefaultSQL();
      }
      ((GeomWithinDistance) pred.predicate).setRadius(r.doubleValue());
    }

    // At this point we are performing a join
    final SpatialJoinRunner joinRunner = new SpatialJoinRunner(session);
    // Collect input store info for join
    final InputStoreInfo leftStore = inputStores.get(pred.leftTableRelation);
    final InputStoreInfo rightStore = inputStores.get(pred.rightTableRelation);
    joinRunner.setNegativeTest(negativePredicate);

    // Setup store info for runner
    final AdapterToIndexMapping[] leftMappings =
        leftStore.getOrCreateAdapterIndexMappingStore().getIndicesForAdapter(
            leftStore.getOrCreateInternalAdapterStore().getAdapterId(leftStore.typeName));
    final AdapterToIndexMapping[] rightMappings =
        rightStore.getOrCreateAdapterIndexMappingStore().getIndicesForAdapter(
            rightStore.getOrCreateInternalAdapterStore().getAdapterId(rightStore.typeName));
    // Use the index strategy of the first mapping when one exists; otherwise pass null through.
    NumericIndexStrategy leftStrat = null;
    if (leftMappings.length > 0) {
      leftStrat = leftMappings[0].getIndex(leftStore.getOrCreateIndexStore()).getIndexStrategy();
    }
    NumericIndexStrategy rightStrat = null;
    if (rightMappings.length > 0) {
      rightStrat =
          rightMappings[0].getIndex(rightStore.getOrCreateIndexStore()).getIndexStrategy();
    }
    joinRunner.setLeftRDD(
        GeoWaveRDDLoader.loadIndexedRDD(session.sparkContext(), leftStore.rdd, leftStrat));
    joinRunner.setRightRDD(
        GeoWaveRDDLoader.loadIndexedRDD(session.sparkContext(), rightStore.rdd, rightStrat));
    joinRunner.setPredicate(pred.predicate);
    joinRunner.setLeftStore(leftStore.storeOptions);
    joinRunner.setRightStore(rightStore.storeOptions);

    // Execute the join
    joinRunner.run();

    // Load results into dataframes and replace original views with
    // joined views
    final SimpleFeatureDataFrame leftResultFrame = new SimpleFeatureDataFrame(session);
    final SimpleFeatureDataFrame rightResultFrame = new SimpleFeatureDataFrame(session);
    leftResultFrame.init(leftStore.storeOptions, leftStore.typeName);
    rightResultFrame.init(rightStore.storeOptions, rightStore.typeName);
    final Dataset<Row> leftFrame = leftResultFrame.getDataFrame(joinRunner.getLeftResults());
    final Dataset<Row> rightFrame = rightResultFrame.getDataFrame(joinRunner.getRightResults());
    leftFrame.createOrReplaceTempView(leftStore.viewName);
    rightFrame.createOrReplaceTempView(rightStore.viewName);
  }

  // Run the remaining query through the session sql runner.
  // This will likely attempt to regenerate the join, but should reuse the
  // pairs generated from optimized join beforehand
  final Dataset<Row> results = session.sql(sql);
  return results;
}
use of org.locationtech.geowave.analytic.spark.sparksql.udf.UDFRegistrySPI.UDFNameAndConstructor in project geowave by locationtech.
the class GeomFunctionRegistry method registerGeometryFunctions.
/**
 * Registers the geometry UDFs on the given Spark session.
 *
 * <p>{@code GeomDistance} and {@code GeomFromWKT} are registered explicitly with their own return
 * types; every function reported by {@link UDFRegistrySPI#getSupportedUDFs()} is then registered
 * under its own register name with a boolean return type.
 *
 * @param spark the session whose UDF registry receives the functions
 */
public static void registerGeometryFunctions(final SparkSession spark) {
  // These two do not return a boolean, so they are registered outside of the
  // registry loop with explicit return types.
  spark.udf().register("GeomDistance", geomDistanceInstance, DataTypes.DoubleType);
  spark.udf().register("GeomFromWKT", geomWKTInstance, GeoWaveSpatialEncoders.geometryUDT);

  // Everything supplied by the registry SPI is registered as a boolean function.
  for (final UDFNameAndConstructor registryEntry : UDFRegistrySPI.getSupportedUDFs()) {
    final GeomFunction predicateFunction = registryEntry.getPredicateConstructor().get();
    spark.udf().register(
        predicateFunction.getRegisterName(),
        predicateFunction,
        DataTypes.BooleanType);
  }
}
use of org.locationtech.geowave.analytic.spark.sparksql.udf.UDFRegistrySPI.UDFNameAndConstructor in project geowave by locationtech.
the class SpatialJoinCommand method computeResults.
/**
 * Configures and executes a spatial join between two stores, writing to an output store.
 *
 * <p>The first three entries of {@code parameters} are read as the left, right and output store
 * names. Stores that have not already been set on this command are loaded from the GeoWave
 * config file. The predicate named in the join options is looked up through
 * {@link UDFRegistrySPI}, and the runner is closed once the join finishes, whether or not it
 * succeeded.
 *
 * @param params the operation parameters used to locate the config file and console
 * @return always {@code null}
 * @throws ParameterException if fewer than three store names were supplied or no UDF matches the
 *         requested predicate name
 * @throws Exception if loading the stores or running the join fails
 */
@Override
public Void computeResults(final OperationParams params) throws Exception {
  // Fail with a descriptive message instead of an index error when arguments are missing.
  if ((parameters == null) || (parameters.size() < 3)) {
    throw new ParameterException(
        "Requires arguments: <left store name> <right store name> <output store name>");
  }
  final String leftStoreName = parameters.get(0);
  final String rightStoreName = parameters.get(1);
  final String outputStoreName = parameters.get(2);

  // Config file
  final File configFile = getGeoWaveConfigFile(params);

  // Attempt to load stores that were not already provided.
  if (leftDataStore == null) {
    leftDataStore = CLIUtils.loadStore(leftStoreName, configFile, params.getConsole());
  }
  if (rightDataStore == null) {
    rightDataStore = CLIUtils.loadStore(rightStoreName, configFile, params.getConsole());
  }
  if (outputDataStore == null) {
    outputDataStore = CLIUtils.loadStore(outputStoreName, configFile, params.getConsole());
  }

  // Save a reference to the output store in the property management.
  final PersistableStore persistedStore = new PersistableStore(outputDataStore);
  final PropertyManagement properties = new PropertyManagement();
  properties.store(StoreParameters.StoreParam.OUTPUT_STORE, persistedStore);

  // Read the spatial join options into the property management.
  final PropertyManagementConverter converter = new PropertyManagementConverter(properties);
  converter.readProperties(spatialJoinOptions);

  // Resolve the join predicate from its registered UDF name.
  final UDFNameAndConstructor udfFunc =
      UDFRegistrySPI.findFunctionByName(spatialJoinOptions.getPredicate());
  if (udfFunc == null) {
    throw new ParameterException("UDF function matching " + spatialJoinOptions.getPredicate() + " not found.");
  }
  final GeomFunction predicate = udfFunc.getPredicateConstructor().get();

  // Special case for distance function since it takes a scalar radius.
  if (predicate instanceof GeomWithinDistance) {
    ((GeomWithinDistance) predicate).setRadius(spatialJoinOptions.getRadius());
  }

  final SpatialJoinRunner runner = new SpatialJoinRunner();
  runner.setAppName(spatialJoinOptions.getAppName());
  runner.setMaster(spatialJoinOptions.getMaster());
  runner.setHost(spatialJoinOptions.getHost());
  runner.setPartCount(spatialJoinOptions.getPartCount());
  runner.setPredicate(predicate);

  // set DataStore options for runner; adapter type names are optional
  runner.setLeftStore(leftDataStore);
  if (spatialJoinOptions.getLeftAdapterTypeName() != null) {
    runner.setLeftAdapterTypeName(spatialJoinOptions.getLeftAdapterTypeName());
  }
  runner.setRightStore(rightDataStore);
  if (spatialJoinOptions.getRightAdapterTypeName() != null) {
    runner.setRightAdapterTypeName(spatialJoinOptions.getRightAdapterTypeName());
  }
  runner.setOutputStore(outputDataStore);
  if (spatialJoinOptions.getOutputLeftAdapterTypeName() != null) {
    runner.setOutputLeftAdapterTypeName(spatialJoinOptions.getOutputLeftAdapterTypeName());
  }
  if (spatialJoinOptions.getOutputRightAdapterTypeName() != null) {
    runner.setOutputRightAdapterTypeName(spatialJoinOptions.getOutputRightAdapterTypeName());
  }
  runner.setNegativeTest(spatialJoinOptions.isNegativeTest());

  // Finally call run to execute the join. close() sits in a finally block so the runner is
  // released even when the join throws.
  try {
    runner.run();
  } finally {
    runner.close();
  }
  return null;
}
Aggregations