use of org.locationtech.geowave.core.index.NumericIndexStrategy in project geowave by locationtech.
the class SqlQueryRunner method run.
/**
 * Runs the configured SQL statement and returns the resulting data frame.
 *
 * <p>After the context, stores and views are initialised, the statement is inspected for a single
 * geometry predicate in its filter. When exactly one such predicate is found and its two arguments
 * can be mapped to input stores, a spatial join is executed up front through
 * {@link SpatialJoinRunner} and the original views are replaced with the joined results before the
 * SQL itself is run through the session. In every case where the statement cannot be analysed
 * (no WHERE clause, compound filter, unknown table relation, unparsable distance radius) the
 * method falls back to {@code runDefaultSQL()}.
 *
 * @return the result of running the SQL statement through the Spark session
 * @throws IOException declared by the method; presumably raised while loading stores or RDDs
 * @throws InterruptedException declared by the method; presumably raised by the join runner
 * @throws ExecutionException declared by the method; presumably raised by the join runner
 * @throws ParseException declared by the method; presumably raised while parsing the statement
 */
public Dataset<Row> run() throws IOException, InterruptedException, ExecutionException, ParseException {
  initContext();
  // Load stores and create views.
  loadStoresAndViews();

  // Create a version of the sql without string literals to check for
  // subquery syntax in sql statement.
  final Pattern stringLit = Pattern.compile("(?:\\'|\\\").*?(?:\\'|\\\")");
  final String cleanedSql = stringLit.matcher(sql).replaceAll("");
  LOGGER.debug("cleaned SQL statement: " + cleanedSql);

  // Count statement keywords outside of string literals. Two or more indicate a
  // sub-query (or a compound statement) that is not planned here. The keywords are
  // counted with find(): String.matches() has to consume the whole input, so a
  // lookahead-only expression can never succeed on a non-empty statement.
  final Pattern statementKeyword =
      Pattern.compile(
          "(?i)\\b(?:INSERT INTO|UPDATE|SELECT|WITH|DELETE|CREATE TABLE|ALTER TABLE|DROP TABLE)\\b");
  final Matcher keywordMatcher = statementKeyword.matcher(cleanedSql);
  int statementKeywordCount = 0;
  while ((statementKeywordCount < 2) && keywordMatcher.find()) {
    statementKeywordCount++;
  }

  // injecting a optimized join into the process
  if (statementKeywordCount < 2) {
    // Parse sparks logical plan for query and determine if spatial join
    // is present
    final LogicalPlan plan = session.sessionState().sqlParser().parsePlan(sql);
    final JsonParser gsonParser = new JsonParser();
    final JsonElement jElement = gsonParser.parse(plan.prettyJson());
    if (jElement.isJsonArray()) {
      for (final JsonElement childElement : jElement.getAsJsonArray()) {
        if (!childElement.isJsonObject()) {
          continue;
        }
        final JsonObject jObj = childElement.getAsJsonObject();
        final String objClass = jObj.get("class").getAsString();
        if (!Objects.equals(objClass, "org.apache.spark.sql.catalyst.plans.logical.Filter")) {
          continue;
        }
        // Search through filter Object to determine if
        // GeomPredicate function present in condition.
        final JsonElement conditionElements = jObj.get("condition");
        if ((conditionElements == null) || !conditionElements.isJsonArray()) {
          continue;
        }
        for (final JsonElement childCond : conditionElements.getAsJsonArray()) {
          if (!childCond.isJsonObject()) {
            continue;
          }
          final JsonObject condObj = childCond.getAsJsonObject();
          final String condClass = condObj.get("class").getAsString();
          if (!Objects.equals(
              condClass,
              "org.apache.spark.sql.catalyst.analysis.UnresolvedFunction")) {
            continue;
          }
          final String udfName =
              condObj.get("name").getAsJsonObject().get("funcName").getAsString();
          final UDFNameAndConstructor geomUDF = UDFRegistrySPI.findFunctionByName(udfName);
          if (geomUDF != null) {
            final ExtractedGeomPredicate relevantPredicate = new ExtractedGeomPredicate();
            relevantPredicate.predicate = geomUDF.getPredicateConstructor().get();
            relevantPredicate.predicateName = udfName;
            extractedPredicates.add(relevantPredicate);
          }
        }
      }
    }
  }

  // would indicate a spatial join
  if (extractedPredicates.size() == 1) {
    // These patterns detect keywords outside of quoted areas and capture them in
    // group 2. The alternations are wrapped in a non-capturing group so that the
    // word boundaries apply to every alternative (otherwise "OR\b" would also match
    // the tail of an identifier such as FLOOR).
    final Pattern whereDetect = Pattern.compile("(?i)(\"[^\"]*\"|'[^']*')|(\\bWHERE\\b)");
    final Pattern andOrDetect = Pattern.compile("(?i)(\"[^\"]*\"|'[^']*')|(\\b(?:AND|OR)\\b)");
    final Pattern orderGroupDetect =
        Pattern.compile("(?i)(\"[^\"]*\"|'[^']*')|(\\b(?:ORDER BY|GROUP BY)\\b)");

    final Matcher filterStart = getFirstPositiveMatcher(whereDetect, sql);
    if (filterStart == null) {
      LOGGER.error("There should be a where clause matching the pattern. Running default SQL");
      return runDefaultSQL();
    }
    final int whereStart = filterStart.start(2);
    int whereEnd = sql.length();
    final Matcher filterEnd = getFirstPositiveMatcher(orderGroupDetect, sql.substring(whereStart));
    if (filterEnd != null) {
      // The matcher ran on the substring starting at whereStart, so its offset has
      // to be shifted back into the coordinates of the full statement.
      whereEnd = whereStart + filterEnd.start(2);
    }
    final String filterClause = sql.substring(whereStart, whereEnd);
    LOGGER.warn("Extracted Filter Clause: " + filterClause);

    final Matcher compoundFilter = getFirstPositiveMatcher(andOrDetect, filterClause);
    if (compoundFilter != null) {
      LOGGER.warn("Compound conditional detected can result in multiple joins. Too complex to plan in current context. Running default sql");
      return runDefaultSQL();
    }

    final ExtractedGeomPredicate pred = extractedPredicates.get(0);
    // Parse filter string for predicate location
    final int functionPos = filterClause.indexOf(pred.predicateName);
    final int funcArgStart = (functionPos < 0) ? -1 : filterClause.indexOf("(", functionPos);
    final int funcArgEnd = (funcArgStart < 0) ? -1 : filterClause.indexOf(")", funcArgStart);
    if (funcArgEnd < 0) {
      // Without a locatable "name(...)" call there is nothing to extract arguments from.
      LOGGER.warn("Could not locate predicate function arguments in filter clause. Running default SQL");
      return runDefaultSQL();
    }
    String funcArgs = filterClause.substring(funcArgStart + 1, funcArgEnd);
    funcArgs = funcArgs.replaceAll("\\s", "");
    LOGGER.warn("Function Args: " + funcArgs);
    final String[] args = funcArgs.split(Pattern.quote(","));
    if (args.length == 2) {
      // Determine valid table relations that map to input stores
      final String[] tableRelations = getTableRelations(args);
      pred.leftTableRelation = tableRelations[0];
      pred.rightTableRelation = tableRelations[1];
    }
    if ((pred.leftTableRelation == null) || (pred.rightTableRelation == null)) {
      LOGGER.warn("Cannot translate table identifier to geowave rdd for join.");
      return runDefaultSQL();
    }

    // Extract radius for distance join from condition
    boolean negativePredicate = false;
    if (Objects.equals(pred.predicateName, "GeomDistance")) {
      // Look ahead two tokens for logical operand and scalar|boolean. The remainder is
      // trimmed and split on whitespace runs so that the usual "f(a, b) < 1.0" form
      // (space after the closing parenthesis) does not produce an empty first token.
      final String afterFunc = filterClause.substring(funcArgEnd + 1);
      final String[] tokens = afterFunc.trim().split("\\s+");
      if (tokens.length < 2) {
        LOGGER.warn("Could not extract radius for distance join. Running default SQL");
        return runDefaultSQL();
      }
      final String logicalOperand = tokens[0].trim();
      if ((logicalOperand.equals(">")) || (logicalOperand.equals(">="))) {
        negativePredicate = true;
      }
      final String radiusStr = tokens[1].trim();
      if (!org.apache.commons.lang3.math.NumberUtils.isNumber(radiusStr)) {
        LOGGER.warn("Could not extract radius for distance join. Running default SQL");
        return runDefaultSQL();
      }
      final Double r = org.apache.commons.lang3.math.NumberUtils.createDouble(radiusStr);
      if (r == null) {
        LOGGER.warn("Could not extract radius for distance join. Running default SQL");
        return runDefaultSQL();
      }
      ((GeomWithinDistance) pred.predicate).setRadius(r.doubleValue());
    }

    // At this point we are performing a join
    final SpatialJoinRunner joinRunner = new SpatialJoinRunner(session);
    // Collect input store info for join
    final InputStoreInfo leftStore = inputStores.get(pred.leftTableRelation);
    final InputStoreInfo rightStore = inputStores.get(pred.rightTableRelation);
    joinRunner.setNegativeTest(negativePredicate);

    // Setup store info for runner
    final AdapterToIndexMapping[] leftMappings =
        leftStore.getOrCreateAdapterIndexMappingStore().getIndicesForAdapter(
            leftStore.getOrCreateInternalAdapterStore().getAdapterId(leftStore.typeName));
    final AdapterToIndexMapping[] rightMappings =
        rightStore.getOrCreateAdapterIndexMappingStore().getIndicesForAdapter(
            rightStore.getOrCreateInternalAdapterStore().getAdapterId(rightStore.typeName));
    // The strategy of the first mapped index is used for each side; it stays null
    // when the adapter has no index mapping.
    NumericIndexStrategy leftStrat = null;
    if (leftMappings.length > 0) {
      leftStrat = leftMappings[0].getIndex(leftStore.getOrCreateIndexStore()).getIndexStrategy();
    }
    NumericIndexStrategy rightStrat = null;
    if (rightMappings.length > 0) {
      rightStrat = rightMappings[0].getIndex(rightStore.getOrCreateIndexStore()).getIndexStrategy();
    }
    joinRunner.setLeftRDD(
        GeoWaveRDDLoader.loadIndexedRDD(session.sparkContext(), leftStore.rdd, leftStrat));
    joinRunner.setRightRDD(
        GeoWaveRDDLoader.loadIndexedRDD(session.sparkContext(), rightStore.rdd, rightStrat));
    joinRunner.setPredicate(pred.predicate);
    joinRunner.setLeftStore(leftStore.storeOptions);
    joinRunner.setRightStore(rightStore.storeOptions);

    // Execute the join
    joinRunner.run();

    // Load results into dataframes and replace original views with
    // joined views
    final SimpleFeatureDataFrame leftResultFrame = new SimpleFeatureDataFrame(session);
    final SimpleFeatureDataFrame rightResultFrame = new SimpleFeatureDataFrame(session);
    leftResultFrame.init(leftStore.storeOptions, leftStore.typeName);
    rightResultFrame.init(rightStore.storeOptions, rightStore.typeName);
    final Dataset<Row> leftFrame = leftResultFrame.getDataFrame(joinRunner.getLeftResults());
    final Dataset<Row> rightFrame = rightResultFrame.getDataFrame(joinRunner.getRightResults());
    leftFrame.createOrReplaceTempView(leftStore.viewName);
    rightFrame.createOrReplaceTempView(rightStore.viewName);
  }

  // Run the remaining query through the session sql runner.
  // This will likely attempt to regenerate the join, but should reuse the
  // pairs generated from optimized join beforehand
  return session.sql(sql);
}
use of org.locationtech.geowave.core.index.NumericIndexStrategy in project geowave by locationtech.
the class SpatialJoinRunner method createRDDFromOptions.
/**
 * Builds an indexed RDD for one side of the join from the given store options.
 *
 * @param storeOptions the data store to load from
 * @param adapterTypeName the type name to query; when {@code null} the first feature type name
 *        reported for the store is used
 * @param internalAdapterStore passed through to {@code getIndicesForAdapter}
 * @param indexStore passed through to {@code getIndicesForAdapter}
 * @return the loaded indexed RDD, or {@code null} when no type name was given and the store
 *         reports no feature types
 * @throws IOException declared by the method; presumably raised while loading the RDD
 */
private GeoWaveIndexedRDD createRDDFromOptions(final DataStorePluginOptions storeOptions, String adapterTypeName, final InternalAdapterStore internalAdapterStore, final IndexStore indexStore) throws IOException {
  // No explicit type name: fall back to the first feature type the store offers.
  if (adapterTypeName == null) {
    final List<String> availableTypes = FeatureDataUtils.getFeatureTypeNames(storeOptions);
    if (availableTypes.isEmpty()) {
      LOGGER.error("No valid adapter found in store to perform join.");
      return null;
    }
    adapterTypeName = availableTypes.get(0);
  }

  // Query only the selected type and pin the split count to partCount.
  final RDDOptions loadOptions = new RDDOptions();
  loadOptions.setQuery(QueryBuilder.newBuilder().addTypeName(adapterTypeName).build());
  loadOptions.setMinSplits(partCount);
  loadOptions.setMaxSplits(partCount);

  // A user-provided strategy wins; otherwise take the strategy of the first index
  // found for the adapter (null when there is none).
  NumericIndexStrategy strategyForRdd = null;
  if (indexStrategy != null) {
    strategyForRdd = indexStrategy;
  } else {
    final Index[] adapterIndices =
        getIndicesForAdapter(storeOptions, adapterTypeName, internalAdapterStore, indexStore);
    if (adapterIndices.length > 0) {
      strategyForRdd = adapterIndices[0].getIndexStrategy();
    }
  }
  return GeoWaveRDDLoader.loadIndexedRDD(sc, storeOptions, loadOptions, strategyForRdd);
}
use of org.locationtech.geowave.core.index.NumericIndexStrategy in project geowave by locationtech.
the class NumericAttributeIndexProvider method buildIndex.
/**
 * Builds an attribute index for a numeric field.
 *
 * <p>The index strategy and the single dimension field are chosen from the field's binding class
 * (Byte, Short, Integer, Long, Float or Double, checked in that order).
 *
 * @param indexName the name given to the resulting index
 * @param adapter the adapter the field belongs to (not read by this method)
 * @param fieldDescriptor supplies the binding class and the field name
 * @return an attribute index over the described field
 * @throws ParameterException if the binding class is none of the supported numeric classes
 */
@Override
public AttributeIndex buildIndex(final String indexName, final DataTypeAdapter<?> adapter, final FieldDescriptor<?> fieldDescriptor) {
  final Class<?> bindingClass = fieldDescriptor.bindingClass();
  final String fieldName = fieldDescriptor.fieldName();

  // Pick the strategy and the matching dimension field per numeric type; the
  // model wrapping the dimension is identical for all of them and built once below.
  final NumericIndexStrategy indexStrategy;
  final NumericDimensionField<?> dimension;
  if (Byte.class.isAssignableFrom(bindingClass)) {
    indexStrategy = new SimpleByteIndexStrategy();
    dimension = new BasicNumericDimensionField<>(fieldName, Byte.class);
  } else if (Short.class.isAssignableFrom(bindingClass)) {
    indexStrategy = new SimpleShortIndexStrategy();
    dimension = new BasicNumericDimensionField<>(fieldName, Short.class);
  } else if (Integer.class.isAssignableFrom(bindingClass)) {
    indexStrategy = new SimpleIntegerIndexStrategy();
    dimension = new BasicNumericDimensionField<>(fieldName, Integer.class);
  } else if (Long.class.isAssignableFrom(bindingClass)) {
    indexStrategy = new SimpleLongIndexStrategy();
    dimension = new BasicNumericDimensionField<>(fieldName, Long.class);
  } else if (Float.class.isAssignableFrom(bindingClass)) {
    indexStrategy = new SimpleFloatIndexStrategy();
    dimension = new BasicNumericDimensionField<>(fieldName, Float.class);
  } else if (Double.class.isAssignableFrom(bindingClass)) {
    indexStrategy = new SimpleDoubleIndexStrategy();
    dimension = new BasicNumericDimensionField<>(fieldName, Double.class);
  } else {
    throw new ParameterException("Unsupported numeric attribute index class: " + bindingClass.getName());
  }

  final CommonIndexModel indexModel =
      new BasicIndexModel(new NumericDimensionField[] { dimension });
  return new AttributeIndexImpl(indexStrategy, indexModel, indexName, fieldName);
}
use of org.locationtech.geowave.core.index.NumericIndexStrategy in project geowave by locationtech.
the class CompoundHierarchicalIndexStrategyWrapper method getSubStrategies.
/**
 * Returns one sub-strategy per sub-strategy of the first hierarchical strategy, each rebuilt as
 * the full compound strategy with the hierarchical strategy swapped for that sub-strategy.
 *
 * <p>The parent chain is walked from the last parent to the first. At each level the side of the
 * parent that equals the strategy being replaced is substituted and the result is wrapped in a
 * new {@link CompoundIndexStrategy}. The prefix of each original sub-strategy is kept.
 *
 * @return the rebuilt sub-strategies, in the order reported by the first hierarchical strategy
 */
@Override
public SubStrategy[] getSubStrategies() {
  final SubStrategy[] hierarchicalSubs = firstHierarchicalStrategy.getSubStrategies();
  final SubStrategy[] rebuilt = new SubStrategy[hierarchicalSubs.length];

  int outIndex = 0;
  for (final SubStrategy hierarchicalSub : hierarchicalSubs) {
    // "replaced" is the strategy we are looking for inside the current parent;
    // "replacement" is what takes its place and grows by one wrapper per level.
    NumericIndexStrategy replaced = firstHierarchicalStrategy;
    NumericIndexStrategy replacement = hierarchicalSub.getIndexStrategy();

    // traverse parents in reverse order
    int level = parentStrategies.size() - 1;
    while (level >= 0) {
      final CompoundIndexStrategy parent = parentStrategies.get(level);
      final boolean primaryIsReplaced = parent.getPrimarySubStrategy().equals(replaced);
      replacement =
          primaryIsReplaced
              ? new CompoundIndexStrategy(replacement, parent.getSecondarySubStrategy())
              : new CompoundIndexStrategy(parent.getPrimarySubStrategy(), replacement);
      replaced = parent;
      level--;
    }

    rebuilt[outIndex] = new SubStrategy(replacement, hierarchicalSub.getPrefix());
    outIndex++;
  }
  return rebuilt;
}
use of org.locationtech.geowave.core.index.NumericIndexStrategy in project geowave by locationtech.
the class TieredSFCIndexStrategyTest method testPredefinedSpatialEntries.
/**
 * For every configured precision, indexes a range sized to that precision and checks both the
 * query ranges and the insertion ids produced by a defined-precision tiered strategy.
 *
 * <p>Query side: exactly one composite query range must start with the tier byte equal to the
 * precision under test, and its start and end must be identical. Insertion side: the first
 * insertion id is expected either at the tier under test (single id) or at the next defined tier
 * (2 or 4 ids, depending on the precision).
 */
@Test
public void testPredefinedSpatialEntries() throws Exception {
  final NumericDimensionDefinition[] dimensions =
      new NumericDimensionDefinition[] { new LongitudeDefinition(), new LatitudeDefinition(true) };
  final int[][] bitsPerDimension =
      new int[][] { DEFINED_BITS_OF_PRECISION.clone(), DEFINED_BITS_OF_PRECISION.clone() };
  final NumericIndexStrategy strategy =
      TieredSFCIndexFactory.createDefinedPrecisionTieredStrategy(
          dimensions,
          bitsPerDimension,
          SFCType.HILBERT);

  for (int sfcIndex = 0; sfcIndex < DEFINED_BITS_OF_PRECISION.length; sfcIndex++) {
    final int tierBits = DEFINED_BITS_OF_PRECISION[sfcIndex];
    final double precision = 360 / Math.pow(2, tierBits);

    // Data to index: the whole globe when the cell is wider than 180 degrees,
    // otherwise a single cell anchored at the origin.
    final NumericData[] dataPerDimension = new NumericData[2];
    if (precision > 180) {
      dataPerDimension[0] = new NumericRange(-180, 180);
      dataPerDimension[1] = new NumericRange(-90, 90);
    } else {
      dataPerDimension[0] = new NumericRange(0, precision);
      dataPerDimension[1] = new NumericRange(-precision, 0);
    }
    final MultiDimensionalNumericData indexedData = new BasicNumericDataset(dataPerDimension);
    final InsertionIds ids = strategy.getInsertionIds(indexedData);

    // Query range: the indexed range shrunk by an epsilon on every side.
    final NumericData[] queryRangePerDimension = new NumericData[dataPerDimension.length];
    for (int dim = 0; dim < dataPerDimension.length; dim++) {
      queryRangePerDimension[dim] =
          new NumericRange(
              dataPerDimension[dim].getMin() + QUERY_RANGE_EPSILON,
              dataPerDimension[dim].getMax() - QUERY_RANGE_EPSILON);
    }
    final MultiDimensionalNumericData queryData = new BasicNumericDataset(queryRangePerDimension);
    final QueryRanges queryRanges = strategy.getQueryRanges(queryData);

    final Set<Byte> queryRangeTiers = new HashSet<>();
    boolean rangeAtTierFound = false;
    for (final ByteArrayRange range : queryRanges.getCompositeQueryRanges()) {
      // The first byte of the range start is compared against the precision as the tier.
      final byte tier = range.getStart()[0];
      queryRangeTiers.add(tier);
      if (tier != tierBits) {
        continue;
      }
      if (rangeAtTierFound) {
        throw new Exception("multiple ranges were found unexpectedly for tier " + tier);
      }
      assertArrayEquals(
          "this range is an exact fit, so it should have exactly one value for tier " + tierBits,
          range.getStart(),
          range.getEnd());
      rangeAtTierFound = true;
    }
    if (!rangeAtTierFound) {
      throw new Exception("no ranges were found at the expected exact fit tier " + tierBits);
    }

    // An exact match at this tier is expected when the first id carries tier 0, when this
    // is the last defined precision, or when the next defined precision is not exactly one
    // bit finer. Otherwise the ids are expected at the next defined tier.
    final byte firstIdTier = ids.getCompositeInsertionIds().get(0)[0];
    final boolean lastDefinedTier = sfcIndex == (DEFINED_BITS_OF_PRECISION.length - 1);
    final boolean exactMatchExpected =
        (firstIdTier == 0)
            || lastDefinedTier
            || (DEFINED_BITS_OF_PRECISION[sfcIndex + 1] != (tierBits + 1));
    if (exactMatchExpected) {
      assertEquals(
          "Insertion ID expected to be exact match at tier " + tierBits,
          tierBits,
          ids.getCompositeInsertionIds().get(0)[0]);
      assertEquals(
          "Insertion ID size expected to be 1 at tier " + tierBits,
          1,
          ids.getCompositeInsertionIds().size());
    } else {
      final int nextTierBits = DEFINED_BITS_OF_PRECISION[sfcIndex + 1];
      assertEquals(
          "Insertion ID expected to be duplicated at tier " + nextTierBits,
          nextTierBits,
          ids.getCompositeInsertionIds().get(0)[0]);
      // if the precision is within the bounds of longitude but not
      // within latitude we will end up with 2 (rectangular
      // decomposition)
      // otherwise we will get a square decomposition of 4 ids
      final int expectedIds;
      if ((precision > 90) && (precision <= 180)) {
        expectedIds = 2;
      } else {
        expectedIds = 4;
      }
      assertEquals(
          "Insertion ID size expected to be " + expectedIds + " at tier " + nextTierBits,
          expectedIds,
          ids.getCompositeInsertionIds().size());
    }
  }
}
Aggregations