Use of org.locationtech.geowave.core.store.AdapterToIndexMapping in the geowave project by locationtech.
Class SqlQueryRunner, method run.
/**
 * Runs the configured SQL statement, first attempting to replace a single spatial-join predicate
 * with a pre-computed join.
 *
 * <p>Steps performed:
 * <ol>
 * <li>Initialise the context and load the stores/views.</li>
 * <li>Unless the statement (with string literals removed) contains two or more statement
 * keywords, parse the logical plan and collect every filter function that
 * {@code UDFRegistrySPI} recognises into {@code extractedPredicates}.</li>
 * <li>If exactly one predicate was collected, extract its two arguments from the WHERE clause,
 * run a {@code SpatialJoinRunner} over the two matching stores and replace the original views
 * with the joined results.</li>
 * <li>Run the SQL through the session.</li>
 * </ol>
 * Whenever the WHERE clause cannot be analysed (no WHERE, compound AND/OR condition, unknown
 * table relation, unparseable distance radius, malformed function call) the method falls back
 * to {@code runDefaultSQL()}.
 *
 * @return the result of the SQL statement
 * @throws IOException declared by this method; the originating call is not visible here
 * @throws InterruptedException declared by this method; the originating call is not visible here
 * @throws ExecutionException declared by this method; the originating call is not visible here
 * @throws ParseException declared by this method; the originating call is not visible here
 */
public Dataset<Row> run() throws IOException, InterruptedException, ExecutionException, ParseException {
  initContext();
  // Load stores and create views.
  loadStoresAndViews();
  // Create a version of the sql without string literals to check for
  // subquery syntax in sql statement.
  final Pattern stringLit = Pattern.compile("(?:\\'|\\\").*?(?:\\'|\\\")");
  final Matcher m = stringLit.matcher(sql);
  final String cleanedSql = m.replaceAll("");
  LOGGER.debug("cleaned SQL statement: " + cleanedSql);
  // The pattern is a pure zero-width lookahead, so it has to be applied with find();
  // String.matches() requires the whole input to be consumed and could therefore never
  // succeed. DOTALL lets the lookahead see keywords on different lines of the statement.
  final Pattern multiStatementDetect =
      Pattern.compile(
          "(?is)^(?=(?:.*(?:\\b(?:INSERT INTO|UPDATE|SELECT|WITH|DELETE|CREATE TABLE|ALTER TABLE|DROP TABLE)\\b)){2})");
  // injecting a optimized join into the process
  if (!multiStatementDetect.matcher(cleanedSql).find()) {
    // Parse sparks logical plan for query and determine if spatial join
    // is present
    final LogicalPlan plan = session.sessionState().sqlParser().parsePlan(sql);
    final JsonParser gsonParser = new JsonParser();
    final JsonElement jElement = gsonParser.parse(plan.prettyJson());
    if (jElement.isJsonArray()) {
      for (final JsonElement childElement : jElement.getAsJsonArray()) {
        if (!childElement.isJsonObject()) {
          continue;
        }
        final JsonObject jObj = childElement.getAsJsonObject();
        final String objClass = jObj.get("class").getAsString();
        if (!Objects.equals(objClass, "org.apache.spark.sql.catalyst.plans.logical.Filter")) {
          continue;
        }
        // Search through filter Object to determine if
        // GeomPredicate function present in condition.
        final JsonElement conditionElements = jObj.get("condition");
        if ((conditionElements == null) || !conditionElements.isJsonArray()) {
          continue;
        }
        for (final JsonElement childCond : conditionElements.getAsJsonArray()) {
          if (!childCond.isJsonObject()) {
            continue;
          }
          final JsonObject condObj = childCond.getAsJsonObject();
          final String condClass = condObj.get("class").getAsString();
          if (!Objects.equals(condClass, "org.apache.spark.sql.catalyst.analysis.UnresolvedFunction")) {
            continue;
          }
          final String udfName = condObj.get("name").getAsJsonObject().get("funcName").getAsString();
          final UDFNameAndConstructor geomUDF = UDFRegistrySPI.findFunctionByName(udfName);
          if (geomUDF != null) {
            final ExtractedGeomPredicate relevantPredicate = new ExtractedGeomPredicate();
            relevantPredicate.predicate = geomUDF.getPredicateConstructor().get();
            relevantPredicate.predicateName = udfName;
            extractedPredicates.add(relevantPredicate);
          }
        }
      }
    }
  }
  // would indicate a spatial join
  if (extractedPredicates.size() == 1) {
    // These patterns detect the keyword outside of quoted areas and capture it in group 2.
    // The alternatives are wrapped in a non-capturing group so that the word boundaries
    // apply to both sides of every keyword (otherwise "OR\b" would match e.g. "color").
    final Pattern whereDetect = Pattern.compile("(?i)(\"[^\"]*\"|'[^']*')|(\\bWHERE\\b)");
    final Pattern andOrDetect = Pattern.compile("(?i)(\"[^\"]*\"|'[^']*')|(\\b(?:AND|OR)\\b)");
    final Pattern orderGroupDetect =
        Pattern.compile("(?i)(\"[^\"]*\"|'[^']*')|(\\b(?:ORDER BY|GROUP BY)\\b)");
    // NOTE(review): getFirstPositiveMatcher is assumed to return a matcher positioned on the
    // first match whose group 2 participated, or null when there is none - confirm.
    final Matcher filterStart = getFirstPositiveMatcher(whereDetect, sql);
    if (filterStart == null) {
      LOGGER.error("There should be a where clause matching the pattern. Running default SQL");
      return runDefaultSQL();
    }
    final int whereStart = filterStart.start(2);
    int whereEnd = sql.length();
    final Matcher filterEnd = getFirstPositiveMatcher(orderGroupDetect, sql.substring(whereStart));
    if (filterEnd != null) {
      // The matcher ran on the substring starting at whereStart, so its offset has to be
      // translated back into an index of the full statement.
      whereEnd = whereStart + filterEnd.start(2);
    }
    final String filterClause = sql.substring(whereStart, whereEnd);
    LOGGER.warn("Extracted Filter Clause: " + filterClause);
    final Matcher compoundFilter = getFirstPositiveMatcher(andOrDetect, filterClause);
    if (compoundFilter != null) {
      LOGGER.warn("Compound conditional detected can result in multiple joins. Too complex to plan in current context. Running default sql");
      return runDefaultSQL();
    }
    final ExtractedGeomPredicate pred = extractedPredicates.get(0);
    // Parse filter string for predicate location
    final int functionPos = filterClause.indexOf(pred.predicateName);
    final int funcArgStart = filterClause.indexOf("(", functionPos);
    final int funcArgEnd = filterClause.indexOf(")", funcArgStart);
    if ((funcArgStart < 0) || (funcArgEnd < funcArgStart)) {
      // No well-formed argument list could be located; do not attempt the optimized join.
      LOGGER.warn("Cannot translate table identifier to geowave rdd for join.");
      return runDefaultSQL();
    }
    String funcArgs = filterClause.substring(funcArgStart + 1, funcArgEnd);
    funcArgs = funcArgs.replaceAll("\\s", "");
    LOGGER.warn("Function Args: " + funcArgs);
    final String[] args = funcArgs.split(Pattern.quote(","));
    if (args.length == 2) {
      // Determine valid table relations that map to input stores
      final String[] tableRelations = getTableRelations(args);
      pred.leftTableRelation = tableRelations[0];
      pred.rightTableRelation = tableRelations[1];
    }
    if ((pred.leftTableRelation == null) || (pred.rightTableRelation == null)) {
      LOGGER.warn("Cannot translate table identifier to geowave rdd for join.");
      return runDefaultSQL();
    }
    // Extract radius for distance join from condition
    boolean negativePredicate = false;
    if (Objects.equals(pred.predicateName, "GeomDistance")) {
      // Look ahead two tokens for logical operand and scalar|boolean.
      // Trim and split on whitespace runs so the usual leading space after ')' does not
      // produce an empty first token.
      final String afterFunc = filterClause.substring(funcArgEnd + 1);
      final String[] tokens = afterFunc.trim().split("\\s+");
      double radius = 0.0;
      if (tokens.length < 2) {
        LOGGER.warn("Could not extract radius for distance join. Running default SQL");
        return runDefaultSQL();
      } else {
        final String logicalOperand = tokens[0].trim();
        if ((logicalOperand.equals(">")) || (logicalOperand.equals(">="))) {
          negativePredicate = true;
        }
        final String radiusStr = tokens[1].trim();
        if (!org.apache.commons.lang3.math.NumberUtils.isNumber(radiusStr)) {
          LOGGER.warn("Could not extract radius for distance join. Running default SQL");
          return runDefaultSQL();
        } else {
          final Double r = org.apache.commons.lang3.math.NumberUtils.createDouble(radiusStr);
          if (r == null) {
            LOGGER.warn("Could not extract radius for distance join. Running default SQL");
            return runDefaultSQL();
          }
          radius = r.doubleValue();
        }
      }
      ((GeomWithinDistance) pred.predicate).setRadius(radius);
    }
    // At this point we are performing a join
    final SpatialJoinRunner joinRunner = new SpatialJoinRunner(session);
    // Collect input store info for join
    final InputStoreInfo leftStore = inputStores.get(pred.leftTableRelation);
    final InputStoreInfo rightStore = inputStores.get(pred.rightTableRelation);
    joinRunner.setNegativeTest(negativePredicate);
    // Setup store info for runner
    final AdapterToIndexMapping[] leftMappings = leftStore.getOrCreateAdapterIndexMappingStore().getIndicesForAdapter(leftStore.getOrCreateInternalAdapterStore().getAdapterId(leftStore.typeName));
    final AdapterToIndexMapping[] rightMappings = rightStore.getOrCreateAdapterIndexMappingStore().getIndicesForAdapter(rightStore.getOrCreateInternalAdapterStore().getAdapterId(rightStore.typeName));
    // The strategy of the first mapped index is used; it stays null when nothing is mapped.
    NumericIndexStrategy leftStrat = null;
    if (leftMappings.length > 0) {
      leftStrat = leftMappings[0].getIndex(leftStore.getOrCreateIndexStore()).getIndexStrategy();
    }
    NumericIndexStrategy rightStrat = null;
    if (rightMappings.length > 0) {
      rightStrat = rightMappings[0].getIndex(rightStore.getOrCreateIndexStore()).getIndexStrategy();
    }
    joinRunner.setLeftRDD(GeoWaveRDDLoader.loadIndexedRDD(session.sparkContext(), leftStore.rdd, leftStrat));
    joinRunner.setRightRDD(GeoWaveRDDLoader.loadIndexedRDD(session.sparkContext(), rightStore.rdd, rightStrat));
    joinRunner.setPredicate(pred.predicate);
    joinRunner.setLeftStore(leftStore.storeOptions);
    joinRunner.setRightStore(rightStore.storeOptions);
    // Execute the join
    joinRunner.run();
    // Load results into dataframes and replace original views with
    // joined views
    final SimpleFeatureDataFrame leftResultFrame = new SimpleFeatureDataFrame(session);
    final SimpleFeatureDataFrame rightResultFrame = new SimpleFeatureDataFrame(session);
    leftResultFrame.init(leftStore.storeOptions, leftStore.typeName);
    rightResultFrame.init(rightStore.storeOptions, rightStore.typeName);
    final Dataset<Row> leftFrame = leftResultFrame.getDataFrame(joinRunner.getLeftResults());
    final Dataset<Row> rightFrame = rightResultFrame.getDataFrame(joinRunner.getRightResults());
    leftFrame.createOrReplaceTempView(leftStore.viewName);
    rightFrame.createOrReplaceTempView(rightStore.viewName);
  }
  // Run the remaining query through the session sql runner.
  // This will likely attempt to regenerate the join, but should reuse the
  // pairs generated from optimized join beforehand
  return session.sql(sql);
}
Use of org.locationtech.geowave.core.store.AdapterToIndexMapping in the geowave project by locationtech.
Class AccumuloOperations, method addConstraintsScanIteratorSettings.
/**
 * Builds the query iterator setting for a scan and attaches it to the scanner.
 *
 * <p>Field subsetting is always applied first. An aggregation iterator is configured when the
 * reader requests server-side aggregation, filter options are added when a filter is present
 * and secondary indexing is disabled, and a plain whole-row iterator is used when nothing else
 * was configured but visibilities are mixed. The index filter is only added when no
 * distributable filter is in use and secondary indexing is disabled.
 *
 * @param params the reader parameters describing index, filter and aggregation
 * @param scanner the scanner that receives the iterator configuration
 * @param options the data store options, consulted for secondary indexing
 */
protected <T> void addConstraintsScanIteratorSettings(final ReaderParams<T> params, final ScannerBase scanner, final DataStoreOptions options) {
  addFieldSubsettingToIterator(params, scanner);
  final int priority = QueryFilterIterator.QUERY_ITERATOR_PRIORITY;
  final String iteratorName = QueryFilterIterator.QUERY_ITERATOR_NAME;
  IteratorSetting settings = null;

  if (params.isServersideAggregation()) {
    settings =
        params.isMixedVisibility()
            ? new IteratorSetting(priority, iteratorName, WholeRowAggregationIterator.class)
            : new IteratorSetting(priority, iteratorName, AggregationIterator.class);
    if ((params.getIndex() != null) && (params.getIndex().getIndexModel() != null)) {
      final byte[] modelBytes = PersistenceUtils.toBinary(params.getIndex().getIndexModel());
      settings.addOption(QueryFilterIterator.MODEL, ByteArrayUtils.byteArrayToString(modelBytes));
    }
    if ((params.getIndex() != null) && (params.getIndex().getIndexStrategy() != null)) {
      final int partitionKeyLength = params.getIndex().getIndexStrategy().getPartitionKeyLength();
      settings.addOption(QueryFilterIterator.PARTITION_KEY_LENGTH, Integer.toString(partitionKeyLength));
    }
    // The adapter and its index mapping are only passed along for aggregations that are not
    // common-index aggregations and that come with an adapter.
    final boolean adapterRequired =
        !(params.getAggregation().getRight() instanceof CommonIndexAggregation)
            && (params.getAggregation().getLeft() != null);
    if (adapterRequired) {
      final byte[] adapterBytes = PersistenceUtils.toBinary(params.getAggregation().getLeft());
      settings.addOption(AggregationIterator.ADAPTER_OPTION_NAME, ByteArrayUtils.byteArrayToString(adapterBytes));
      final AdapterToIndexMapping adapterMapping =
          params.getAdapterIndexMappingStore().getMapping(
              params.getAggregation().getLeft().getAdapterId(),
              params.getIndex().getName());
      final byte[] mappingBytes = PersistenceUtils.toBinary(adapterMapping);
      settings.addOption(AggregationIterator.ADAPTER_INDEX_MAPPING_OPTION_NAME, ByteArrayUtils.byteArrayToString(mappingBytes));
    }
    final Aggregation aggregation = params.getAggregation().getRight();
    final byte[] aggregationClassId = PersistenceUtils.toClassId(aggregation);
    settings.addOption(AggregationIterator.AGGREGATION_OPTION_NAME, ByteArrayUtils.byteArrayToString(aggregationClassId));
    if (aggregation.getParameters() != null) {
      // sets the parameters
      final byte[] parameterBytes = PersistenceUtils.toBinary(aggregation.getParameters());
      settings.addOption(AggregationIterator.PARAMETER_OPTION_NAME, ByteArrayUtils.byteArrayToString(parameterBytes));
    }
  }

  final boolean distributableFilter = (params.getFilter() != null) && !options.isSecondaryIndexing();
  if (distributableFilter) {
    if (settings == null) {
      settings =
          params.isMixedVisibility()
              ? new IteratorSetting(priority, iteratorName, WholeRowQueryFilterIterator.class)
              : new IteratorSetting(priority, iteratorName, QueryFilterIterator.class);
    }
    final byte[] filterBytes = PersistenceUtils.toBinary(params.getFilter());
    settings.addOption(QueryFilterIterator.FILTER, ByteArrayUtils.byteArrayToString(filterBytes));
    // the model may already be present as an option if this is an aggregation
    final boolean modelAlreadySet = settings.getOptions().containsKey(QueryFilterIterator.MODEL);
    if (!modelAlreadySet) {
      final byte[] modelBytes = PersistenceUtils.toBinary(params.getIndex().getIndexModel());
      settings.addOption(QueryFilterIterator.MODEL, ByteArrayUtils.byteArrayToString(modelBytes));
      final int partitionKeyLength = params.getIndex().getIndexStrategy().getPartitionKeyLength();
      settings.addOption(QueryFilterIterator.PARTITION_KEY_LENGTH, Integer.toString(partitionKeyLength));
    }
  } else if ((settings == null) && params.isMixedVisibility()) {
    // we have to at least use a whole row iterator
    settings = new IteratorSetting(priority, iteratorName, WholeRowIterator.class);
  }

  if (!distributableFilter && !options.isSecondaryIndexing()) {
    // adding both a distributable query and the index constraints is duplicative and
    // slower, but one of the two is important to limit client-side filtering
    addIndexFilterToIterator(params, scanner);
  }
  if (settings != null) {
    scanner.addScanIterator(settings);
  }
}
Use of org.locationtech.geowave.core.store.AdapterToIndexMapping in the geowave project by locationtech.
Class GeoWaveAttributeIndexIT, method testTemporalAttributeIndex.
/**
 * Adds an attribute index on the timestamp field, checks the adapter-to-index mapping that is
 * stored for it, then ingests data and queries it back through the attribute index both
 * without a filter and with a temporal range filter.
 */
@Test
public void testTemporalAttributeIndex() {
  final DataStore store = dataStore.createDataStore();
  final DataTypeAdapter<SimpleFeature> featureAdapter = createDataAdapter();
  final Index baseIndex = SpatialDimensionalityTypeProvider.createIndexFromOptions(new SpatialOptions());
  store.addType(featureAdapter, baseIndex);

  // Create the attribute index, register it, and read it back from the data store
  final AttributeIndexOptions attributeOptions = new AttributeIndexOptions(TYPE_NAME, TIMESTAMP_FIELD);
  final Index createdIndex = AttributeDimensionalityTypeProvider.createIndexFromOptions(store, attributeOptions);
  store.addIndex(TYPE_NAME, createdIndex);
  final Index temporalIndex = store.getIndex(createdIndex.getName());
  assertTrue(temporalIndex instanceof AttributeIndex);
  assertEquals(TIMESTAMP_FIELD, ((AttributeIndex) temporalIndex).getAttributeName());
  final String temporalIndexName = temporalIndex.getName();

  final InternalAdapterStore internalAdapters = dataStore.createInternalAdapterStore();
  final AdapterIndexMappingStore indexMappings = dataStore.createAdapterIndexMappingStore();
  // Get the mapping for the attribute index
  final AdapterToIndexMapping attributeMapping =
      indexMappings.getMapping(
          internalAdapters.getAdapterId(featureAdapter.getTypeName()),
          temporalIndexName);
  assertEquals(1, attributeMapping.getIndexFieldMappers().size());
  final IndexFieldMapper<?, ?> timestampMapper = attributeMapping.getIndexFieldMappers().get(0);
  assertEquals(Date.class, timestampMapper.adapterFieldType());
  assertEquals(Long.class, timestampMapper.indexFieldType());
  assertEquals(1, timestampMapper.getAdapterFields().length);
  assertEquals(TIMESTAMP_FIELD, timestampMapper.getAdapterFields()[0]);

  // Ingest data
  ingestData(store);

  // Query data from attribute index
  try (CloseableIterator<SimpleFeature> allIndexed =
      store.query(
          QueryBuilder.newBuilder(SimpleFeature.class).indexName(temporalIndexName).build())) {
    assertTrue(allIndexed.hasNext());
    // Half of the values are null and won't be indexed
    assertEquals(TOTAL_FEATURES / 2, Iterators.size(allIndexed));
  }

  final Date rangeStart = new Date((long) (ONE_DAY_MILLIS * 10.5));
  final Date rangeEnd = new Date((long) (ONE_DAY_MILLIS * 24.5));
  final Filter timeRange = TemporalFieldValue.of(TIMESTAMP_FIELD).isBetween(rangeStart, rangeEnd);
  // Query data from attribute index with a temporal range constraint
  try (CloseableIterator<SimpleFeature> withinRange =
      store.query(
          QueryBuilder.newBuilder(SimpleFeature.class).indexName(temporalIndexName).filter(
              timeRange).build())) {
    assertTrue(withinRange.hasNext());
    assertEquals(7, Iterators.size(withinRange));
  }
}
Use of org.locationtech.geowave.core.store.AdapterToIndexMapping in the geowave project by locationtech.
Class FeatureDataAdapterTest, method testSingleTime.
/**
 * Marks only the "whennot" attribute as the time attribute and verifies that the flag survives
 * a binary round trip of the adapter and that the spatial-temporal index mapping uses
 * "whennot" for the time field and "geometry" for the geometry field.
 */
@Test
public void testSingleTime() {
  final String timeAttribute = "whennot";
  final String geometryAttribute = "geometry";
  schema.getDescriptor("when").getUserData().clear();
  schema.getDescriptor(timeAttribute).getUserData().put("time", Boolean.TRUE);

  final FeatureDataAdapter original = new FeatureDataAdapter(schema);
  final Index spatialTemporalIndex =
      SpatialTemporalDimensionalityTypeProvider.createIndexFromOptions(new SpatialTemporalOptions());
  final AdapterToIndexMapping mapping =
      BaseDataStoreUtils.mapAdapterToIndex(original.asInternalAdapter((short) -1), spatialTemporalIndex);

  // Serialize the adapter and restore it into a fresh instance
  final byte[] serialized = original.toBinary();
  final FeatureDataAdapter restored = new FeatureDataAdapter();
  restored.fromBinary(serialized);

  assertEquals(restored.getTypeName(), original.getTypeName());
  assertEquals(restored.getFeatureType(), original.getFeatureType());
  assertEquals(
      Boolean.TRUE,
      restored.getFeatureType().getDescriptor(timeAttribute).getUserData().get("time"));

  // One mapper for time and one for geometry are expected
  assertEquals(2, mapping.getIndexFieldMappers().size());
  assertNotNull(mapping.getMapperForIndexField(TimeField.DEFAULT_FIELD_ID));
  assertEquals(1, mapping.getMapperForIndexField(TimeField.DEFAULT_FIELD_ID).adapterFieldCount());
  assertEquals(
      timeAttribute,
      mapping.getMapperForIndexField(TimeField.DEFAULT_FIELD_ID).getAdapterFields()[0]);
  assertNotNull(mapping.getMapperForIndexField(SpatialField.DEFAULT_GEOMETRY_FIELD_NAME));
  assertEquals(
      1,
      mapping.getMapperForIndexField(SpatialField.DEFAULT_GEOMETRY_FIELD_NAME).adapterFieldCount());
  assertEquals(
      geometryAttribute,
      mapping.getMapperForIndexField(SpatialField.DEFAULT_GEOMETRY_FIELD_NAME).getAdapterFields()[0]);
}
Use of org.locationtech.geowave.core.store.AdapterToIndexMapping in the geowave project by locationtech.
Class FeatureDataAdapterTest, method testRange.
/**
 * Marks "when" as the start and "whennot" as the end of a time range and verifies that both
 * flags survive a binary round trip of the adapter and that the spatial-temporal index mapping
 * uses both attributes, in that order, for the time field and "geometry" for the geometry field.
 */
@Test
public void testRange() {
  final String startAttribute = "when";
  final String endAttribute = "whennot";
  final String geometryAttribute = "geometry";
  schema.getDescriptor(startAttribute).getUserData().clear();
  schema.getDescriptor(endAttribute).getUserData().clear();
  schema.getDescriptor(startAttribute).getUserData().put("start", Boolean.TRUE);
  schema.getDescriptor(endAttribute).getUserData().put("end", Boolean.TRUE);

  final FeatureDataAdapter original = new FeatureDataAdapter(schema);
  final Index spatialTemporalIndex =
      SpatialTemporalDimensionalityTypeProvider.createIndexFromOptions(new SpatialTemporalOptions());
  final AdapterToIndexMapping mapping =
      BaseDataStoreUtils.mapAdapterToIndex(original.asInternalAdapter((short) -1), spatialTemporalIndex);

  // Serialize the adapter and restore it into a fresh instance
  final byte[] serialized = original.toBinary();
  final FeatureDataAdapter restored = new FeatureDataAdapter();
  restored.fromBinary(serialized);

  assertEquals(restored.getTypeName(), original.getTypeName());
  assertEquals(restored.getFeatureType(), original.getFeatureType());
  assertEquals(
      Boolean.TRUE,
      restored.getFeatureType().getDescriptor(endAttribute).getUserData().get("end"));
  assertEquals(
      Boolean.TRUE,
      restored.getFeatureType().getDescriptor(startAttribute).getUserData().get("start"));

  // One mapper for the time range and one for geometry are expected
  assertEquals(2, mapping.getIndexFieldMappers().size());
  assertNotNull(mapping.getMapperForIndexField(TimeField.DEFAULT_FIELD_ID));
  assertEquals(2, mapping.getMapperForIndexField(TimeField.DEFAULT_FIELD_ID).adapterFieldCount());
  assertEquals(
      startAttribute,
      mapping.getMapperForIndexField(TimeField.DEFAULT_FIELD_ID).getAdapterFields()[0]);
  assertEquals(
      endAttribute,
      mapping.getMapperForIndexField(TimeField.DEFAULT_FIELD_ID).getAdapterFields()[1]);
  assertNotNull(mapping.getMapperForIndexField(SpatialField.DEFAULT_GEOMETRY_FIELD_NAME));
  assertEquals(
      1,
      mapping.getMapperForIndexField(SpatialField.DEFAULT_GEOMETRY_FIELD_NAME).adapterFieldCount());
  assertEquals(
      geometryAttribute,
      mapping.getMapperForIndexField(SpatialField.DEFAULT_GEOMETRY_FIELD_NAME).getAdapterFields()[0]);
}
Aggregations