Search in sources :

Example 1 with TextIndexStrategy

use of org.locationtech.geowave.core.index.text.TextIndexStrategy in project geowave by locationtech.

In the class FilterConstraints, the method getIndexData:

/**
 * Builds the multi-dimensional index data described by these constraints, caching the result so
 * that later calls return the same list.
 *
 * <p>When the adapter, index, or index mapping is not set, a new empty list is returned and
 * nothing is cached. A {@link CustomIndex} is treated as a text index; any other index is
 * treated as a numeric index.
 *
 * @return the multi-dimensional index data
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public List<MultiDimensionalIndexData<V>> getIndexData() {
    if (cachedIndexData != null) {
        return cachedIndexData;
    }
    if ((adapter == null) || (index == null) || (indexMapping == null)) {
        return Lists.newArrayList();
    }
    if (index instanceof CustomIndex) {
        // Custom indices are cast directly to a text index strategy here.
        final TextIndexStrategy textStrategy = (TextIndexStrategy) ((CustomIndex) index).getCustomIndexStrategy();
        final Object entryConverter = textStrategy.getEntryConverter();
        if (!(entryConverter instanceof AdapterFieldTextIndexEntryConverter)) {
            throw new RuntimeException("Unable to determine adapter field used by text index.");
        }
        final String textField = ((AdapterFieldTextIndexEntryConverter) entryConverter).getFieldName();
        final IndexFieldConstraints<?> textConstraint = fieldConstraints.get(textField);
        if ((textConstraint != null) && !(textConstraint instanceof TextFieldConstraints)) {
            throw new RuntimeException("Non-text field constraints cannot be used for a text index.");
        }
        // A text index has a single dimension, so only dimension 0 is consulted.
        DimensionConstraints<String> textRanges = null;
        if (textConstraint != null) {
            textRanges = ((TextFieldConstraints) textConstraint).getDimensionRanges(0);
        }
        final List<DimensionConstraints<String>> textDimensions = Lists.newArrayList();
        if (textRanges == null) {
            // No usable constraint on the indexed field: fall back to a range with null bounds.
            textDimensions.add(DimensionConstraints.of(Lists.newArrayList(FilterRange.of((String) null, (String) null, true, true, true))));
        } else {
            textDimensions.add(textRanges);
        }
        cachedIndexData = (List) TextFieldConstraints.toIndexData(textDimensions);
        return cachedIndexData;
    }

    // Right now all index strategies that aren't custom are numeric
    final CommonIndexModel model = index.getIndexModel();
    final int strategyDimensionCount = index.getIndexStrategy().getOrderedDimensionDefinitions().length;
    final List<DimensionConstraints<Double>> numericDimensions = Lists.newArrayListWithCapacity(strategyDimensionCount);
    // Tracks, per index field name, which dimension of that field is consumed next.
    final Map<String, Integer> nextDimensionByField = Maps.newHashMap();
    final NumericDimensionField<?>[] modelDimensions = model.getDimensions();
    // Only build constraints for dimensions used by the index strategy.
    final int usedDimensionCount = Math.min(modelDimensions.length, strategyDimensionCount);
    for (int d = 0; d < usedDimensionCount; d++) {
        final String indexFieldName = modelDimensions[d].getFieldName();
        final Integer seenDimension = nextDimensionByField.get(indexFieldName);
        final int fieldDimension = (seenDimension == null) ? 0 : seenDimension;
        final IndexFieldMapper<?, ?> mapper = indexMapping.getMapperForIndexField(indexFieldName);
        final String[] adapterFields = mapper.getIndexOrderedAdapterFields();
        IndexFieldConstraints<?> combined = null;
        if ((adapterFields.length > 1) && isSingleDimension(indexFieldName, modelDimensions)) {
            // Several adapter fields feed this one index dimension: combine all of their
            // constraints
            for (final String adapterField : adapterFields) {
                final IndexFieldConstraints<?> next = fieldConstraints.get(adapterField);
                if (combined == null) {
                    combined = next;
                } else {
                    combined.and((IndexFieldConstraints) next);
                }
            }
        } else {
            combined = fieldConstraints.get(adapterFields[fieldDimension % adapterFields.length]);
        }
        if (combined == null) {
            // Unconstrained dimension: use a range with null bounds.
            numericDimensions.add(DimensionConstraints.of(Lists.newArrayList(FilterRange.of((Double) null, (Double) null, true, true, true))));
            continue;
        }
        if (!(combined instanceof NumericFieldConstraints)) {
            throw new RuntimeException("Non-numeric field constraints cannot be used for a numeric index.");
        }
        final DimensionConstraints<Double> numericRanges = ((NumericFieldConstraints) combined).getDimensionRanges(fieldDimension % combined.getDimensionCount());
        if (numericRanges == null) {
            numericDimensions.add(DimensionConstraints.of(Lists.newArrayList(FilterRange.of((Double) null, (Double) null, true, true, true))));
        } else {
            numericDimensions.add(numericRanges);
        }
        // Advance to the next dimension of this index field only after a numeric constraint was used.
        nextDimensionByField.put(indexFieldName, fieldDimension + 1);
    }
    cachedIndexData = (List) NumericFieldConstraints.toIndexData(numericDimensions);
    return cachedIndexData;
}
Also used : NumericDimensionField(org.locationtech.geowave.core.store.dimension.NumericDimensionField) NumericFieldConstraints(org.locationtech.geowave.core.store.query.filter.expression.numeric.NumericFieldConstraints) AdapterFieldTextIndexEntryConverter(org.locationtech.geowave.core.store.index.TextAttributeIndexProvider.AdapterFieldTextIndexEntryConverter) CommonIndexModel(org.locationtech.geowave.core.store.index.CommonIndexModel) DimensionConstraints(org.locationtech.geowave.core.store.query.filter.expression.IndexFieldConstraints.DimensionConstraints) TextFieldConstraints(org.locationtech.geowave.core.store.query.filter.expression.text.TextFieldConstraints) TextIndexStrategy(org.locationtech.geowave.core.index.text.TextIndexStrategy) CustomIndex(org.locationtech.geowave.core.store.index.CustomIndex)

Example 2 with TextIndexStrategy

use of org.locationtech.geowave.core.index.text.TextIndexStrategy in project geowave by locationtech.

In the class OptimalExpressionQuery, the method determineBestIndices:

/**
 * Selects, for every adapter that matches the filter, the index that should be used to answer
 * the query, and groups the adapters by the index chosen for them.
 *
 * <p>For each adapter, every index mapping that passes the optional index-name restriction from
 * {@code baseOptions} and the optional {@code indexFilter} is considered. An index becomes a
 * constrained candidate when all of its indexed fields are referenced by the filter and the
 * resulting constraints constrain all of those fields. With exactly one candidate, that index is
 * used. With several, the candidate with the lowest estimated cardinality is used. With none,
 * the first eligible index is used, preferring a non-attribute index over an attribute index.
 * The constraints of the chosen candidate are stored in {@code constraintCache} under the
 * adapter's type name.
 *
 * @param baseOptions the query options; a non-null index name restricts the indices considered
 * @param adapters the adapters being queried
 * @param adapterIndexMappingStore used to look up the index mappings of each adapter
 * @param indexStore used to resolve each mapping to its index
 * @param statisticsStore used to build constraints and to estimate cardinality
 * @return one pair per selected index, holding the index and the adapters that will use it
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public List<Pair<Index, List<InternalDataAdapter<?>>>> determineBestIndices(final BaseQueryOptions baseOptions, final InternalDataAdapter<?>[] adapters, final AdapterIndexMappingStore adapterIndexMappingStore, final IndexStore indexStore, final DataStatisticsStore statisticsStore) {
    final Map<Index, List<InternalDataAdapter<?>>> bestIndices = Maps.newHashMap();
    final Set<String> referencedFields = Sets.newHashSet();
    filter.addReferencedFields(referencedFields);
    for (final InternalDataAdapter<?> adapter : adapters) {
        if (!adapterMatchesFilter(adapter, referencedFields)) {
            continue;
        }
        final AdapterToIndexMapping[] adapterIndices = adapterIndexMappingStore.getIndicesForAdapter(adapter.getAdapterId());
        final Map<Index, FilterConstraints<?>> indexConstraints = Maps.newHashMap();
        Index bestIndex = null;
        for (final AdapterToIndexMapping mapping : adapterIndices) {
            if ((baseOptions.getIndexName() != null) && !baseOptions.getIndexName().equals(mapping.getIndexName())) {
                continue;
            }
            final Index index = mapping.getIndex(indexStore);
            if (indexFilter != null && !indexFilter.test(index)) {
                continue;
            }
            // Fallback choice when no index ends up constrained: take the first eligible index,
            // but replace an attribute index with a non-attribute index when one is available.
            if ((bestIndex == null) || ((bestIndex instanceof AttributeIndex) && !(index instanceof AttributeIndex))) {
                bestIndex = index;
            }
            final Set<String> indexedFields = Sets.newHashSet();
            final Class<? extends Comparable> filterClass;
            if ((index instanceof CustomIndex) && (((CustomIndex<?, ?>) index).getCustomIndexStrategy() instanceof TextIndexStrategy)) {
                final TextIndexStrategy<?> indexStrategy = (TextIndexStrategy<?>) ((CustomIndex<?, ?>) index).getCustomIndexStrategy();
                if (!(indexStrategy.getEntryConverter() instanceof AdapterFieldTextIndexEntryConverter)) {
                    // The indexed adapter field cannot be determined, so no constraints can be built.
                    continue;
                }
                indexedFields.add(((AdapterFieldTextIndexEntryConverter<?>) indexStrategy.getEntryConverter()).getFieldName());
                filterClass = String.class;
            } else {
                for (final IndexFieldMapper<?, ?> mapper : mapping.getIndexFieldMappers()) {
                    for (final String adapterField : mapper.getAdapterFields()) {
                        indexedFields.add(adapterField);
                    }
                }
                // Remove any fields that are part of the common index model, but not used in the index
                // strategy. They shouldn't be considered when trying to find a best match. In the future
                // it may be useful to consider an index that has extra common index dimensions that
                // contain filtered fields over one that only matches indexed dimensions. For example, if
                // I have a spatial index, and a spatial index that stores time, it should pick the one
                // that stores time if I supply a temporal constraint, even though it isn't part of the
                // index strategy.
                final int modelDimensions = index.getIndexModel().getDimensions().length;
                final int strategyDimensions = index.getIndexStrategy().getOrderedDimensionDefinitions().length;
                for (int i = modelDimensions - 1; i >= strategyDimensions; i--) {
                    final IndexFieldMapper<?, ?> mapper = mapping.getMapperForIndexField(index.getIndexModel().getDimensions()[i].getFieldName());
                    for (final String adapterField : mapper.getAdapterFields()) {
                        indexedFields.remove(adapterField);
                    }
                }
                filterClass = Double.class;
            }
            // Only indices whose indexed fields are all referenced by the filter can be constrained.
            if (referencedFields.containsAll(indexedFields)) {
                final FilterConstraints<?> constraints = filter.getConstraints(filterClass, statisticsStore, adapter, mapping, index, indexedFields);
                if (constraints.constrainsAllFields(indexedFields)) {
                    indexConstraints.put(index, constraints);
                }
            }
        }
        if (indexConstraints.size() == 1) {
            final Entry<Index, FilterConstraints<?>> bestEntry = indexConstraints.entrySet().iterator().next();
            bestIndex = bestEntry.getKey();
            constraintCache.put(adapter.getTypeName(), bestEntry.getValue());
        } else if (indexConstraints.size() > 1) {
            // determine which constraint is the best
            double bestCardinality = Double.MAX_VALUE;
            Index bestConstrainedIndex = null;
            for (final Entry<Index, FilterConstraints<?>> entry : indexConstraints.entrySet()) {
                final QueryRanges ranges = entry.getValue().getQueryRanges(baseOptions, statisticsStore);
                if (ranges.isEmpty()) {
                    continue;
                }
                // TODO: A future optimization would be to add a default numeric histogram for any numeric
                // index dimensions and just use the index data ranges to determine cardinality rather
                // than decomposing query ranges.
                final StatisticId<RowRangeHistogramValue> statisticId = IndexStatistic.generateStatisticId(entry.getKey().getName(), RowRangeHistogramStatistic.STATS_TYPE, Statistic.INTERNAL_TAG);
                final RowRangeHistogramStatistic histogram = (RowRangeHistogramStatistic) statisticsStore.getStatisticById(statisticId);
                // Estimate cardinality against the index under evaluation. The histogram and the
                // ranges both belong to entry.getKey(); the previously selected index (null on
                // the first iteration) is not the one being measured.
                final double cardinality = DataStoreUtils.cardinality(statisticsStore, histogram, adapter, entry.getKey(), ranges);
                if ((bestConstrainedIndex == null) || (cardinality < bestCardinality)) {
                    bestConstrainedIndex = entry.getKey();
                    bestCardinality = cardinality;
                }
            }
            // When every candidate produced empty ranges, the fallback index chosen above is kept.
            if (bestConstrainedIndex != null) {
                bestIndex = bestConstrainedIndex;
                constraintCache.put(adapter.getTypeName(), indexConstraints.get(bestIndex));
            }
        }
        if (bestIndex == null) {
            continue;
        }
        if (!bestIndices.containsKey(bestIndex)) {
            bestIndices.put(bestIndex, Lists.newArrayList());
        }
        bestIndices.get(bestIndex).add(adapter);
    }
    return bestIndices.entrySet().stream().map(e -> Pair.of(e.getKey(), e.getValue())).collect(Collectors.toList());
}
Also used : RowRangeHistogramStatistic(org.locationtech.geowave.core.store.statistics.index.RowRangeHistogramStatistic) IndexFilter(org.locationtech.geowave.core.store.index.IndexFilter) PersistenceUtils(org.locationtech.geowave.core.index.persist.PersistenceUtils) IndexFieldMapper(org.locationtech.geowave.core.store.api.IndexFieldMapper) QueryFilter(org.locationtech.geowave.core.store.query.filter.QueryFilter) LoggerFactory(org.slf4j.LoggerFactory) StatisticId(org.locationtech.geowave.core.store.statistics.StatisticId) ByteBuffer(java.nio.ByteBuffer) QueryRanges(org.locationtech.geowave.core.index.QueryRanges) AdapterToIndexMapping(org.locationtech.geowave.core.store.AdapterToIndexMapping) CustomIndex(org.locationtech.geowave.core.store.index.CustomIndex) DataStatisticsStore(org.locationtech.geowave.core.store.statistics.DataStatisticsStore) AdapterIndexMappingStore(org.locationtech.geowave.core.store.adapter.AdapterIndexMappingStore) Sets(com.beust.jcommander.internal.Sets) Lists(com.google.common.collect.Lists) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) DataTypeAdapter(org.locationtech.geowave.core.store.api.DataTypeAdapter) Filter(org.locationtech.geowave.core.store.query.filter.expression.Filter) Statistic(org.locationtech.geowave.core.store.api.Statistic) TextIndexStrategy(org.locationtech.geowave.core.index.text.TextIndexStrategy) VarintUtils(org.locationtech.geowave.core.index.VarintUtils) Index(org.locationtech.geowave.core.store.api.Index) DataStoreUtils(org.locationtech.geowave.core.store.util.DataStoreUtils) InternalDataAdapter(org.locationtech.geowave.core.store.adapter.InternalDataAdapter) BaseQueryOptions(org.locationtech.geowave.core.store.base.BaseQueryOptions) Logger(org.slf4j.Logger) AdapterFieldTextIndexEntryConverter(org.locationtech.geowave.core.store.index.TextAttributeIndexProvider.AdapterFieldTextIndexEntryConverter) Set(java.util.Set) IndexStatistic(org.locationtech.geowave.core.store.api.IndexStatistic) 
Collectors(java.util.stream.Collectors) Maps(com.google.common.collect.Maps) AttributeIndex(org.locationtech.geowave.core.store.api.AttributeIndex) List(java.util.List) ExpressionQueryFilter(org.locationtech.geowave.core.store.query.filter.ExpressionQueryFilter) FilterConstraints(org.locationtech.geowave.core.store.query.filter.expression.FilterConstraints) IndexStore(org.locationtech.geowave.core.store.index.IndexStore) Entry(java.util.Map.Entry) ExplicitTextSearch(org.locationtech.geowave.core.index.text.ExplicitTextSearch) RowRangeHistogramValue(org.locationtech.geowave.core.store.statistics.index.RowRangeHistogramStatistic.RowRangeHistogramValue) StatisticId(org.locationtech.geowave.core.store.statistics.StatisticId) AdapterToIndexMapping(org.locationtech.geowave.core.store.AdapterToIndexMapping) CustomIndex(org.locationtech.geowave.core.store.index.CustomIndex) Index(org.locationtech.geowave.core.store.api.Index) AttributeIndex(org.locationtech.geowave.core.store.api.AttributeIndex) Entry(java.util.Map.Entry) AttributeIndex(org.locationtech.geowave.core.store.api.AttributeIndex) RowRangeHistogramStatistic(org.locationtech.geowave.core.store.statistics.index.RowRangeHistogramStatistic) List(java.util.List) QueryRanges(org.locationtech.geowave.core.index.QueryRanges) AdapterFieldTextIndexEntryConverter(org.locationtech.geowave.core.store.index.TextAttributeIndexProvider.AdapterFieldTextIndexEntryConverter) FilterConstraints(org.locationtech.geowave.core.store.query.filter.expression.FilterConstraints) TextIndexStrategy(org.locationtech.geowave.core.index.text.TextIndexStrategy) CustomIndex(org.locationtech.geowave.core.store.index.CustomIndex)

Example 3 with TextIndexStrategy

use of org.locationtech.geowave.core.index.text.TextIndexStrategy in project geowave by locationtech.

In the class GeoWaveAttributeIndexIT, the method testTextAttributeIndex:

@Test
public void testTextAttributeIndex() {
    // Set up a data store with the feature type on a spatial index, then add a text attribute index.
    final DataStore store = dataStore.createDataStore();
    final DataTypeAdapter<SimpleFeature> featureAdapter = createDataAdapter();
    final Index spatial = SpatialDimensionalityTypeProvider.createIndexFromOptions(new SpatialOptions());
    store.addType(featureAdapter, spatial);
    final Index requestedIndex = AttributeDimensionalityTypeProvider.createIndexFromOptions(store, new AttributeIndexOptions(TYPE_NAME, COMMENT_FIELD));
    store.addIndex(TYPE_NAME, requestedIndex);

    // Re-read the index from the store and verify its type and strategy.
    final Index textIndex = store.getIndex(requestedIndex.getName());
    assertTrue(textIndex instanceof AttributeIndex);
    assertEquals(COMMENT_FIELD, ((AttributeIndex) textIndex).getAttributeName());
    assertTrue(textIndex instanceof CustomIndex);
    final Object customStrategy = ((CustomIndex<?, ?>) textIndex).getCustomIndexStrategy();
    assertTrue(customStrategy instanceof TextIndexStrategy);
    final TextIndexStrategy<?> textStrategy = (TextIndexStrategy<?>) customStrategy;

    // The entry converter must point at the comment field of the same adapter.
    final Object entryConverter = textStrategy.getEntryConverter();
    assertTrue(entryConverter instanceof AdapterFieldTextIndexEntryConverter);
    final AdapterFieldTextIndexEntryConverter<?> fieldConverter = (AdapterFieldTextIndexEntryConverter<?>) entryConverter;
    assertEquals(COMMENT_FIELD, fieldConverter.getFieldName());
    assertNotNull(fieldConverter.getAdapter());
    assertEquals(featureAdapter.getTypeName(), fieldConverter.getAdapter().getTypeName());
    assertEquals(featureAdapter.getFieldDescriptor(COMMENT_FIELD), fieldConverter.getAdapter().getFieldDescriptor(COMMENT_FIELD));

    final InternalAdapterStore internalAdapters = dataStore.createInternalAdapterStore();
    final AdapterIndexMappingStore indexMappings = dataStore.createAdapterIndexMappingStore();
    final String textIndexName = textIndex.getName();
    // Get the mapping for the attribute index
    final AdapterToIndexMapping textMapping = indexMappings.getMapping(internalAdapters.getAdapterId(featureAdapter.getTypeName()), textIndexName);
    // The text index is a custom index, so there won't be any direct field mappings
    assertEquals(0, textMapping.getIndexFieldMappers().size());

    // Ingest data
    ingestData(store);

    // Query data from attribute index
    try (CloseableIterator<SimpleFeature> allResults = store.query(QueryBuilder.newBuilder(SimpleFeature.class).indexName(textIndexName).build())) {
        assertTrue(allResults.hasNext());
        // The null values are not indexed, so only 3/4 of the data should be present
        assertEquals((int) (TOTAL_FEATURES * 0.75), Iterators.size(allResults));
    }

    final Filter startsWithC = TextFieldValue.of(COMMENT_FIELD).startsWith("c", true);
    // Query data from attribute index with a text constraint
    try (CloseableIterator<SimpleFeature> filteredResults = store.query(QueryBuilder.newBuilder(SimpleFeature.class).indexName(textIndexName).filter(startsWithC).build())) {
        assertTrue(filteredResults.hasNext());
        assertEquals(TOTAL_FEATURES / 4, Iterators.size(filteredResults));
    }
}
Also used : InternalAdapterStore(org.locationtech.geowave.core.store.adapter.InternalAdapterStore) AdapterFieldTextIndexEntryConverter(org.locationtech.geowave.core.store.index.TextAttributeIndexProvider.AdapterFieldTextIndexEntryConverter) AdapterToIndexMapping(org.locationtech.geowave.core.store.AdapterToIndexMapping) CustomIndex(org.locationtech.geowave.core.store.index.CustomIndex) Index(org.locationtech.geowave.core.store.api.Index) AttributeIndex(org.locationtech.geowave.core.store.api.AttributeIndex) SpatialOptions(org.locationtech.geowave.core.geotime.index.SpatialOptions) AdapterIndexMappingStore(org.locationtech.geowave.core.store.adapter.AdapterIndexMappingStore) SimpleFeature(org.opengis.feature.simple.SimpleFeature) AttributeIndexOptions(org.locationtech.geowave.core.store.index.AttributeIndexOptions) AttributeIndex(org.locationtech.geowave.core.store.api.AttributeIndex) Filter(org.locationtech.geowave.core.store.query.filter.expression.Filter) DataStore(org.locationtech.geowave.core.store.api.DataStore) TextIndexStrategy(org.locationtech.geowave.core.index.text.TextIndexStrategy) CustomIndex(org.locationtech.geowave.core.store.index.CustomIndex) Test(org.junit.Test)

Aggregations

TextIndexStrategy (org.locationtech.geowave.core.index.text.TextIndexStrategy)3 CustomIndex (org.locationtech.geowave.core.store.index.CustomIndex)3 AdapterFieldTextIndexEntryConverter (org.locationtech.geowave.core.store.index.TextAttributeIndexProvider.AdapterFieldTextIndexEntryConverter)3 AdapterToIndexMapping (org.locationtech.geowave.core.store.AdapterToIndexMapping)2 AdapterIndexMappingStore (org.locationtech.geowave.core.store.adapter.AdapterIndexMappingStore)2 AttributeIndex (org.locationtech.geowave.core.store.api.AttributeIndex)2 Index (org.locationtech.geowave.core.store.api.Index)2 Filter (org.locationtech.geowave.core.store.query.filter.expression.Filter)2 Sets (com.beust.jcommander.internal.Sets)1 Lists (com.google.common.collect.Lists)1 Maps (com.google.common.collect.Maps)1 ByteBuffer (java.nio.ByteBuffer)1 List (java.util.List)1 Map (java.util.Map)1 Entry (java.util.Map.Entry)1 Set (java.util.Set)1 Collectors (java.util.stream.Collectors)1 Pair (org.apache.commons.lang3.tuple.Pair)1 Test (org.junit.Test)1 SpatialOptions (org.locationtech.geowave.core.geotime.index.SpatialOptions)1