Search in sources :

Example 1 with DedupeFilter

use of org.locationtech.geowave.core.store.query.filter.DedupeFilter in project geowave by locationtech.

the class AccumuloUtils method getIterator.

private static CloseableIterator<Entry<Key, Value>> getIterator(final Connector connector, final String namespace, final Index index) throws AccumuloException, AccumuloSecurityException, IOException, TableNotFoundException {
    CloseableIterator<Entry<Key, Value>> iterator = null;
    final AccumuloOptions options = new AccumuloOptions();
    final AccumuloOperations operations = new AccumuloOperations(connector, namespace, new AccumuloOptions());
    final IndexStore indexStore = new IndexStoreImpl(operations, options);
    final PersistentAdapterStore adapterStore = new AdapterStoreImpl(operations, options);
    final AdapterIndexMappingStore mappingStore = new AdapterIndexMappingStoreImpl(operations, options);
    if (indexStore.indexExists(index.getName())) {
        final ScannerBase scanner = operations.createBatchScanner(index.getName());
        ((BatchScanner) scanner).setRanges(AccumuloUtils.byteArrayRangesToAccumuloRanges(null));
        final IteratorSetting iteratorSettings = new IteratorSetting(10, "GEOWAVE_WHOLE_ROW_ITERATOR", WholeRowIterator.class);
        scanner.addScanIterator(iteratorSettings);
        final Iterator<Entry<Key, Value>> it = new IteratorWrapper(adapterStore, mappingStore, index, scanner.iterator(), new QueryFilter[] { new DedupeFilter() });
        iterator = new CloseableIteratorWrapper<>(new ScannerClosableWrapper(scanner), it);
    }
    return iterator;
}
Also used : AccumuloOperations(org.locationtech.geowave.datastore.accumulo.operations.AccumuloOperations) ScannerBase(org.apache.accumulo.core.client.ScannerBase) BatchScanner(org.apache.accumulo.core.client.BatchScanner) AdapterIndexMappingStore(org.locationtech.geowave.core.store.adapter.AdapterIndexMappingStore) IndexStoreImpl(org.locationtech.geowave.core.store.metadata.IndexStoreImpl) Entry(java.util.Map.Entry) PersistentAdapterStore(org.locationtech.geowave.core.store.adapter.PersistentAdapterStore) AdapterIndexMappingStoreImpl(org.locationtech.geowave.core.store.metadata.AdapterIndexMappingStoreImpl) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) CloseableIteratorWrapper(org.locationtech.geowave.core.store.CloseableIteratorWrapper) AccumuloOptions(org.locationtech.geowave.datastore.accumulo.config.AccumuloOptions) DedupeFilter(org.locationtech.geowave.core.store.query.filter.DedupeFilter) AdapterStoreImpl(org.locationtech.geowave.core.store.metadata.AdapterStoreImpl) IndexStore(org.locationtech.geowave.core.store.index.IndexStore)

Example 2 with DedupeFilter

use of org.locationtech.geowave.core.store.query.filter.DedupeFilter in project geowave by locationtech.

the class BaseConstraintsQuery method query.

@SuppressWarnings("unchecked")
@Override
public CloseableIterator<Object> query(final DataStoreOperations datastoreOperations, final DataStoreOptions options, final PersistentAdapterStore adapterStore, final AdapterIndexMappingStore mappingStore, final InternalAdapterStore internalAdapterStore, final double[] maxResolutionSubsamplingPerDimension, final double[] targetResolutionPerDimensionForHierarchicalIndex, final Integer limit, final Integer queryMaxRangeDecomposition, final boolean delete) {
    if (isAggregation()) {
        if ((options == null) || !options.isServerSideLibraryEnabled()) {
            // Aggregate client-side
            final CloseableIterator<Object> it = super.query(datastoreOperations, options, adapterStore, mappingStore, internalAdapterStore, maxResolutionSubsamplingPerDimension, targetResolutionPerDimensionForHierarchicalIndex, limit, queryMaxRangeDecomposition, false);
            return BaseDataStoreUtils.aggregate(it, (Aggregation<?, ?, Object>) aggregation.getRight(), (DataTypeAdapter) aggregation.getLeft());
        } else {
            // incorrect results
            if (!clientFilters.isEmpty()) {
                final QueryFilter f = clientFilters.get(clientFilters.size() - 1);
                if (f instanceof DedupeFilter) {
                    // in case the list is immutable or null we need to create a new mutable list
                    if (distributableFilters != null) {
                        distributableFilters = new ArrayList<>(distributableFilters);
                    } else {
                        distributableFilters = new ArrayList<>();
                    }
                    distributableFilters.add(f);
                    LOGGER.warn("Aggregating results when duplicates exist in the table may result in duplicate aggregation");
                }
            }
            try (final RowReader<GeoWaveRow> reader = getReader(datastoreOperations, options, adapterStore, mappingStore, internalAdapterStore, maxResolutionSubsamplingPerDimension, targetResolutionPerDimensionForHierarchicalIndex, limit, queryMaxRangeDecomposition, GeoWaveRowIteratorTransformer.NO_OP_TRANSFORMER, false)) {
                Object mergedAggregationResult = null;
                final Aggregation<?, Object, Object> agg = (Aggregation<?, Object, Object>) aggregation.getValue();
                if ((reader == null) || !reader.hasNext()) {
                    return new CloseableIterator.Empty();
                } else {
                    while (reader.hasNext()) {
                        final GeoWaveRow row = reader.next();
                        for (final GeoWaveValue value : row.getFieldValues()) {
                            if ((value.getValue() != null) && (value.getValue().length > 0)) {
                                if (mergedAggregationResult == null) {
                                    mergedAggregationResult = agg.resultFromBinary(value.getValue());
                                } else {
                                    mergedAggregationResult = agg.merge(mergedAggregationResult, agg.resultFromBinary(value.getValue()));
                                }
                            }
                        }
                    }
                    return new CloseableIterator.Wrapper<>(Iterators.singletonIterator(mergedAggregationResult));
                }
            } catch (final Exception e) {
                LOGGER.warn("Unable to close reader for aggregation", e);
            }
        }
    }
    return super.query(datastoreOperations, options, adapterStore, mappingStore, internalAdapterStore, maxResolutionSubsamplingPerDimension, targetResolutionPerDimensionForHierarchicalIndex, limit, queryMaxRangeDecomposition, delete);
}
Also used : GeoWaveRow(org.locationtech.geowave.core.store.entities.GeoWaveRow) GeoWaveValue(org.locationtech.geowave.core.store.entities.GeoWaveValue) Aggregation(org.locationtech.geowave.core.store.api.Aggregation) QueryFilter(org.locationtech.geowave.core.store.query.filter.QueryFilter) CoordinateRangeQueryFilter(org.locationtech.geowave.core.store.query.filter.CoordinateRangeQueryFilter) DedupeFilter(org.locationtech.geowave.core.store.query.filter.DedupeFilter)

Example 3 with DedupeFilter

use of org.locationtech.geowave.core.store.query.filter.DedupeFilter in project geowave by locationtech.

the class BaseDataStore method internalQuery.

protected <T> CloseableIterator<T> internalQuery(final QueryConstraints constraints, final BaseQueryOptions queryOptions, final DeletionMode deleteMode) {
    // Note: The DeletionMode option is provided to avoid recursively
    // adding DuplicateDeletionCallbacks when actual duplicates are removed
    // via the DuplicateDeletionCallback. The callback should only be added
    // during the initial deletion query.
    final boolean delete = ((deleteMode == DeletionMode.DELETE) || (deleteMode == DeletionMode.DELETE_WITH_DUPLICATES));
    final List<CloseableIterator<Object>> results = new ArrayList<>();
    // If CQL filter is set
    if (constraints instanceof TypeConstraintQuery) {
        final String constraintTypeName = ((TypeConstraintQuery) constraints).getTypeName();
        if ((queryOptions.getAdapterIds() == null) || (queryOptions.getAdapterIds().length == 0)) {
            queryOptions.setAdapterId(internalAdapterStore.getAdapterId(constraintTypeName));
        } else if (queryOptions.getAdapterIds().length == 1) {
            final Short adapterId = internalAdapterStore.getAdapterId(constraintTypeName);
            if ((adapterId == null) || (queryOptions.getAdapterIds()[0] != adapterId.shortValue())) {
                LOGGER.error("Constraint Query Type name does not match Query Options Type Name");
                throw new RuntimeException("Constraint Query Type name does not match Query Options Type Name");
            }
        } else {
            // Throw exception when QueryOptions has more than one adapter
            // and CQL Adapter is set.
            LOGGER.error("Constraint Query Type name does not match Query Options Type Name");
            throw new RuntimeException("Constraint Query Type name does not match Query Options Type Name");
        }
    }
    final QueryConstraints sanitizedConstraints = (constraints == null) ? new EverythingQuery() : constraints;
    final List<DataStoreCallbackManager> deleteCallbacks = new ArrayList<>();
    final Map<Short, Set<ByteArray>> dataIdsToDelete;
    if (DeletionMode.DELETE_WITH_DUPLICATES.equals(deleteMode) && (baseOptions.isSecondaryIndexing())) {
        dataIdsToDelete = new ConcurrentHashMap<>();
    } else {
        dataIdsToDelete = null;
    }
    final boolean dataIdIndexIsBest = baseOptions.isSecondaryIndexing() && ((sanitizedConstraints instanceof DataIdQuery) || (sanitizedConstraints instanceof DataIdRangeQuery) || (sanitizedConstraints instanceof EverythingQuery));
    if (!delete && dataIdIndexIsBest) {
        try {
            // just grab the values directly from the Data Index
            InternalDataAdapter<?>[] adapters = queryOptions.getAdaptersArray(adapterStore);
            if (!queryOptions.isAllIndices()) {
                final Set<Short> adapterIds = new HashSet<>(Arrays.asList(ArrayUtils.toObject(queryOptions.getValidAdapterIds(internalAdapterStore, indexMappingStore))));
                adapters = Arrays.stream(adapters).filter(a -> adapterIds.contains(a.getAdapterId())).toArray(i -> new InternalDataAdapter<?>[i]);
            }
            // TODO test whether aggregations work in this case
            for (final InternalDataAdapter<?> adapter : adapters) {
                RowReader<GeoWaveRow> rowReader;
                if (sanitizedConstraints instanceof DataIdQuery) {
                    rowReader = DataIndexUtils.getRowReader(baseOperations, adapterStore, indexMappingStore, internalAdapterStore, queryOptions.getFieldIdsAdapterPair(), queryOptions.getAggregation(), queryOptions.getAuthorizations(), adapter.getAdapterId(), ((DataIdQuery) sanitizedConstraints).getDataIds());
                } else if (sanitizedConstraints instanceof DataIdRangeQuery) {
                    if (((DataIdRangeQuery) sanitizedConstraints).isReverse() && !isReverseIterationSupported()) {
                        throw new UnsupportedOperationException("Currently the underlying datastore does not support reverse iteration");
                    }
                    rowReader = DataIndexUtils.getRowReader(baseOperations, adapterStore, indexMappingStore, internalAdapterStore, queryOptions.getFieldIdsAdapterPair(), queryOptions.getAggregation(), queryOptions.getAuthorizations(), adapter.getAdapterId(), ((DataIdRangeQuery) sanitizedConstraints).getStartDataIdInclusive(), ((DataIdRangeQuery) sanitizedConstraints).getEndDataIdInclusive(), ((DataIdRangeQuery) sanitizedConstraints).isReverse());
                } else {
                    rowReader = DataIndexUtils.getRowReader(baseOperations, adapterStore, indexMappingStore, internalAdapterStore, queryOptions.getFieldIdsAdapterPair(), queryOptions.getAggregation(), queryOptions.getAuthorizations(), adapter.getAdapterId());
                }
                results.add(new CloseableIteratorWrapper(rowReader, new NativeEntryIteratorWrapper(adapterStore, indexMappingStore, DataIndexUtils.DATA_ID_INDEX, rowReader, null, queryOptions.getScanCallback(), BaseDataStoreUtils.getFieldBitmask(queryOptions.getFieldIdsAdapterPair(), DataIndexUtils.DATA_ID_INDEX), queryOptions.getMaxResolutionSubsamplingPerDimension(), !BaseDataStoreUtils.isCommonIndexAggregation(queryOptions.getAggregation()), null)));
            }
            if (BaseDataStoreUtils.isAggregation(queryOptions.getAggregation())) {
                return BaseDataStoreUtils.aggregate(new CloseableIteratorWrapper(new Closeable() {

                    @Override
                    public void close() throws IOException {
                        for (final CloseableIterator<Object> result : results) {
                            result.close();
                        }
                    }
                }, Iterators.concat(results.iterator())), (Aggregation) queryOptions.getAggregation().getRight(), (DataTypeAdapter) queryOptions.getAggregation().getLeft());
            }
        } catch (final IOException e1) {
            LOGGER.error("Failed to resolve adapter or index for query", e1);
        }
    } else {
        final boolean isConstraintsAdapterIndexSpecific = sanitizedConstraints instanceof AdapterAndIndexBasedQueryConstraints;
        final boolean isAggregationAdapterIndexSpecific = (queryOptions.getAggregation() != null) && (queryOptions.getAggregation().getRight() instanceof AdapterAndIndexBasedAggregation);
        // all queries will use the same instance of the dedupe filter for
        // client side filtering because the filter needs to be applied across
        // indices
        DedupeFilter dedupeFilter = new DedupeFilter();
        MemoryPersistentAdapterStore tempAdapterStore = new MemoryPersistentAdapterStore(queryOptions.getAdaptersArray(adapterStore));
        MemoryAdapterIndexMappingStore memoryMappingStore = new MemoryAdapterIndexMappingStore();
        // keep a list of adapters that have been queried, to only load an
        // adapter to be queried once
        final Set<Short> queriedAdapters = new HashSet<>();
        // if its an ordered constraints then it is dependent on the index selected, if its
        // secondary indexing its inefficient to delete by constraints
        final boolean deleteAllIndicesByConstraints = ((delete && ((constraints == null) || !constraints.indexMustBeSpecified()) && !baseOptions.isSecondaryIndexing()));
        final List<Pair<Index, List<InternalDataAdapter<?>>>> indexAdapterPairList = (deleteAllIndicesByConstraints) ? queryOptions.getIndicesForAdapters(tempAdapterStore, indexMappingStore, indexStore) : queryOptions.getBestQueryIndices(tempAdapterStore, indexMappingStore, indexStore, statisticsStore, sanitizedConstraints);
        Map<Short, List<Index>> additionalIndicesToDelete = null;
        if (DeletionMode.DELETE_WITH_DUPLICATES.equals(deleteMode) && !deleteAllIndicesByConstraints) {
            additionalIndicesToDelete = new HashMap<>();
            // we have to make sure to delete from the other indices if they exist
            final List<Pair<Index, List<InternalDataAdapter<?>>>> allIndices = queryOptions.getIndicesForAdapters(tempAdapterStore, indexMappingStore, indexStore);
            for (final Pair<Index, List<InternalDataAdapter<?>>> allPair : allIndices) {
                for (final Pair<Index, List<InternalDataAdapter<?>>> constraintPair : indexAdapterPairList) {
                    if (((constraintPair.getKey() == null) && (allPair.getKey() == null)) || constraintPair.getKey().equals(allPair.getKey())) {
                        allPair.getRight().removeAll(constraintPair.getRight());
                        break;
                    }
                }
                for (final InternalDataAdapter<?> adapter : allPair.getRight()) {
                    List<Index> indices = additionalIndicesToDelete.get(adapter.getAdapterId());
                    if (indices == null) {
                        indices = new ArrayList<>();
                        additionalIndicesToDelete.put(adapter.getAdapterId(), indices);
                    }
                    indices.add(allPair.getLeft());
                }
            }
        }
        final Pair<InternalDataAdapter<?>, Aggregation<?, ?, ?>> aggregation = queryOptions.getAggregation();
        final ScanCallback callback = queryOptions.getScanCallback();
        for (final Pair<Index, List<InternalDataAdapter<?>>> indexAdapterPair : indexAdapterPairList) {
            if (indexAdapterPair.getKey() == null) {
                // queries
                if (dataIdIndexIsBest) {
                    // prior logic for !delete
                    for (final InternalDataAdapter adapter : indexAdapterPair.getRight()) {
                        // this must be a data index only adapter, just worry about updating statistics and
                        // not other indices or duplicates
                        ScanCallback scanCallback = callback;
                        if (baseOptions.isPersistDataStatistics()) {
                            final DataStoreCallbackManager callbackCache = new DataStoreCallbackManager(statisticsStore, queriedAdapters.add(adapter.getAdapterId()));
                            deleteCallbacks.add(callbackCache);
                            scanCallback = new ScanCallback<Object, GeoWaveRow>() {

                                @Override
                                public void entryScanned(final Object entry, final GeoWaveRow row) {
                                    if (callback != null) {
                                        callback.entryScanned(entry, row);
                                    }
                                    callbackCache.getDeleteCallback(adapter, null, null).entryDeleted(entry, row);
                                }
                            };
                        }
                        if (sanitizedConstraints instanceof DataIdQuery) {
                            DataIndexUtils.delete(baseOperations, adapterStore, indexMappingStore, internalAdapterStore, queryOptions.getFieldIdsAdapterPair(), queryOptions.getAggregation(), queryOptions.getAuthorizations(), scanCallback, adapter.getAdapterId(), ((DataIdQuery) sanitizedConstraints).getDataIds());
                        } else if (sanitizedConstraints instanceof DataIdRangeQuery) {
                            DataIndexUtils.delete(baseOperations, adapterStore, indexMappingStore, internalAdapterStore, queryOptions.getFieldIdsAdapterPair(), queryOptions.getAggregation(), queryOptions.getAuthorizations(), scanCallback, adapter.getAdapterId(), ((DataIdRangeQuery) sanitizedConstraints).getStartDataIdInclusive(), ((DataIdRangeQuery) sanitizedConstraints).getEndDataIdInclusive());
                        } else {
                            DataIndexUtils.delete(baseOperations, adapterStore, indexMappingStore, internalAdapterStore, queryOptions.getFieldIdsAdapterPair(), queryOptions.getAggregation(), queryOptions.getAuthorizations(), scanCallback, adapter.getAdapterId());
                        }
                    }
                } else {
                    final String[] typeNames = indexAdapterPair.getRight().stream().map(a -> a.getAdapter().getTypeName()).toArray(k -> new String[k]);
                    LOGGER.warn("Data types '" + ArrayUtils.toString(typeNames) + "' do not have an index that satisfies the query");
                }
                continue;
            }
            final List<Short> adapterIdsToQuery = new ArrayList<>();
            // this only needs to be done once per index, not once per
            // adapter
            boolean queriedAllAdaptersByPrefix = false;
            // maintain a set of data IDs if deleting using secondary indexing
            for (final InternalDataAdapter adapter : indexAdapterPair.getRight()) {
                final Index index = indexAdapterPair.getLeft();
                final AdapterToIndexMapping indexMapping = indexMappingStore.getMapping(adapter.getAdapterId(), index.getName());
                memoryMappingStore.addAdapterIndexMapping(indexMapping);
                if (delete) {
                    final DataStoreCallbackManager callbackCache = new DataStoreCallbackManager(statisticsStore, queriedAdapters.add(adapter.getAdapterId()));
                    // want the stats to change
                    if (!(constraints instanceof InsertionIdQuery)) {
                        callbackCache.setPersistStats(baseOptions.isPersistDataStatistics());
                    } else {
                        callbackCache.setPersistStats(false);
                    }
                    deleteCallbacks.add(callbackCache);
                    if (deleteMode == DeletionMode.DELETE_WITH_DUPLICATES) {
                        final DeleteCallbackList<T, GeoWaveRow> delList = (DeleteCallbackList<T, GeoWaveRow>) callbackCache.getDeleteCallback(adapter, indexMapping, index);
                        final DuplicateDeletionCallback<T> dupDeletionCallback = new DuplicateDeletionCallback<>(this, adapter, indexMapping, index);
                        delList.addCallback(dupDeletionCallback);
                        if ((additionalIndicesToDelete != null) && (additionalIndicesToDelete.get(adapter.getAdapterId()) != null)) {
                            delList.addCallback(new DeleteOtherIndicesCallback<>(baseOperations, adapter, additionalIndicesToDelete.get(adapter.getAdapterId()), adapterStore, indexMappingStore, internalAdapterStore, queryOptions.getAuthorizations()));
                        }
                    }
                    final Map<Short, Set<ByteArray>> internalDataIdsToDelete = dataIdsToDelete;
                    queryOptions.setScanCallback(new ScanCallback<Object, GeoWaveRow>() {

                        @Override
                        public void entryScanned(final Object entry, final GeoWaveRow row) {
                            if (callback != null) {
                                callback.entryScanned(entry, row);
                            }
                            if (internalDataIdsToDelete != null) {
                                final ByteArray dataId = new ByteArray(row.getDataId());
                                Set<ByteArray> currentDataIdsToDelete = internalDataIdsToDelete.get(row.getAdapterId());
                                if (currentDataIdsToDelete == null) {
                                    synchronized (internalDataIdsToDelete) {
                                        currentDataIdsToDelete = internalDataIdsToDelete.get(row.getAdapterId());
                                        if (currentDataIdsToDelete == null) {
                                            currentDataIdsToDelete = Sets.newConcurrentHashSet();
                                            internalDataIdsToDelete.put(row.getAdapterId(), currentDataIdsToDelete);
                                        }
                                    }
                                }
                                currentDataIdsToDelete.add(dataId);
                            }
                            callbackCache.getDeleteCallback(adapter, indexMapping, index).entryDeleted(entry, row);
                        }
                    });
                }
                QueryConstraints adapterIndexConstraints;
                if (isConstraintsAdapterIndexSpecific) {
                    adapterIndexConstraints = ((AdapterAndIndexBasedQueryConstraints) sanitizedConstraints).createQueryConstraints(adapter, indexAdapterPair.getLeft(), indexMapping);
                    if (adapterIndexConstraints == null) {
                        continue;
                    }
                } else {
                    adapterIndexConstraints = sanitizedConstraints;
                }
                if (isAggregationAdapterIndexSpecific) {
                    queryOptions.setAggregation(((AdapterAndIndexBasedAggregation) aggregation.getRight()).createAggregation(adapter, indexMapping, index), aggregation.getLeft());
                }
                if (adapterIndexConstraints instanceof InsertionIdQuery) {
                    queryOptions.setLimit(-1);
                    results.add(queryInsertionId(adapter, index, (InsertionIdQuery) adapterIndexConstraints, dedupeFilter, queryOptions, tempAdapterStore, delete));
                    continue;
                } else if (adapterIndexConstraints instanceof PrefixIdQuery) {
                    if (!queriedAllAdaptersByPrefix) {
                        final PrefixIdQuery prefixIdQuery = (PrefixIdQuery) adapterIndexConstraints;
                        results.add(queryRowPrefix(index, prefixIdQuery.getPartitionKey(), prefixIdQuery.getSortKeyPrefix(), queryOptions, indexAdapterPair.getRight(), tempAdapterStore, delete));
                        queriedAllAdaptersByPrefix = true;
                    }
                    continue;
                } else if (isConstraintsAdapterIndexSpecific || isAggregationAdapterIndexSpecific) {
                    // can't query multiple adapters in the same scan
                    results.add(queryConstraints(Collections.singletonList(adapter.getAdapterId()), index, adapterIndexConstraints, dedupeFilter, queryOptions, tempAdapterStore, memoryMappingStore, delete));
                    continue;
                }
                // finally just add it to a list to query multiple adapters
                // in on scan
                adapterIdsToQuery.add(adapter.getAdapterId());
            }
            // in one query instance (one scanner) for efficiency
            if (adapterIdsToQuery.size() > 0) {
                results.add(queryConstraints(adapterIdsToQuery, indexAdapterPair.getLeft(), sanitizedConstraints, dedupeFilter, queryOptions, tempAdapterStore, memoryMappingStore, delete));
            }
            if (DeletionMode.DELETE_WITH_DUPLICATES.equals(deleteMode)) {
                // Make sure each index query has a clean dedupe filter so that entries from other indices
                // get deleted
                dedupeFilter = new DedupeFilter();
            }
        }
    }
    return new CloseableIteratorWrapper<>(new Closeable() {

        @Override
        public void close() throws IOException {
            for (final CloseableIterator<Object> result : results) {
                result.close();
            }
            for (final DataStoreCallbackManager c : deleteCallbacks) {
                c.close();
            }
            if ((dataIdsToDelete != null) && !dataIdsToDelete.isEmpty()) {
                if (baseOptions.isSecondaryIndexing()) {
                    deleteFromDataIndex(dataIdsToDelete, queryOptions.getAuthorizations());
                }
            }
        }
    }, Iterators.concat(new CastIterator<T>(results.iterator())));
}
Also used : FieldVisibilityCountValue(org.locationtech.geowave.core.store.statistics.index.FieldVisibilityCountStatistic.FieldVisibilityCountValue) Arrays(java.util.Arrays) MemoryPersistentAdapterStore(org.locationtech.geowave.core.store.memory.MemoryPersistentAdapterStore) Maps(com.beust.jcommander.internal.Maps) IndexDependentDataAdapter(org.locationtech.geowave.core.store.adapter.IndexDependentDataAdapter) InternalAdapterStore(org.locationtech.geowave.core.store.adapter.InternalAdapterStore) RowWriter(org.locationtech.geowave.core.store.operations.RowWriter) PrefixIdQuery(org.locationtech.geowave.core.store.query.constraints.PrefixIdQuery) Statement(org.locationtech.geowave.core.store.query.gwql.statement.Statement) StatisticId(org.locationtech.geowave.core.store.statistics.StatisticId) MetadataQuery(org.locationtech.geowave.core.store.operations.MetadataQuery) AdapterIndexMappingStore(org.locationtech.geowave.core.store.adapter.AdapterIndexMappingStore) DataIndexReaderParamsBuilder(org.locationtech.geowave.core.store.operations.DataIndexReaderParamsBuilder) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) Persistable(org.locationtech.geowave.core.index.persist.Persistable) GeoWaveMetadata(org.locationtech.geowave.core.store.entities.GeoWaveMetadata) DataStoreUtils(org.locationtech.geowave.core.store.util.DataStoreUtils) InternalDataAdapter(org.locationtech.geowave.core.store.adapter.InternalDataAdapter) DataTypeStatisticType(org.locationtech.geowave.core.store.statistics.adapter.DataTypeStatisticType) Query(org.locationtech.geowave.core.store.api.Query) StatisticQuery(org.locationtech.geowave.core.store.api.StatisticQuery) DataTypeStatistic(org.locationtech.geowave.core.store.api.DataTypeStatistic) Set(java.util.Set) ByteArrayConstraints(org.locationtech.geowave.core.store.api.BinConstraints.ByteArrayConstraints) IngestCallbackList(org.locationtech.geowave.core.store.callback.IngestCallbackList) IndexStore(org.locationtech.geowave.core.store.index.IndexStore) StatisticUpdateCallback(org.locationtech.geowave.core.store.statistics.StatisticUpdateCallback) InternalStatisticsHelper(org.locationtech.geowave.core.store.statistics.InternalStatisticsHelper) IndexStatisticQuery(org.locationtech.geowave.core.store.statistics.query.IndexStatisticQuery) DeleteOtherIndicesCallback(org.locationtech.geowave.core.store.callback.DeleteOtherIndicesCallback) DefaultStatisticsProvider(org.locationtech.geowave.core.store.statistics.DefaultStatisticsProvider) ByteArray(org.locationtech.geowave.core.index.ByteArray) DataStoreProperty(org.locationtech.geowave.core.store.DataStoreProperty) StatisticType(org.locationtech.geowave.core.store.statistics.StatisticType) AggregationQuery(org.locationtech.geowave.core.store.api.AggregationQuery) DeleteCallbackList(org.locationtech.geowave.core.store.callback.DeleteCallbackList) BinConstraints(org.locationtech.geowave.core.store.api.BinConstraints) GeoWaveRowIteratorTransformer(org.locationtech.geowave.core.store.entities.GeoWaveRowIteratorTransformer) AdapterAndIndexBasedAggregation(org.locationtech.geowave.core.store.query.aggregate.AdapterAndIndexBasedAggregation) IndexStatisticType(org.locationtech.geowave.core.store.statistics.index.IndexStatisticType) AdapterToIndexMapping(org.locationtech.geowave.core.store.AdapterToIndexMapping) ArrayList(java.util.ArrayList) Lists(com.google.common.collect.Lists) DataTypeAdapter(org.locationtech.geowave.core.store.api.DataTypeAdapter) QueryBuilder(org.locationtech.geowave.core.store.api.QueryBuilder) PropertyStore(org.locationtech.geowave.core.store.PropertyStore) GeoWaveRow(org.locationtech.geowave.core.store.entities.GeoWaveRow) DataStoreOperations(org.locationtech.geowave.core.store.operations.DataStoreOperations) FieldStatisticType(org.locationtech.geowave.core.store.statistics.field.FieldStatisticType) DataStore(org.locationtech.geowave.core.store.api.DataStore) MetadataReader(org.locationtech.geowave.core.store.operations.MetadataReader) IOException(java.io.IOException) MemoryAdapterIndexMappingStore(org.locationtech.geowave.core.store.memory.MemoryAdapterIndexMappingStore) DataStoreOptions(org.locationtech.geowave.core.store.DataStoreOptions) Writer(org.locationtech.geowave.core.store.api.Writer) IndexCompositeWriter(org.locationtech.geowave.core.store.index.writer.IndexCompositeWriter) MetadataWriter(org.locationtech.geowave.core.store.operations.MetadataWriter) CloseableIterator(org.locationtech.geowave.core.store.CloseableIterator) DedupeFilter(org.locationtech.geowave.core.store.query.filter.DedupeFilter) DifferingVisibilityCountValue(org.locationtech.geowave.core.store.statistics.index.DifferingVisibilityCountStatistic.DifferingVisibilityCountValue) LoggerFactory(org.slf4j.LoggerFactory) Aggregation(org.locationtech.geowave.core.store.api.Aggregation) ReaderParamsBuilder(org.locationtech.geowave.core.store.operations.ReaderParamsBuilder) StatisticValue(org.locationtech.geowave.core.store.api.StatisticValue) ResultSet(org.locationtech.geowave.core.store.query.gwql.ResultSet) GeoWaveRowMergingTransform(org.locationtech.geowave.core.store.entities.GeoWaveRowMergingTransform) DataIdRangeQuery(org.locationtech.geowave.core.store.query.constraints.DataIdRangeQuery) Statistic(org.locationtech.geowave.core.store.api.Statistic) FieldStatisticQuery(org.locationtech.geowave.core.store.statistics.query.FieldStatisticQuery) FieldDescriptor(org.locationtech.geowave.core.store.adapter.FieldDescriptor) BaseDataStoreIngestDriver(org.locationtech.geowave.core.store.ingest.BaseDataStoreIngestDriver) GWQLParser(org.locationtech.geowave.core.store.query.gwql.parse.GWQLParser) FieldStatistic(org.locationtech.geowave.core.store.api.FieldStatistic) RowReader(org.locationtech.geowave.core.store.operations.RowReader) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) IndexStatistic(org.locationtech.geowave.core.store.api.IndexStatistic) Collectors(java.util.stream.Collectors) ScanCallback(org.locationtech.geowave.core.store.callback.ScanCallback) DataTypeStatisticQuery(org.locationtech.geowave.core.store.statistics.query.DataTypeStatisticQuery) IndependentAdapterIndexWriter(org.locationtech.geowave.core.store.index.writer.IndependentAdapterIndexWriter) Sets(com.google.common.collect.Sets) List(java.util.List) VisibilityHandler(org.locationtech.geowave.core.store.api.VisibilityHandler) Entry(java.util.Map.Entry) DataIdQuery(org.locationtech.geowave.core.store.query.constraints.DataIdQuery) DataIndexUtils(org.locationtech.geowave.core.store.base.dataidx.DataIndexUtils) HashMap(java.util.HashMap) ArrayUtils(org.apache.commons.lang3.ArrayUtils) InsertionIdQuery(org.locationtech.geowave.core.store.query.constraints.InsertionIdQuery) NativeEntryIteratorWrapper(org.locationtech.geowave.core.store.util.NativeEntryIteratorWrapper) Iterators(com.google.common.collect.Iterators) HashSet(java.util.HashSet) DataStatisticsStore(org.locationtech.geowave.core.store.statistics.DataStatisticsStore) MetadataType(org.locationtech.geowave.core.store.operations.MetadataType) CloseableIteratorWrapper(org.locationtech.geowave.core.store.CloseableIteratorWrapper) Index(org.locationtech.geowave.core.store.api.Index) Logger(org.slf4j.Logger) IngestOptions(org.locationtech.geowave.core.store.api.IngestOptions) IngestCallback(org.locationtech.geowave.core.store.callback.IngestCallback) QueryConstraints(org.locationtech.geowave.core.store.query.constraints.QueryConstraints) PersistentAdapterStore(org.locationtech.geowave.core.store.adapter.PersistentAdapterStore) DuplicateDeletionCallback(org.locationtech.geowave.core.store.callback.DuplicateDeletionCallback) Closeable(java.io.Closeable) TypeConstraintQuery(org.locationtech.geowave.core.store.query.constraints.TypeConstraintQuery) AdapterAndIndexBasedQueryConstraints(org.locationtech.geowave.core.store.query.constraints.AdapterAndIndexBasedQueryConstraints) EverythingQuery(org.locationtech.geowave.core.store.query.constraints.EverythingQuery) Collections(java.util.Collections) MemoryAdapterIndexMappingStore(org.locationtech.geowave.core.store.memory.MemoryAdapterIndexMappingStore) Closeable(java.io.Closeable) AdapterAndIndexBasedQueryConstraints(org.locationtech.geowave.core.store.query.constraints.AdapterAndIndexBasedQueryConstraints) ArrayList(java.util.ArrayList) QueryConstraints(org.locationtech.geowave.core.store.query.constraints.QueryConstraints) AdapterAndIndexBasedQueryConstraints(org.locationtech.geowave.core.store.query.constraints.AdapterAndIndexBasedQueryConstraints) Index(org.locationtech.geowave.core.store.api.Index) DataIdRangeQuery(org.locationtech.geowave.core.store.query.constraints.DataIdRangeQuery) DeleteCallbackList(org.locationtech.geowave.core.store.callback.DeleteCallbackList) DedupeFilter(org.locationtech.geowave.core.store.query.filter.DedupeFilter) MemoryPersistentAdapterStore(org.locationtech.geowave.core.store.memory.MemoryPersistentAdapterStore) ByteArray(org.locationtech.geowave.core.index.ByteArray) IngestCallbackList(org.locationtech.geowave.core.store.callback.IngestCallbackList) DeleteCallbackList(org.locationtech.geowave.core.store.callback.DeleteCallbackList) ArrayList(java.util.ArrayList) List(java.util.List) DataIdQuery(org.locationtech.geowave.core.store.query.constraints.DataIdQuery) HashSet(java.util.HashSet) TypeConstraintQuery(org.locationtech.geowave.core.store.query.constraints.TypeConstraintQuery) EverythingQuery(org.locationtech.geowave.core.store.query.constraints.EverythingQuery) InsertionIdQuery(org.locationtech.geowave.core.store.query.constraints.InsertionIdQuery) CloseableIteratorWrapper(org.locationtech.geowave.core.store.CloseableIteratorWrapper) GeoWaveRow(org.locationtech.geowave.core.store.entities.GeoWaveRow) Set(java.util.Set) ResultSet(org.locationtech.geowave.core.store.query.gwql.ResultSet) HashSet(java.util.HashSet) AdapterToIndexMapping(org.locationtech.geowave.core.store.AdapterToIndexMapping) AdapterAndIndexBasedAggregation(org.locationtech.geowave.core.store.query.aggregate.AdapterAndIndexBasedAggregation) Aggregation(org.locationtech.geowave.core.store.api.Aggregation) InternalDataAdapter(org.locationtech.geowave.core.store.adapter.InternalDataAdapter) ScanCallback(org.locationtech.geowave.core.store.callback.ScanCallback) Pair(org.apache.commons.lang3.tuple.Pair) DuplicateDeletionCallback(org.locationtech.geowave.core.store.callback.DuplicateDeletionCallback) CloseableIterator(org.locationtech.geowave.core.store.CloseableIterator) IOException(java.io.IOException) NativeEntryIteratorWrapper(org.locationtech.geowave.core.store.util.NativeEntryIteratorWrapper) AdapterAndIndexBasedAggregation(org.locationtech.geowave.core.store.query.aggregate.AdapterAndIndexBasedAggregation) PrefixIdQuery(org.locationtech.geowave.core.store.query.constraints.PrefixIdQuery)

Example 4 with DedupeFilter

use of org.locationtech.geowave.core.store.query.filter.DedupeFilter in project geowave by locationtech.

the class DynamoDBReader method startRead.

private void startRead(final List<QueryRequest> requests, final String tableName, final boolean rowMerging, final boolean parallelDecode) {
    Iterator<Map<String, AttributeValue>> rawIterator;
    Predicate<DynamoDBRow> adapterIdFilter = null;
    final Function<Iterator<Map<String, AttributeValue>>, Iterator<DynamoDBRow>> rawToDynamoDBRow = new Function<Iterator<Map<String, AttributeValue>>, Iterator<DynamoDBRow>>() {

        @Override
        public Iterator<DynamoDBRow> apply(final Iterator<Map<String, AttributeValue>> input) {
            final Iterator<DynamoDBRow> rowIterator = Streams.stream(input).map(new DynamoDBRow.GuavaRowTranslationHelper()).filter(visibilityFilter).iterator();
            if (rowMerging) {
                return new GeoWaveRowMergingIterator<>(rowIterator);
            } else {
                // TODO: understand why there are duplicates coming back when there shouldn't be from
                // DynamoDB
                final DedupeFilter dedupe = new DedupeFilter();
                return Iterators.filter(rowIterator, row -> dedupe.applyDedupeFilter(row.getAdapterId(), new ByteArray(row.getDataId())));
            }
        }
    };
    if (!requests.isEmpty()) {
        if (ASYNC) {
            rawIterator = Iterators.concat(requests.parallelStream().map(this::executeAsyncQueryRequest).iterator());
        } else {
            rawIterator = Iterators.concat(requests.parallelStream().map(this::executeQueryRequest).iterator());
        }
    } else {
        if (ASYNC) {
            final ScanRequest request = new ScanRequest(tableName);
            rawIterator = new AsyncPaginatedScan(request, operations.getClient());
        } else {
            // query everything
            final ScanRequest request = new ScanRequest(tableName);
            final ScanResult scanResult = operations.getClient().scan(request);
            rawIterator = new LazyPaginatedScan(scanResult, request, operations.getClient());
            // filtering by adapter ID
            if ((readerParams.getAdapterIds() != null) && (readerParams.getAdapterIds().length > 0)) {
                adapterIdFilter = input -> ArrayUtils.contains(readerParams.getAdapterIds(), input.getAdapterId());
            }
        }
    }
    Iterator<DynamoDBRow> rowIter = rawToDynamoDBRow.apply(rawIterator);
    if (adapterIdFilter != null) {
        rowIter = Streams.stream(rowIter).filter(adapterIdFilter).iterator();
    }
    if (parallelDecode) {
        final ParallelDecoder<T> decoder = new SimpleParallelDecoder<>(rowTransformer, Iterators.transform(rowIter, r -> (GeoWaveRow) r));
        try {
            decoder.startDecode();
        } catch (final Exception e) {
            Throwables.propagate(e);
        }
        iterator = decoder;
        closeable = decoder;
    } else {
        iterator = rowTransformer.apply(Iterators.transform(rowIter, r -> (GeoWaveRow) r));
        closeable = null;
    }
}
Also used : Condition(com.amazonaws.services.dynamodbv2.model.Condition) ByteArray(org.locationtech.geowave.core.index.ByteArray) QueryRequest(com.amazonaws.services.dynamodbv2.model.QueryRequest) RecordReaderParams(org.locationtech.geowave.mapreduce.splits.RecordReaderParams) SinglePartitionQueryRanges(org.locationtech.geowave.core.index.SinglePartitionQueryRanges) GeoWaveRowIteratorTransformer(org.locationtech.geowave.core.store.entities.GeoWaveRowIteratorTransformer) ScanResult(com.amazonaws.services.dynamodbv2.model.ScanResult) InternalAdapterStore(org.locationtech.geowave.core.store.adapter.InternalAdapterStore) ArrayUtils(org.apache.commons.lang3.ArrayUtils) Function(java.util.function.Function) ByteBuffer(java.nio.ByteBuffer) Iterators(com.google.common.collect.Iterators) ArrayList(java.util.ArrayList) Lists(com.google.common.collect.Lists) ReaderParams(org.locationtech.geowave.core.store.operations.ReaderParams) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) Map(java.util.Map) ByteArrayRange(org.locationtech.geowave.core.index.ByteArrayRange) ClientVisibilityFilter(org.locationtech.geowave.core.store.query.filter.ClientVisibilityFilter) ParallelDecoder(org.locationtech.geowave.core.store.operations.ParallelDecoder) DynamoDBRow(org.locationtech.geowave.datastore.dynamodb.DynamoDBRow) DataStoreUtils(org.locationtech.geowave.core.store.util.DataStoreUtils) GeoWaveRow(org.locationtech.geowave.core.store.entities.GeoWaveRow) AsyncPaginatedQuery(org.locationtech.geowave.datastore.dynamodb.util.AsyncPaginatedQuery) DynamoDBUtils(org.locationtech.geowave.datastore.dynamodb.util.DynamoDBUtils) Iterator(java.util.Iterator) GeoWaveRowMergingIterator(org.locationtech.geowave.core.store.entities.GeoWaveRowMergingIterator) RowReader(org.locationtech.geowave.core.store.operations.RowReader) Predicate(java.util.function.Predicate) Collection(java.util.Collection) SimpleParallelDecoder(org.locationtech.geowave.core.store.operations.SimpleParallelDecoder) ScanRequest(com.amazonaws.services.dynamodbv2.model.ScanRequest) Throwables(com.google.common.base.Throwables) Streams(com.google.common.collect.Streams) QueryResult(com.amazonaws.services.dynamodbv2.model.QueryResult) Sets(com.google.common.collect.Sets) RangeReaderParams(org.locationtech.geowave.core.store.operations.RangeReaderParams) AsyncPaginatedScan(org.locationtech.geowave.datastore.dynamodb.util.AsyncPaginatedScan) LazyPaginatedQuery(org.locationtech.geowave.datastore.dynamodb.util.LazyPaginatedQuery) LazyPaginatedScan(org.locationtech.geowave.datastore.dynamodb.util.LazyPaginatedScan) ComparisonOperator(com.amazonaws.services.dynamodbv2.model.ComparisonOperator) List(java.util.List) DedupeFilter(org.locationtech.geowave.core.store.query.filter.DedupeFilter) GeoWaveRowRange(org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange) ByteArrayUtils(org.locationtech.geowave.core.index.ByteArrayUtils) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) ScanResult(com.amazonaws.services.dynamodbv2.model.ScanResult) GeoWaveRow(org.locationtech.geowave.core.store.entities.GeoWaveRow) AsyncPaginatedScan(org.locationtech.geowave.datastore.dynamodb.util.AsyncPaginatedScan) SimpleParallelDecoder(org.locationtech.geowave.core.store.operations.SimpleParallelDecoder) GeoWaveRowMergingIterator(org.locationtech.geowave.core.store.entities.GeoWaveRowMergingIterator) DynamoDBRow(org.locationtech.geowave.datastore.dynamodb.DynamoDBRow) Function(java.util.function.Function) ScanRequest(com.amazonaws.services.dynamodbv2.model.ScanRequest) DedupeFilter(org.locationtech.geowave.core.store.query.filter.DedupeFilter) Iterator(java.util.Iterator) GeoWaveRowMergingIterator(org.locationtech.geowave.core.store.entities.GeoWaveRowMergingIterator) ByteArray(org.locationtech.geowave.core.index.ByteArray) Map(java.util.Map) LazyPaginatedScan(org.locationtech.geowave.datastore.dynamodb.util.LazyPaginatedScan)

Aggregations

DedupeFilter (org.locationtech.geowave.core.store.query.filter.DedupeFilter)4 GeoWaveRow (org.locationtech.geowave.core.store.entities.GeoWaveRow)3 Iterators (com.google.common.collect.Iterators)2 Lists (com.google.common.collect.Lists)2 Sets (com.google.common.collect.Sets)2 ArrayList (java.util.ArrayList)2 List (java.util.List)2 Map (java.util.Map)2 Entry (java.util.Map.Entry)2 ArrayUtils (org.apache.commons.lang3.ArrayUtils)2 ByteArray (org.locationtech.geowave.core.index.ByteArray)2 CloseableIteratorWrapper (org.locationtech.geowave.core.store.CloseableIteratorWrapper)2 AdapterIndexMappingStore (org.locationtech.geowave.core.store.adapter.AdapterIndexMappingStore)2 InternalAdapterStore (org.locationtech.geowave.core.store.adapter.InternalAdapterStore)2 PersistentAdapterStore (org.locationtech.geowave.core.store.adapter.PersistentAdapterStore)2 Aggregation (org.locationtech.geowave.core.store.api.Aggregation)2 IndexStore (org.locationtech.geowave.core.store.index.IndexStore)2 AttributeValue (com.amazonaws.services.dynamodbv2.model.AttributeValue)1 ComparisonOperator (com.amazonaws.services.dynamodbv2.model.ComparisonOperator)1 Condition (com.amazonaws.services.dynamodbv2.model.Condition)1