Search in sources :

Example 1 with AsyncPaginatedScan

Use of org.locationtech.geowave.datastore.dynamodb.util.AsyncPaginatedScan in project geowave by locationtech.

The class DynamoDBReader, method startRead.

/**
 * Builds the result iterator for this reader and stores it in the {@code iterator} field, with
 * the matching handle (or {@code null}) in the {@code closeable} field.
 *
 * <p>When {@code requests} is non-empty, every request is executed (via
 * {@code executeAsyncQueryRequest} or {@code executeQueryRequest}, depending on {@code ASYNC})
 * and the per-request results are concatenated. When {@code requests} is empty, the table named
 * {@code tableName} is scanned with an unfiltered {@code ScanRequest}, and the rows are then
 * restricted client-side to {@code readerParams.getAdapterIds()} if any are set.
 *
 * @param requests the query requests to execute; an empty list triggers a table scan instead
 * @param tableName the table to scan when {@code requests} is empty
 * @param rowMerging if {@code true}, rows are wrapped in a {@code GeoWaveRowMergingIterator};
 *        otherwise they are de-duplicated by adapter ID and data ID
 * @param parallelDecode if {@code true}, rows are decoded through a {@code SimpleParallelDecoder},
 *        which then also becomes the {@code closeable}
 */
private void startRead(final List<QueryRequest> requests, final String tableName, final boolean rowMerging, final boolean parallelDecode) {
    final Iterator<Map<String, AttributeValue>> rawIterator;
    Predicate<DynamoDBRow> adapterIdFilter = null;
    if (!requests.isEmpty()) {
        if (ASYNC) {
            rawIterator = Iterators.concat(requests.parallelStream().map(this::executeAsyncQueryRequest).iterator());
        } else {
            rawIterator = Iterators.concat(requests.parallelStream().map(this::executeQueryRequest).iterator());
        }
    } else {
        // query everything
        final ScanRequest request = new ScanRequest(tableName);
        if (ASYNC) {
            rawIterator = new AsyncPaginatedScan(request, operations.getClient());
        } else {
            final ScanResult scanResult = operations.getClient().scan(request);
            rawIterator = new LazyPaginatedScan(scanResult, request, operations.getClient());
        }
        // The scan request carries no filter of its own, so the requested adapter IDs are
        // enforced client-side. This must happen for the async scan as well as the blocking one.
        if ((readerParams.getAdapterIds() != null) && (readerParams.getAdapterIds().length > 0)) {
            adapterIdFilter = input -> ArrayUtils.contains(readerParams.getAdapterIds(), input.getAdapterId());
        }
    }

    // Translate raw attribute maps into rows and drop the ones rejected by the visibility filter.
    final Iterator<DynamoDBRow> visibleRows = Streams.stream(rawIterator).map(new DynamoDBRow.GuavaRowTranslationHelper()).filter(visibilityFilter).iterator();
    Iterator<DynamoDBRow> rowIter;
    if (rowMerging) {
        rowIter = new GeoWaveRowMergingIterator<>(visibleRows);
    } else {
        // TODO: understand why there are duplicates coming back when there shouldn't be from
        // DynamoDB
        final DedupeFilter dedupe = new DedupeFilter();
        rowIter = Iterators.filter(visibleRows, row -> dedupe.applyDedupeFilter(row.getAdapterId(), new ByteArray(row.getDataId())));
    }
    if (adapterIdFilter != null) {
        rowIter = Streams.stream(rowIter).filter(adapterIdFilter).iterator();
    }

    if (parallelDecode) {
        final ParallelDecoder<T> decoder = new SimpleParallelDecoder<>(rowTransformer, Iterators.transform(rowIter, r -> (GeoWaveRow) r));
        try {
            decoder.startDecode();
        } catch (final Exception e) {
            if (e instanceof InterruptedException) {
                // keep the interrupt visible to callers further up the stack
                Thread.currentThread().interrupt();
            }
            // Same outcome as the deprecated Throwables.propagate: unchecked exceptions are
            // rethrown unchanged, checked ones are wrapped with their cause preserved.
            Throwables.throwIfUnchecked(e);
            throw new RuntimeException(e);
        }
        iterator = decoder;
        closeable = decoder;
    } else {
        iterator = rowTransformer.apply(Iterators.transform(rowIter, r -> (GeoWaveRow) r));
        closeable = null;
    }
}
Also used : Condition(com.amazonaws.services.dynamodbv2.model.Condition) ByteArray(org.locationtech.geowave.core.index.ByteArray) QueryRequest(com.amazonaws.services.dynamodbv2.model.QueryRequest) RecordReaderParams(org.locationtech.geowave.mapreduce.splits.RecordReaderParams) SinglePartitionQueryRanges(org.locationtech.geowave.core.index.SinglePartitionQueryRanges) GeoWaveRowIteratorTransformer(org.locationtech.geowave.core.store.entities.GeoWaveRowIteratorTransformer) ScanResult(com.amazonaws.services.dynamodbv2.model.ScanResult) InternalAdapterStore(org.locationtech.geowave.core.store.adapter.InternalAdapterStore) ArrayUtils(org.apache.commons.lang3.ArrayUtils) Function(java.util.function.Function) ByteBuffer(java.nio.ByteBuffer) Iterators(com.google.common.collect.Iterators) ArrayList(java.util.ArrayList) Lists(com.google.common.collect.Lists) ReaderParams(org.locationtech.geowave.core.store.operations.ReaderParams) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) Map(java.util.Map) ByteArrayRange(org.locationtech.geowave.core.index.ByteArrayRange) ClientVisibilityFilter(org.locationtech.geowave.core.store.query.filter.ClientVisibilityFilter) ParallelDecoder(org.locationtech.geowave.core.store.operations.ParallelDecoder) DynamoDBRow(org.locationtech.geowave.datastore.dynamodb.DynamoDBRow) DataStoreUtils(org.locationtech.geowave.core.store.util.DataStoreUtils) GeoWaveRow(org.locationtech.geowave.core.store.entities.GeoWaveRow) AsyncPaginatedQuery(org.locationtech.geowave.datastore.dynamodb.util.AsyncPaginatedQuery) DynamoDBUtils(org.locationtech.geowave.datastore.dynamodb.util.DynamoDBUtils) Iterator(java.util.Iterator) GeoWaveRowMergingIterator(org.locationtech.geowave.core.store.entities.GeoWaveRowMergingIterator) RowReader(org.locationtech.geowave.core.store.operations.RowReader) Predicate(java.util.function.Predicate) Collection(java.util.Collection) 
SimpleParallelDecoder(org.locationtech.geowave.core.store.operations.SimpleParallelDecoder) ScanRequest(com.amazonaws.services.dynamodbv2.model.ScanRequest) Throwables(com.google.common.base.Throwables) Streams(com.google.common.collect.Streams) QueryResult(com.amazonaws.services.dynamodbv2.model.QueryResult) Sets(com.google.common.collect.Sets) RangeReaderParams(org.locationtech.geowave.core.store.operations.RangeReaderParams) AsyncPaginatedScan(org.locationtech.geowave.datastore.dynamodb.util.AsyncPaginatedScan) LazyPaginatedQuery(org.locationtech.geowave.datastore.dynamodb.util.LazyPaginatedQuery) LazyPaginatedScan(org.locationtech.geowave.datastore.dynamodb.util.LazyPaginatedScan) ComparisonOperator(com.amazonaws.services.dynamodbv2.model.ComparisonOperator) List(java.util.List) DedupeFilter(org.locationtech.geowave.core.store.query.filter.DedupeFilter) GeoWaveRowRange(org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange) ByteArrayUtils(org.locationtech.geowave.core.index.ByteArrayUtils) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) ScanResult(com.amazonaws.services.dynamodbv2.model.ScanResult) GeoWaveRow(org.locationtech.geowave.core.store.entities.GeoWaveRow) AsyncPaginatedScan(org.locationtech.geowave.datastore.dynamodb.util.AsyncPaginatedScan) SimpleParallelDecoder(org.locationtech.geowave.core.store.operations.SimpleParallelDecoder) GeoWaveRowMergingIterator(org.locationtech.geowave.core.store.entities.GeoWaveRowMergingIterator) DynamoDBRow(org.locationtech.geowave.datastore.dynamodb.DynamoDBRow) Function(java.util.function.Function) ScanRequest(com.amazonaws.services.dynamodbv2.model.ScanRequest) DedupeFilter(org.locationtech.geowave.core.store.query.filter.DedupeFilter) Iterator(java.util.Iterator) GeoWaveRowMergingIterator(org.locationtech.geowave.core.store.entities.GeoWaveRowMergingIterator) ByteArray(org.locationtech.geowave.core.index.ByteArray) Map(java.util.Map) 
LazyPaginatedScan(org.locationtech.geowave.datastore.dynamodb.util.LazyPaginatedScan)

Aggregations

AttributeValue (com.amazonaws.services.dynamodbv2.model.AttributeValue)1 ComparisonOperator (com.amazonaws.services.dynamodbv2.model.ComparisonOperator)1 Condition (com.amazonaws.services.dynamodbv2.model.Condition)1 QueryRequest (com.amazonaws.services.dynamodbv2.model.QueryRequest)1 QueryResult (com.amazonaws.services.dynamodbv2.model.QueryResult)1 ScanRequest (com.amazonaws.services.dynamodbv2.model.ScanRequest)1 ScanResult (com.amazonaws.services.dynamodbv2.model.ScanResult)1 Throwables (com.google.common.base.Throwables)1 Iterators (com.google.common.collect.Iterators)1 Lists (com.google.common.collect.Lists)1 Sets (com.google.common.collect.Sets)1 Streams (com.google.common.collect.Streams)1 ByteBuffer (java.nio.ByteBuffer)1 ArrayList (java.util.ArrayList)1 Collection (java.util.Collection)1 Iterator (java.util.Iterator)1 List (java.util.List)1 Map (java.util.Map)1 Function (java.util.function.Function)1 Predicate (java.util.function.Predicate)1