Search in sources:

Example 1 with LazyPaginatedScan

use of org.locationtech.geowave.datastore.dynamodb.util.LazyPaginatedScan in project geowave by locationtech.

In the class DynamoDBReader, method startRead.

/**
 * Builds the row iterator for this read and stores it in the {@code iterator} and
 * {@code closeable} fields.
 *
 * <p>If {@code requests} is non-empty, every request is executed (through the async or the
 * synchronous executor, depending on {@code ASYNC}) and the per-request results are
 * concatenated. If it is empty, the whole table is scanned instead; because such a scan is
 * not constrained by adapter, rows are then filtered client-side against the adapter IDs in
 * {@code readerParams} (when any are set), for both the async and the synchronous scan.
 *
 * <p>Raw items are converted to {@link DynamoDBRow}s, passed through {@code visibilityFilter},
 * and then either merged or de-duplicated before being handed to {@code rowTransformer}.
 *
 * @param requests the query requests to execute; an empty list means "scan the whole table"
 * @param tableName the table to scan when {@code requests} is empty
 * @param rowMerging if {@code true} rows go through a {@link GeoWaveRowMergingIterator},
 *        otherwise they are de-duplicated by adapter ID and data ID
 * @param parallelDecode if {@code true} rows are decoded by a {@link SimpleParallelDecoder},
 *        which is also stored as the closeable resource
 * @throws RuntimeException if starting the parallel decoder fails; unchecked failures are
 *         rethrown as-is, checked ones are wrapped
 */
private void startRead(final List<QueryRequest> requests, final String tableName, final boolean rowMerging, final boolean parallelDecode) {
    final Function<Iterator<Map<String, AttributeValue>>, Iterator<DynamoDBRow>> rawToDynamoDBRow = rawItems -> {
        final Iterator<DynamoDBRow> rowIterator = Streams.stream(rawItems).map(new DynamoDBRow.GuavaRowTranslationHelper()).filter(visibilityFilter).iterator();
        if (rowMerging) {
            return new GeoWaveRowMergingIterator<>(rowIterator);
        }
        // TODO: understand why there are duplicates coming back when there shouldn't be from
        // DynamoDB
        final DedupeFilter dedupe = new DedupeFilter();
        return Iterators.filter(rowIterator, row -> dedupe.applyDedupeFilter(row.getAdapterId(), new ByteArray(row.getDataId())));
    };

    final Iterator<Map<String, AttributeValue>> rawIterator;
    Predicate<DynamoDBRow> adapterIdFilter = null;
    if (!requests.isEmpty()) {
        if (ASYNC) {
            rawIterator = Iterators.concat(requests.parallelStream().map(this::executeAsyncQueryRequest).iterator());
        } else {
            rawIterator = Iterators.concat(requests.parallelStream().map(this::executeQueryRequest).iterator());
        }
    } else {
        // query everything
        final ScanRequest request = new ScanRequest(tableName);
        if (ASYNC) {
            rawIterator = new AsyncPaginatedScan(request, operations.getClient());
        } else {
            final ScanResult scanResult = operations.getClient().scan(request);
            rawIterator = new LazyPaginatedScan(scanResult, request, operations.getClient());
        }
        // A full scan returns rows of every adapter, so filter by adapter ID on the client
        // regardless of whether the scan is executed asynchronously or synchronously.
        if ((readerParams.getAdapterIds() != null) && (readerParams.getAdapterIds().length > 0)) {
            adapterIdFilter = candidate -> ArrayUtils.contains(readerParams.getAdapterIds(), candidate.getAdapterId());
        }
    }

    Iterator<DynamoDBRow> rowIter = rawToDynamoDBRow.apply(rawIterator);
    if (adapterIdFilter != null) {
        rowIter = Streams.stream(rowIter).filter(adapterIdFilter).iterator();
    }

    // Both decode paths consume the rows through the GeoWaveRow view.
    final Iterator<GeoWaveRow> geoWaveRows = Iterators.transform(rowIter, r -> (GeoWaveRow) r);
    if (parallelDecode) {
        final ParallelDecoder<T> decoder = new SimpleParallelDecoder<>(rowTransformer, geoWaveRows);
        try {
            decoder.startDecode();
        } catch (final Exception e) {
            // Same semantics as the deprecated Throwables.propagate(e): rethrow unchecked
            // throwables unchanged, wrap checked ones.
            Throwables.throwIfUnchecked(e);
            throw new RuntimeException(e);
        }
        iterator = decoder;
        closeable = decoder;
    } else {
        iterator = rowTransformer.apply(geoWaveRows);
        closeable = null;
    }
}
Also used : Condition(com.amazonaws.services.dynamodbv2.model.Condition) ByteArray(org.locationtech.geowave.core.index.ByteArray) QueryRequest(com.amazonaws.services.dynamodbv2.model.QueryRequest) RecordReaderParams(org.locationtech.geowave.mapreduce.splits.RecordReaderParams) SinglePartitionQueryRanges(org.locationtech.geowave.core.index.SinglePartitionQueryRanges) GeoWaveRowIteratorTransformer(org.locationtech.geowave.core.store.entities.GeoWaveRowIteratorTransformer) ScanResult(com.amazonaws.services.dynamodbv2.model.ScanResult) InternalAdapterStore(org.locationtech.geowave.core.store.adapter.InternalAdapterStore) ArrayUtils(org.apache.commons.lang3.ArrayUtils) Function(java.util.function.Function) ByteBuffer(java.nio.ByteBuffer) Iterators(com.google.common.collect.Iterators) ArrayList(java.util.ArrayList) Lists(com.google.common.collect.Lists) ReaderParams(org.locationtech.geowave.core.store.operations.ReaderParams) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) Map(java.util.Map) ByteArrayRange(org.locationtech.geowave.core.index.ByteArrayRange) ClientVisibilityFilter(org.locationtech.geowave.core.store.query.filter.ClientVisibilityFilter) ParallelDecoder(org.locationtech.geowave.core.store.operations.ParallelDecoder) DynamoDBRow(org.locationtech.geowave.datastore.dynamodb.DynamoDBRow) DataStoreUtils(org.locationtech.geowave.core.store.util.DataStoreUtils) GeoWaveRow(org.locationtech.geowave.core.store.entities.GeoWaveRow) AsyncPaginatedQuery(org.locationtech.geowave.datastore.dynamodb.util.AsyncPaginatedQuery) DynamoDBUtils(org.locationtech.geowave.datastore.dynamodb.util.DynamoDBUtils) Iterator(java.util.Iterator) GeoWaveRowMergingIterator(org.locationtech.geowave.core.store.entities.GeoWaveRowMergingIterator) RowReader(org.locationtech.geowave.core.store.operations.RowReader) Predicate(java.util.function.Predicate) Collection(java.util.Collection) 
SimpleParallelDecoder(org.locationtech.geowave.core.store.operations.SimpleParallelDecoder) ScanRequest(com.amazonaws.services.dynamodbv2.model.ScanRequest) Throwables(com.google.common.base.Throwables) Streams(com.google.common.collect.Streams) QueryResult(com.amazonaws.services.dynamodbv2.model.QueryResult) Sets(com.google.common.collect.Sets) RangeReaderParams(org.locationtech.geowave.core.store.operations.RangeReaderParams) AsyncPaginatedScan(org.locationtech.geowave.datastore.dynamodb.util.AsyncPaginatedScan) LazyPaginatedQuery(org.locationtech.geowave.datastore.dynamodb.util.LazyPaginatedQuery) LazyPaginatedScan(org.locationtech.geowave.datastore.dynamodb.util.LazyPaginatedScan) ComparisonOperator(com.amazonaws.services.dynamodbv2.model.ComparisonOperator) List(java.util.List) DedupeFilter(org.locationtech.geowave.core.store.query.filter.DedupeFilter) GeoWaveRowRange(org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange) ByteArrayUtils(org.locationtech.geowave.core.index.ByteArrayUtils) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) ScanResult(com.amazonaws.services.dynamodbv2.model.ScanResult) GeoWaveRow(org.locationtech.geowave.core.store.entities.GeoWaveRow) AsyncPaginatedScan(org.locationtech.geowave.datastore.dynamodb.util.AsyncPaginatedScan) SimpleParallelDecoder(org.locationtech.geowave.core.store.operations.SimpleParallelDecoder) GeoWaveRowMergingIterator(org.locationtech.geowave.core.store.entities.GeoWaveRowMergingIterator) DynamoDBRow(org.locationtech.geowave.datastore.dynamodb.DynamoDBRow) Function(java.util.function.Function) ScanRequest(com.amazonaws.services.dynamodbv2.model.ScanRequest) DedupeFilter(org.locationtech.geowave.core.store.query.filter.DedupeFilter) Iterator(java.util.Iterator) GeoWaveRowMergingIterator(org.locationtech.geowave.core.store.entities.GeoWaveRowMergingIterator) ByteArray(org.locationtech.geowave.core.index.ByteArray) Map(java.util.Map) 
LazyPaginatedScan(org.locationtech.geowave.datastore.dynamodb.util.LazyPaginatedScan)

Example 2 with LazyPaginatedScan

use of org.locationtech.geowave.datastore.dynamodb.util.LazyPaginatedScan in project geowave by locationtech.

In the class DynamoDBMetadataReader, method query.

/**
 * Executes a metadata query against the metadata table of this reader's metadata type.
 *
 * <p>Three strategies are used, depending on the query:
 * <ul>
 * <li>primary ID ranges present: one scan per range, filtered on the primary ID bounds (and on
 * the secondary ID if given), with the per-range results concatenated;</li>
 * <li>exact primary ID: a key-condition query on the primary ID, with an optional filter
 * expression on the secondary ID;</li>
 * <li>otherwise: a single scan, filtered by primary ID prefix and/or secondary ID equality
 * when those are present.</li>
 * </ul>
 *
 * @param query the metadata query to run
 * @return the matching metadata, as produced by {@code wrapIterator}
 */
@Override
public CloseableIterator<GeoWaveMetadata> query(final MetadataQuery query) {
    final String tableName = operations.getMetadataTableName(metadataType);
    final boolean needsVisibility = metadataType.isStatValues() && operations.getOptions().getBaseOptions().isVisibilityEnabled();

    if (query.hasPrimaryIdRanges()) {
        // One scan per primary ID range; each scan is only created when the concatenated
        // iterator reaches it.
        final Iterator<LazyPaginatedScan> perRangeScans = Arrays.stream(query.getPrimaryIdRanges()).map(range -> {
            final ScanRequest rangeScan = new ScanRequest(tableName);
            if (query.hasSecondaryId()) {
                final AttributeValue secondaryId = new AttributeValue().withB(ByteBuffer.wrap(query.getSecondaryId()));
                rangeScan.addScanFilterEntry(DynamoDBOperations.METADATA_SECONDARY_ID_KEY, new Condition().withAttributeValueList(secondaryId).withComparisonOperator(ComparisonOperator.EQ));
            }
            final boolean hasStart = range.getStart() != null;
            final boolean hasEnd = range.getEnd() != null;
            if (hasStart && hasEnd) {
                final AttributeValue lower = new AttributeValue().withB(ByteBuffer.wrap(range.getStart()));
                final AttributeValue upper = new AttributeValue().withB(ByteBuffer.wrap(ByteArrayUtils.getNextInclusive(range.getEnd())));
                rangeScan.addScanFilterEntry(DynamoDBOperations.METADATA_PRIMARY_ID_KEY, new Condition().withAttributeValueList(lower, upper).withComparisonOperator(ComparisonOperator.BETWEEN));
            } else if (hasStart) {
                final AttributeValue lower = new AttributeValue().withB(ByteBuffer.wrap(range.getStart()));
                rangeScan.addScanFilterEntry(DynamoDBOperations.METADATA_PRIMARY_ID_KEY, new Condition().withAttributeValueList(lower).withComparisonOperator(ComparisonOperator.GE));
            } else if (hasEnd) {
                final AttributeValue upper = new AttributeValue().withB(ByteBuffer.wrap(range.getEndAsNextPrefix()));
                rangeScan.addScanFilterEntry(DynamoDBOperations.METADATA_PRIMARY_ID_KEY, new Condition().withAttributeValueList(upper).withComparisonOperator(ComparisonOperator.LT));
            }
            final ScanResult firstPage = operations.getClient().scan(rangeScan);
            return new LazyPaginatedScan(firstPage, rangeScan, operations.getClient());
        }).iterator();
        final Iterator<Map<String, AttributeValue>> rangeItems = Iterators.concat(perRangeScans);
        return wrapIterator(rangeItems, query, needsVisibility);
    }

    if (query.hasPrimaryId() && query.isExact()) {
        // Exact primary ID: use a key-condition query rather than a scan.
        final QueryRequest exactRequest = new QueryRequest(tableName);
        if (query.hasSecondaryId()) {
            exactRequest.withFilterExpression(DynamoDBOperations.METADATA_SECONDARY_ID_KEY + " = :secVal");
            exactRequest.addExpressionAttributeValuesEntry(":secVal", new AttributeValue().withB(ByteBuffer.wrap(query.getSecondaryId())));
        }
        exactRequest.withKeyConditionExpression(DynamoDBOperations.METADATA_PRIMARY_ID_KEY + " = :priVal");
        exactRequest.addExpressionAttributeValuesEntry(":priVal", new AttributeValue().withB(ByteBuffer.wrap(query.getPrimaryId())));
        final QueryResult exactResult = operations.getClient().query(exactRequest);
        // NOTE(review): only the items of this single QueryResult are iterated; confirm
        // whether further pages can occur here and need to be followed.
        return wrapIterator(exactResult.getItems().iterator(), query, needsVisibility);
    }

    // Fallback: one scan over the table, narrowed by whichever IDs the query provides.
    final ScanRequest fullScan = new ScanRequest(tableName);
    if (query.hasPrimaryId()) {
        final AttributeValue primaryPrefix = new AttributeValue().withB(ByteBuffer.wrap(query.getPrimaryId()));
        fullScan.addScanFilterEntry(DynamoDBOperations.METADATA_PRIMARY_ID_KEY, new Condition().withAttributeValueList(primaryPrefix).withComparisonOperator(ComparisonOperator.BEGINS_WITH));
    }
    if (query.hasSecondaryId()) {
        final AttributeValue secondaryId = new AttributeValue().withB(ByteBuffer.wrap(query.getSecondaryId()));
        fullScan.addScanFilterEntry(DynamoDBOperations.METADATA_SECONDARY_ID_KEY, new Condition().withAttributeValueList(secondaryId).withComparisonOperator(ComparisonOperator.EQ));
    }
    final ScanResult firstPage = operations.getClient().scan(fullScan);
    final Iterator<Map<String, AttributeValue>> scanItems = new LazyPaginatedScan(firstPage, fullScan, operations.getClient());
    return wrapIterator(scanItems, query, needsVisibility);
}
Also used : Condition(com.amazonaws.services.dynamodbv2.model.Condition) Condition(com.amazonaws.services.dynamodbv2.model.Condition) Arrays(java.util.Arrays) NoopClosableIteratorWrapper(org.locationtech.geowave.datastore.dynamodb.util.DynamoDBUtils.NoopClosableIteratorWrapper) DynamoDBUtils(org.locationtech.geowave.datastore.dynamodb.util.DynamoDBUtils) Iterator(java.util.Iterator) QueryRequest(com.amazonaws.services.dynamodbv2.model.QueryRequest) MetadataReader(org.locationtech.geowave.core.store.operations.MetadataReader) ScanRequest(com.amazonaws.services.dynamodbv2.model.ScanRequest) ScanResult(com.amazonaws.services.dynamodbv2.model.ScanResult) ByteBuffer(java.nio.ByteBuffer) MetadataQuery(org.locationtech.geowave.core.store.operations.MetadataQuery) QueryResult(com.amazonaws.services.dynamodbv2.model.QueryResult) Iterators(com.google.common.collect.Iterators) LazyPaginatedScan(org.locationtech.geowave.datastore.dynamodb.util.LazyPaginatedScan) ComparisonOperator(com.amazonaws.services.dynamodbv2.model.ComparisonOperator) MetadataType(org.locationtech.geowave.core.store.operations.MetadataType) CloseableIterator(org.locationtech.geowave.core.store.CloseableIterator) CloseableIteratorWrapper(org.locationtech.geowave.core.store.CloseableIteratorWrapper) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) Map(java.util.Map) ByteArrayUtils(org.locationtech.geowave.core.index.ByteArrayUtils) MetadataIterators(org.locationtech.geowave.core.store.metadata.MetadataIterators) GeoWaveMetadata(org.locationtech.geowave.core.store.entities.GeoWaveMetadata) ScanRequest(com.amazonaws.services.dynamodbv2.model.ScanRequest) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) QueryResult(com.amazonaws.services.dynamodbv2.model.QueryResult) ScanResult(com.amazonaws.services.dynamodbv2.model.ScanResult) QueryRequest(com.amazonaws.services.dynamodbv2.model.QueryRequest) Map(java.util.Map) 
LazyPaginatedScan(org.locationtech.geowave.datastore.dynamodb.util.LazyPaginatedScan)

Aggregations

AttributeValue (com.amazonaws.services.dynamodbv2.model.AttributeValue)2 ComparisonOperator (com.amazonaws.services.dynamodbv2.model.ComparisonOperator)2 Condition (com.amazonaws.services.dynamodbv2.model.Condition)2 QueryRequest (com.amazonaws.services.dynamodbv2.model.QueryRequest)2 QueryResult (com.amazonaws.services.dynamodbv2.model.QueryResult)2 ScanRequest (com.amazonaws.services.dynamodbv2.model.ScanRequest)2 ScanResult (com.amazonaws.services.dynamodbv2.model.ScanResult)2 Iterators (com.google.common.collect.Iterators)2 ByteBuffer (java.nio.ByteBuffer)2 Iterator (java.util.Iterator)2 Map (java.util.Map)2 ByteArrayUtils (org.locationtech.geowave.core.index.ByteArrayUtils)2 DynamoDBUtils (org.locationtech.geowave.datastore.dynamodb.util.DynamoDBUtils)2 LazyPaginatedScan (org.locationtech.geowave.datastore.dynamodb.util.LazyPaginatedScan)2 Throwables (com.google.common.base.Throwables)1 Lists (com.google.common.collect.Lists)1 Sets (com.google.common.collect.Sets)1 Streams (com.google.common.collect.Streams)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1