Use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
From the class CloudwatchRecordHandlerTest, method doReadRecordsSpill.
@Test
public void doReadRecordsSpill()
        throws Exception
{
    logger.info("doReadRecordsSpill: enter");

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("time", SortedRangeSet.of(
            Range.range(allocator, Types.MinorType.BIGINT.getType(), 100L, true, 100_000_000L, true)));

    Split split = Split.newBuilder(S3SpillLocation.newBuilder()
                    .withBucket(UUID.randomUUID().toString())
                    .withSplitId(UUID.randomUUID().toString())
                    .withQueryId(UUID.randomUUID().toString())
                    .withIsDirectory(true)
                    .build(), keyFactory.create())
            .add(CloudwatchMetadataHandler.LOG_STREAM_FIELD, "table")
            .build();

    ReadRecordsRequest request = new ReadRecordsRequest(identity, "catalog",
            "queryId-" + System.currentTimeMillis(), new TableName("schema", "table"),
            schemaForRead, split, new Constraints(constraintsMap),
            1_500_000L, // ~1.5MB so we should see some spill
            0);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() > 1);

        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) next;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                // assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }

    logger.info("doReadRecordsSpill: exit");
}
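The spill here is forced by the small maxBlockSize argument (the value commented as ~1.5MB). For contrast, a minimal sketch of the inline path, reusing the split and constraintsMap built above but with thresholds large enough that nothing should spill (the same pattern appears in the readMetricSamplesWithConstraint example later in this section):

// Sketch only: with generous block sizes no spill is expected, so doReadRecords
// returns the rows inline as a ReadRecordsResponse instead of a RemoteReadRecordsResponse.
ReadRecordsRequest inlineRequest = new ReadRecordsRequest(identity, "catalog",
        "queryId-" + System.currentTimeMillis(), new TableName("schema", "table"),
        schemaForRead, split, new Constraints(constraintsMap),
        100_000_000_000L, // large enough that no spill is expected
        100_000_000_000L);
RecordResponse inlineRaw = handler.doReadRecords(allocator, inlineRequest);
assertTrue(inlineRaw instanceof ReadRecordsResponse);
logger.info("inline rows[{}]", ((ReadRecordsResponse) inlineRaw).getRecordCount());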
Use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
From the class MetricsMetadataHandlerTest, method doGetTableLayout.
@Test
public void doGetTableLayout()
        throws Exception
{
    logger.info("doGetTableLayout - enter");

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(METRIC_NAME_FIELD, EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false)
            .add("MyMetric")
            .build());

    GetTableLayoutRequest req = new GetTableLayoutRequest(identity, "queryId", "default",
            new TableName(defaultSchema, "metrics"), new Constraints(constraintsMap),
            SchemaBuilder.newBuilder().build(), Collections.EMPTY_SET);

    GetTableLayoutResponse res = handler.doGetTableLayout(allocator, req);
    logger.info("doGetTableLayout - {}", res.getPartitions().getSchema());
    logger.info("doGetTableLayout - {}", res.getPartitions());
    assertTrue(res.getPartitions().getRowCount() == 1);
    logger.info("doGetTableLayout - exit");
}
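To inspect the single partition row asserted above, note that the partitions come back as a Block; a minimal sketch using BlockUtils.rowToString, the same helper the record-handler examples in this section use:

// Sketch only: print each partition row from the layout response.
Block partitions = res.getPartitions();
for (int i = 0; i < partitions.getRowCount(); i++) {
    logger.info("doGetTableLayout - partition[{}]: {}", i, BlockUtils.rowToString(partitions, i));
}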
Use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
From the class MetricsRecordHandlerTest, method readMetricSamplesWithConstraint.
@Test
public void readMetricSamplesWithConstraint()
        throws Exception
{
    logger.info("readMetricSamplesWithConstraint: enter");

    String namespace = "namespace";
    String metricName = "metricName";
    String statistic = "p90";
    String period = "60";
    String dimName = "dimName";
    String dimValue = "dimValue";
    List<Dimension> dimensions = Collections.singletonList(new Dimension().withName(dimName).withValue(dimValue));

    int numMetrics = 10;
    int numSamples = 10;
    AtomicLong numCalls = new AtomicLong(0);
    when(mockMetrics.getMetricData(any(GetMetricDataRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
        numCalls.incrementAndGet();
        return mockMetricData(invocation, numMetrics, numSamples);
    });

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(NAMESPACE_FIELD, makeStringEquals(allocator, namespace));
    constraintsMap.put(STATISTIC_FIELD, makeStringEquals(allocator, statistic));
    constraintsMap.put(DIMENSION_NAME_FIELD, makeStringEquals(allocator, dimName));
    constraintsMap.put(DIMENSION_VALUE_FIELD, makeStringEquals(allocator, dimValue));

    S3SpillLocation spillLocation = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();

    List<MetricStat> metricStats = new ArrayList<>();
    metricStats.add(new MetricStat()
            .withMetric(new Metric().withNamespace(namespace).withMetricName(metricName).withDimensions(dimensions))
            .withPeriod(60)
            .withStat(statistic));

    Split split = Split.newBuilder(spillLocation, keyFactory.create())
            .add(MetricStatSerDe.SERIALIZED_METRIC_STATS_FIELD_NAME, MetricStatSerDe.serialize(metricStats))
            .add(METRIC_NAME_FIELD, metricName)
            .add(NAMESPACE_FIELD, namespace)
            .add(STATISTIC_FIELD, statistic)
            .add(PERIOD_FIELD, period)
            .build();

    ReadRecordsRequest request = new ReadRecordsRequest(identity, "catalog",
            "queryId-" + System.currentTimeMillis(), METRIC_SAMPLES_TABLE_NAME,
            METRIC_DATA_TABLE.getSchema(), split, new Constraints(constraintsMap),
            100_000_000_000L, // 100GB, don't expect this to spill
            100_000_000_000L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);

    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("readMetricSamplesWithConstraint: rows[{}]", response.getRecordCount());
    assertEquals(numCalls.get() * numMetrics * numSamples, response.getRecords().getRowCount());
    logger.info("readMetricSamplesWithConstraint: {}", BlockUtils.rowToString(response.getRecords(), 0));
    logger.info("readMetricSamplesWithConstraint: exit");
}
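The makeStringEquals helper used for the constraint map is not shown in this excerpt. A plausible minimal sketch, assuming it simply wraps a single-value whitelist EquatableValueSet like the one built in the doGetTableLayout example above:

// Sketch only: an equality constraint over one VARCHAR value.
private static ValueSet makeStringEquals(BlockAllocator allocator, String value)
{
    return EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false)
            .add(value)
            .build();
}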
Use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
From the class DynamoDBMetadataHandler, method enhancePartitionSchema.
/**
 * Generates a partition schema with metadata derived from available predicates. This metadata will be
 * copied to splits in the #doGetSplits call. At this point it is determined whether we can partition
 * by hash key or fall back to a full table scan.
 *
 * @see GlueMetadataHandler
 */
@Override
public void enhancePartitionSchema(SchemaBuilder partitionSchemaBuilder, GetTableLayoutRequest request)
{
    // use the source table name from the schema if available (in case Glue table name != actual table name)
    String tableName = getSourceTableName(request.getSchema());
    if (tableName == null) {
        tableName = request.getTableName().getTableName();
    }

    DynamoDBTable table = null;
    try {
        table = tableResolver.getTableMetadata(tableName);
    }
    catch (TimeoutException e) {
        throw new RuntimeException(e);
    }

    // add the table name so we don't have to do case-insensitive resolution again
    partitionSchemaBuilder.addMetadata(TABLE_METADATA, table.getName());

    Map<String, ValueSet> summary = request.getConstraints().getSummary();
    List<String> requestedCols = request.getSchema().getFields().stream()
            .map(Field::getName)
            .collect(Collectors.toList());
    DynamoDBIndex index = DDBPredicateUtils.getBestIndexForPredicates(table, requestedCols, summary);
    logger.info("using index: {}", index.getName());

    String hashKeyName = index.getHashKey();
    ValueSet hashKeyValueSet = summary.get(hashKeyName);
    List<Object> hashKeyValues = (hashKeyValueSet != null)
            ? DDBPredicateUtils.getHashKeyAttributeValues(hashKeyValueSet)
            : Collections.emptyList();

    DDBRecordMetadata recordMetadata = new DDBRecordMetadata(request.getSchema());
    Set<String> columnsToIgnore = new HashSet<>();
    List<AttributeValue> valueAccumulator = new ArrayList<>();
    IncrementingValueNameProducer valueNameProducer = new IncrementingValueNameProducer();

    if (!hashKeyValues.isEmpty()) {
        // can "partition" on hash key
        partitionSchemaBuilder.addField(hashKeyName, hashKeyValueSet.getType());
        partitionSchemaBuilder.addMetadata(HASH_KEY_NAME_METADATA, hashKeyName);
        columnsToIgnore.add(hashKeyName);
        partitionSchemaBuilder.addMetadata(PARTITION_TYPE_METADATA, QUERY_PARTITION_TYPE);
        if (!table.getName().equals(index.getName())) {
            partitionSchemaBuilder.addMetadata(INDEX_METADATA, index.getName());
        }

        // add a range key filter if there is one
        Optional<String> rangeKey = index.getRangeKey();
        if (rangeKey.isPresent()) {
            String rangeKeyName = rangeKey.get();
            if (summary.containsKey(rangeKeyName)) {
                String rangeKeyFilter = DDBPredicateUtils.generateSingleColumnFilter(
                        rangeKeyName, summary.get(rangeKeyName), valueAccumulator, valueNameProducer, recordMetadata);
                partitionSchemaBuilder.addMetadata(RANGE_KEY_NAME_METADATA, rangeKeyName);
                partitionSchemaBuilder.addMetadata(RANGE_KEY_FILTER_METADATA, rangeKeyFilter);
                columnsToIgnore.add(rangeKeyName);
            }
        }
    }
    else {
        // always fall back to a scan
        partitionSchemaBuilder.addField(SEGMENT_COUNT_METADATA, Types.MinorType.INT.getType());
        partitionSchemaBuilder.addMetadata(PARTITION_TYPE_METADATA, SCAN_PARTITION_TYPE);
    }

    // Columns with custom types are excluded from the filter clause when querying/scanning DDB,
    // since those types are not natively supported by DDB or Glue; the results are filtered
    // after the query/scan returns.
    columnsToIgnore.addAll(recordMetadata.getNonComparableColumns());
    precomputeAdditionalMetadata(columnsToIgnore, summary, valueAccumulator, valueNameProducer, partitionSchemaBuilder, recordMetadata);
}
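The branch above depends on whether the constraint summary pins the chosen index's hash key to discrete values. A minimal sketch of the two shapes of summary, assuming a hypothetical hash key column named "id" and using only the ValueSet constructors already shown in this section:

// Sketch only, with a hypothetical hash key column named "id".
Map<String, ValueSet> summary = new HashMap<>();

// Discrete values on the hash key: DDBPredicateUtils.getHashKeyAttributeValues(...) can extract
// them, hashKeyValues is non-empty, and the handler takes the QUERY_PARTITION_TYPE path.
summary.put("id", EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false)
        .add("user-1")
        .add("user-2")
        .build());

// A broad range (or no constraint at all) on the hash key yields no discrete values,
// so hashKeyValues is empty and the handler falls back to the SCAN_PARTITION_TYPE path.
summary.put("id", SortedRangeSet.of(
        Range.range(allocator, Types.MinorType.VARCHAR.getType(), "a", true, "z", true)));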
Use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
From the class DocDBRecordHandler, method readWithConstraint.
/**
 * Scans DocumentDB using the scan settings set on the requested Split by DocDBMetadataHandler.
 *
 * @see RecordHandler
 */
@Override
protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker)
{
    TableName tableName = recordsRequest.getTableName();
    Map<String, ValueSet> constraintSummary = recordsRequest.getConstraints().getSummary();

    MongoClient client = getOrCreateConn(recordsRequest.getSplit());
    MongoDatabase db = client.getDatabase(tableName.getSchemaName());
    MongoCollection<Document> table = db.getCollection(tableName.getTableName());

    Document query = QueryUtils.makeQuery(recordsRequest.getSchema(), constraintSummary);
    Document output = QueryUtils.makeProjection(recordsRequest.getSchema());
    logger.info("readWithConstraint: query[{}] projection[{}]", query, output);

    final MongoCursor<Document> iterable = table.find(query)
            .projection(output)
            .batchSize(MONGO_QUERY_BATCH_SIZE)
            .iterator();

    long numRows = 0;
    AtomicLong numResultRows = new AtomicLong(0);
    while (iterable.hasNext() && queryStatusChecker.isQueryRunning()) {
        numRows++;
        spiller.writeRows((Block block, int rowNum) -> {
            Document doc = iterable.next();

            boolean matched = true;
            for (Field nextField : recordsRequest.getSchema().getFields()) {
                Object value = TypeUtils.coerce(nextField, doc.get(nextField.getName()));
                Types.MinorType fieldType = Types.getMinorTypeForArrowType(nextField.getType());
                try {
                    switch (fieldType) {
                        case LIST:
                        case STRUCT:
                            matched &= block.offerComplexValue(nextField.getName(), rowNum, DEFAULT_FIELD_RESOLVER, value);
                            break;
                        default:
                            matched &= block.offerValue(nextField.getName(), rowNum, value);
                            break;
                    }
                    if (!matched) {
                        return 0;
                    }
                }
                catch (Exception ex) {
                    throw new RuntimeException("Error while processing field " + nextField.getName(), ex);
                }
            }

            numResultRows.getAndIncrement();
            return 1;
        });
    }

    logger.info("readWithConstraint: numRows[{}] numResultRows[{}]", numRows, numResultRows.get());
}
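The ValueSet-to-MongoDB translation happens in QueryUtils.makeQuery and QueryUtils.makeProjection. A minimal sketch of calling them directly with a hand-built constraint summary, assuming a hypothetical schema with a single VARCHAR column named "category":

// Sketch only: one equality constraint turned into a MongoDB filter and projection.
Schema schema = SchemaBuilder.newBuilder()
        .addStringField("category")
        .build();

Map<String, ValueSet> summary = new HashMap<>();
summary.put("category", EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false)
        .add("books")
        .build());

Document query = QueryUtils.makeQuery(schema, summary);
Document projection = QueryUtils.makeProjection(schema);
logger.info("query[{}] projection[{}]", query, projection);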