use of com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator in project aws-athena-query-federation by awslabs.
the class MetricUtilsTest method applyMetricConstraints.
@Test
public void applyMetricConstraints() {
    Schema schema = SchemaBuilder.newBuilder()
            .addStringField(NAMESPACE_FIELD)
            .addStringField(METRIC_NAME_FIELD)
            .addStringField(STATISTIC_FIELD)
            .addStringField(DIMENSION_NAME_FIELD)
            .addStringField(DIMENSION_VALUE_FIELD)
            .build();
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(NAMESPACE_FIELD, makeStringEquals(allocator, "match1"));
    constraintsMap.put(METRIC_NAME_FIELD, makeStringEquals(allocator, "match2"));
    constraintsMap.put(STATISTIC_FIELD, makeStringEquals(allocator, "match3"));
    constraintsMap.put(DIMENSION_NAME_FIELD, makeStringEquals(allocator, "match4"));
    constraintsMap.put(DIMENSION_VALUE_FIELD, makeStringEquals(allocator, "match5"));
    ConstraintEvaluator constraintEvaluator = new ConstraintEvaluator(allocator, schema, new Constraints(constraintsMap));
    Metric metric = new Metric()
            .withNamespace("match1")
            .withMetricName("match2")
            .withDimensions(new Dimension().withName("match4").withValue("match5"));
    String statistic = "match3";
    // The metric satisfies every constraint, so it is kept.
    assertTrue(MetricUtils.applyMetricConstraints(constraintEvaluator, metric, statistic));
    // Changing any one field to a non-matching value causes the metric to be filtered out.
    assertFalse(MetricUtils.applyMetricConstraints(constraintEvaluator, copyMetric(metric).withNamespace("no_match"), statistic));
    assertFalse(MetricUtils.applyMetricConstraints(constraintEvaluator, copyMetric(metric).withMetricName("no_match"), statistic));
    assertFalse(MetricUtils.applyMetricConstraints(constraintEvaluator, copyMetric(metric).withDimensions(Collections.singletonList(new Dimension().withName("no_match").withValue("match5"))), statistic));
    assertFalse(MetricUtils.applyMetricConstraints(constraintEvaluator, copyMetric(metric).withDimensions(Collections.singletonList(new Dimension().withName("match4").withValue("no_match"))), statistic));
    assertFalse(MetricUtils.applyMetricConstraints(constraintEvaluator, copyMetric(metric), "no_match"));
}
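The test leans on two helpers defined elsewhere in MetricUtilsTest. Here is a minimal sketch of what they plausibly look like, assuming the SDK's EquatableValueSet builder; the exact bodies in the repository may differ.

private static ValueSet makeStringEquals(BlockAllocator allocator, String value) {
    // Build a single-value whitelist ValueSet, i.e. the predicate "field = value".
    return EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false)
            .add(value)
            .build();
}

private static Metric copyMetric(Metric metric) {
    // Deep-copy the Metric so each assertion can change one field without affecting the others.
    List<Dimension> dimensions = new ArrayList<>();
    for (Dimension next : metric.getDimensions()) {
        dimensions.add(new Dimension().withName(next.getName()).withValue(next.getValue()));
    }
    return new Metric()
            .withNamespace(metric.getNamespace())
            .withMetricName(metric.getMetricName())
            .withDimensions(dimensions);
}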
use of com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator in project aws-athena-query-federation by awslabs.
the class BigQueryRecordHandlerTest method testReadWithConstraint.
@Test
public void testReadWithConstraint() throws Exception {
    try (ReadRecordsRequest request = new ReadRecordsRequest(
            federatedIdentity,
            BigQueryTestUtils.PROJECT_1_NAME,
            "queryId",
            new TableName("dataset1", "table1"),
            BigQueryTestUtils.getBlockTestSchema(),
            Split.newBuilder(S3SpillLocation.newBuilder()
                            .withBucket(bucket)
                            .withPrefix(prefix)
                            .withSplitId(UUID.randomUUID().toString())
                            .withQueryId(UUID.randomUUID().toString())
                            .withIsDirectory(true)
                            .build(),
                    keyFactory.create()).build(),
            new Constraints(Collections.EMPTY_MAP),
            // The block size limits below are ignored when directly calling readWithConstraint.
            0,
            0)) {
        // This evaluator is likewise ignored when directly calling readWithConstraint.
        // Always return true for the evaluator to keep all rows.
        ConstraintEvaluator evaluator = mock(ConstraintEvaluator.class);
        when(evaluator.apply(any(String.class), any(Object.class)))
                .thenAnswer((InvocationOnMock invocationOnMock) -> true);
        // Populate the schema and data that the mocked Google BigQuery client will return.
        com.google.cloud.bigquery.Schema tableSchema = BigQueryTestUtils.getTestSchema();
        List<FieldValueList> tableRows = Arrays.asList(
                BigQueryTestUtils.getBigQueryFieldValueList(false, 1000, "test1", 123123.12312),
                BigQueryTestUtils.getBigQueryFieldValueList(true, 500, "test2", 5345234.22111),
                BigQueryTestUtils.getBigQueryFieldValueList(false, 700, "test3", 324324.23423),
                BigQueryTestUtils.getBigQueryFieldValueList(true, 900, null, null),
                BigQueryTestUtils.getBigQueryFieldValueList(null, null, "test5", 2342.234234),
                BigQueryTestUtils.getBigQueryFieldValueList(true, 1200, "test6", 1123.12312),
                BigQueryTestUtils.getBigQueryFieldValueList(false, 100, "test7", 1313.12312),
                BigQueryTestUtils.getBigQueryFieldValueList(true, 120, "test8", 12313.1312),
                BigQueryTestUtils.getBigQueryFieldValueList(false, 300, "test9", 12323.1312));
        Page<FieldValueList> fieldValueList = new BigQueryPage<>(tableRows);
        TableResult result = new TableResult(tableSchema, tableRows.size(), fieldValueList);
        // Mock out the Google BigQuery Job.
        Job mockBigQueryJob = mock(Job.class);
        when(mockBigQueryJob.isDone()).thenReturn(false).thenReturn(true);
        when(mockBigQueryJob.getQueryResults()).thenReturn(result);
        when(bigQuery.create(any(JobInfo.class))).thenReturn(mockBigQueryJob);
        QueryStatusChecker queryStatusChecker = mock(QueryStatusChecker.class);
        when(queryStatusChecker.isQueryRunning()).thenReturn(true);
        // Execute the test
        bigQueryRecordHandler.readWithConstraint(spillWriter, request, queryStatusChecker);
        PowerMockito.mockStatic(System.class);
        PowerMockito.when(System.getenv(anyString())).thenReturn("test");
        logger.info("Project Name: " + BigQueryUtils.getProjectName(request.getCatalogName()));
        // Ensure that there was a spill so that we can read the spilled block.
        assertTrue(spillWriter.spilled());
    }
}
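The BigQueryPage used above is a test helper rather than a Google client class. A single-page implementation of com.google.api.gax.paging.Page is enough to back the mocked TableResult; this sketch assumes that shape, and the repository's actual helper may differ.

public class BigQueryPage<T> implements Page<T> {
    private final List<T> rows;

    public BigQueryPage(List<T> rows) {
        this.rows = rows;
    }

    @Override
    public boolean hasNextPage() {
        // All rows fit in this single page.
        return false;
    }

    @Override
    public String getNextPageToken() {
        return null;
    }

    @Override
    public Page<T> getNextPage() {
        return null;
    }

    @Override
    public Iterable<T> iterateAll() {
        return rows;
    }

    @Override
    public Iterable<T> getValues() {
        return rows;
    }
}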
use of com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator in project aws-athena-query-federation by awslabs.
the class MetadataHandler method doGetTableLayout.
/**
* Used to get the partitions that must be read from the request table in order to satisfy the requested predicate.
*
* @param allocator Tool for creating and managing Apache Arrow Blocks.
* @param request Provides details of the catalog, database, and table being queried as well as any filter predicate.
* @return A GetTableLayoutResponse which primarily contains:
* 1. An Apache Arrow Block with 0 or more partitions to read. 0 partitions implies there are 0 rows to read.
* 2. Set<String> of partition column names which should correspond to columns in your Apache Arrow Block.
* @note Partitions are partially opaque to Amazon Athena in that it only understands your partition columns and
* how to filter out partitions that do not meet the query's constraints. Any additional columns you add to the
* partition data are ignored by Athena but passed on to calls to doGetSplits(...), which Athena invokes for each
* partition you return in order to determine which reads to perform and whether those reads can be parallelized.
* In that sense the contents of this response are more for you than they are for Athena.
*/
public GetTableLayoutResponse doGetTableLayout(final BlockAllocator allocator, final GetTableLayoutRequest request) throws Exception {
    SchemaBuilder constraintSchema = SchemaBuilder.newBuilder();
    SchemaBuilder partitionSchemaBuilder = SchemaBuilder.newBuilder();
    /**
     * Add our partition columns to the response schema so the engine knows how to interpret the list of
     * partitions we are going to return.
     */
    for (String nextPartCol : request.getPartitionCols()) {
        Field partitionCol = request.getSchema().findField(nextPartCol);
        partitionSchemaBuilder.addField(nextPartCol, partitionCol.getType());
        constraintSchema.addField(nextPartCol, partitionCol.getType());
    }
    enhancePartitionSchema(partitionSchemaBuilder, request);
    Schema partitionSchema = partitionSchemaBuilder.build();
    if (partitionSchema.getFields().isEmpty() && partitionSchema.getCustomMetadata().isEmpty()) {
        // Even though our table doesn't support complex layouts, partitioning or metadata, we need to convey
        // that there is at least 1 partition to read as part of the query or Athena will assume partition
        // pruning found no candidate layouts to read.
        Block partitions = BlockUtils.newBlock(allocator, PARTITION_ID_COL, Types.MinorType.INT.getType(), 1);
        return new GetTableLayoutResponse(request.getCatalogName(), request.getTableName(), partitions);
    }
    /**
     * Now use the constraint that was in the request to do some partition pruning. Here we are just
     * generating some fake values for the partitions but in a real implementation you'd use your metastore
     * or knowledge of the actual table's physical layout to do this.
     */
    try (ConstraintEvaluator constraintEvaluator = new ConstraintEvaluator(allocator, constraintSchema.build(), request.getConstraints());
            QueryStatusChecker queryStatusChecker = new QueryStatusChecker(athena, athenaInvoker, request.getQueryId())) {
        Block partitions = allocator.createBlock(partitionSchemaBuilder.build());
        partitions.constrain(constraintEvaluator);
        SimpleBlockWriter blockWriter = new SimpleBlockWriter(partitions);
        getPartitions(blockWriter, request, queryStatusChecker);
        return new GetTableLayoutResponse(request.getCatalogName(), request.getTableName(), partitions);
    }
}
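To make the pruning flow concrete, here is a minimal, hypothetical getPartitions(...) override for a table partitioned by a single int column named year. The column name and value range are invented for illustration; the BlockWriter/RowWriter pattern is the SDK's. Because doGetTableLayout attached the ConstraintEvaluator to the partitions Block via partitions.constrain(...), Block.setValue(...) returns false for values the query's predicate rules out.

@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker)
        throws Exception {
    for (int year = 2000; year <= 2020; year++) {
        final int value = year;
        blockWriter.writeRows((Block block, int rowNum) -> {
            // setValue returns false when the value fails the Block's constraint.
            boolean matched = block.setValue("year", rowNum, value);
            // Return 1 to commit the row, 0 to discard it.
            return matched ? 1 : 0;
        });
    }
}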
use of com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator in project aws-athena-query-federation by awslabs.
the class RecordHandler method doReadRecords.
/**
* Used to read the row data associated with the provided Split.
*
* @param allocator Tool for creating and managing Apache Arrow Blocks.
* @param request Details of the read request, including:
* 1. The Split
* 2. The Catalog, Database, and Table the read request is for.
* 3. The filtering predicate (if any)
* 4. The columns required for projection.
* @return A RecordResponse which is either a ReadRecordsResponse or a RemoteReadRecordsResponse containing the row
* data for the requested Split.
*/
public RecordResponse doReadRecords(BlockAllocator allocator, ReadRecordsRequest request) throws Exception {
    logger.info("doReadRecords: {}:{}", request.getSchema(), request.getSplit().getSpillLocation());
    SpillConfig spillConfig = getSpillConfig(request);
    try (ConstraintEvaluator evaluator = new ConstraintEvaluator(allocator, request.getSchema(), request.getConstraints());
            S3BlockSpiller spiller = new S3BlockSpiller(amazonS3, spillConfig, allocator, request.getSchema(), evaluator);
            QueryStatusChecker queryStatusChecker = new QueryStatusChecker(athena, athenaInvoker, request.getQueryId())) {
        readWithConstraint(spiller, request, queryStatusChecker);
        if (!spiller.spilled()) {
            // Everything fit in a single in-memory Block, so return the rows inline in the response.
            return new ReadRecordsResponse(request.getCatalogName(), spiller.getBlock());
        } else {
            // The results were too large to return inline, so tell Athena where the spilled
            // blocks live in S3 and which key decrypts them.
            return new RemoteReadRecordsResponse(request.getCatalogName(), request.getSchema(), spiller.getSpillLocations(), spillConfig.getEncryptionKey());
        }
    }
}
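For contrast with the mocked BigQuery test above, a minimal, hypothetical readWithConstraint(...) implementation might look like the following. fetchRowsForSplit and the column names are invented for illustration; the spiller/row-writer pattern is the SDK's. Because S3BlockSpiller was constructed with the ConstraintEvaluator, setValue(...) reports whether each value can satisfy the predicate, letting the connector drop non-matching rows before they are spilled.

@Override
protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest request, QueryStatusChecker queryStatusChecker)
        throws Exception {
    // Hypothetical source of rows for this Split; a real connector streams from its data store.
    List<String[]> sourceRows = fetchRowsForSplit(request.getSplit());
    for (String[] sourceRow : sourceRows) {
        if (!queryStatusChecker.isQueryRunning()) {
            // The query was cancelled or failed upstream, so stop reading early.
            return;
        }
        spiller.writeRows((Block block, int rowNum) -> {
            boolean matched = block.setValue("name", rowNum, sourceRow[0]);
            matched &= block.setValue("city", rowNum, sourceRow[1]);
            // Return 1 to commit the row, 0 to discard it.
            return matched ? 1 : 0;
        });
    }
}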