Use of com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator in project aws-athena-query-federation by awslabs.
Class BlockTest, method constrainedBlockTest:
@Test
public void constrainedBlockTest() throws Exception {
    Schema schema = SchemaBuilder.newBuilder()
            .addIntField("col1")
            .addIntField("col2")
            .build();
    Block block = allocator.createBlock(schema);
    ValueSet col1Constraint = EquatableValueSet.newBuilder(allocator, Types.MinorType.INT.getType(), true, false)
            .add(10)
            .build();
    Constraints constraints = new Constraints(Collections.singletonMap("col1", col1Constraint));
    try (ConstraintEvaluator constraintEvaluator = new ConstraintEvaluator(allocator, schema, constraints)) {
        block.constrain(constraintEvaluator);
        // 10 satisfies the col1 constraint, so both write paths accept it.
        assertTrue(block.setValue("col1", 0, 10));
        assertTrue(block.offerValue("col1", 0, 10));
        // 11 violates the constraint, so both write paths reject it.
        assertFalse(block.setValue("col1", 0, 11));
        assertFalse(block.offerValue("col1", 0, 11));
        // Columns with no registered constraint (including ones absent from the schema) are always accepted.
        assertTrue(block.offerValue("unkown_col", 0, 10));
    }
}
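The contract exercised above can also be checked against the evaluator directly, without a Block. A minimal sketch, not taken from the project's tests, assuming the same allocator, schema, and constraints built in the test:

// Sketch only: ConstraintEvaluator.apply reports whether a candidate value
// satisfies the constraint registered for that column.
try (ConstraintEvaluator evaluator = new ConstraintEvaluator(allocator, schema, constraints)) {
    assertTrue(evaluator.apply("col1", 10));   // 10 is in col1's EquatableValueSet
    assertFalse(evaluator.apply("col1", 11));  // 11 violates the constraint
    assertTrue(evaluator.apply("col2", 99));   // assumption: unconstrained columns always pass
}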
Use of com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator in project aws-athena-query-federation by awslabs.
Class BigQueryRecordHandlerTest, method getObjectFromFieldValue:
@Test
public void getObjectFromFieldValue() throws Exception {
    org.apache.arrow.vector.types.pojo.Schema testSchema = SchemaBuilder.newBuilder()
            .addDateDayField("datecol")
            .addDateMilliField("datetimecol")
            .addStringField("timestampcol")
            .build();
    try (ReadRecordsRequest request = new ReadRecordsRequest(federatedIdentity,
            BigQueryTestUtils.PROJECT_1_NAME, "queryId",
            new TableName("dataset1", "table1"),
            testSchema,
            Split.newBuilder(S3SpillLocation.newBuilder()
                    .withBucket(bucket)
                    .withPrefix(prefix)
                    .withSplitId(UUID.randomUUID().toString())
                    .withQueryId(UUID.randomUUID().toString())
                    .withIsDirectory(true)
                    .build(), keyFactory.create()).build(),
            new Constraints(Collections.EMPTY_MAP),
            // These limits are ignored when directly calling readWithConstraints.
            0, 0)) {
        // The evaluator is ignored when directly calling readWithConstraints;
        // always return true so it keeps all rows.
        ConstraintEvaluator evaluator = mock(ConstraintEvaluator.class);
        when(evaluator.apply(any(String.class), any(Object.class)))
                .thenAnswer((InvocationOnMock invocationOnMock) -> true);
        // Schema with columns datecol, datetimecol, and timestampcol.
        List<com.google.cloud.bigquery.Field> testSchemaFields = Arrays.asList(
                com.google.cloud.bigquery.Field.of("datecol", LegacySQLTypeName.DATE),
                com.google.cloud.bigquery.Field.of("datetimecol", LegacySQLTypeName.DATETIME),
                com.google.cloud.bigquery.Field.of("timestampcol", LegacySQLTypeName.TIMESTAMP));
        com.google.cloud.bigquery.Schema tableSchema = com.google.cloud.bigquery.Schema.of(testSchemaFields);
        // Mocked table rows.
        List<FieldValue> firstRowValues = Arrays.asList(
                FieldValue.of(FieldValue.Attribute.PRIMITIVE, "2016-02-05"),
                FieldValue.of(FieldValue.Attribute.PRIMITIVE, "2021-10-30T10:10:10"),
                FieldValue.of(FieldValue.Attribute.PRIMITIVE, "2014-12-03T12:30:00.450Z"));
        FieldValueList firstRow = FieldValueList.of(firstRowValues, FieldList.of(testSchemaFields));
        List<FieldValueList> tableRows = Arrays.asList(firstRow);
        Page<FieldValueList> fieldValueList = new BigQueryPage<>(tableRows);
        TableResult result = new TableResult(tableSchema, tableRows.size(), fieldValueList);
        // Mock out the Google BigQuery Job.
        Job mockBigQueryJob = mock(Job.class);
        when(mockBigQueryJob.isDone()).thenReturn(false).thenReturn(true);
        when(mockBigQueryJob.getQueryResults()).thenReturn(result);
        when(bigQuery.create(any(JobInfo.class))).thenReturn(mockBigQueryJob);
        QueryStatusChecker queryStatusChecker = mock(QueryStatusChecker.class);
        when(queryStatusChecker.isQueryRunning()).thenReturn(true);
        // Execute the test.
        bigQueryRecordHandler.readWithConstraint(spillWriter, request, queryStatusChecker);
        PowerMockito.mockStatic(System.class);
        PowerMockito.when(System.getenv(anyString())).thenReturn("test");
        logger.info("Project Name: " + BigQueryUtils.getProjectName(request.getCatalogName()));
    }
}
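Since the stubbed answer never inspects its arguments, the thenAnswer above can be collapsed. A sketch of the equivalent stubbing, matching the style used in JdbcRecordHandlerTest below:

ConstraintEvaluator evaluator = mock(ConstraintEvaluator.class);
// Equivalent to the thenAnswer form: keep every row.
when(evaluator.apply(any(String.class), any(Object.class))).thenReturn(true);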
Use of com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator in project aws-athena-query-federation by awslabs.
Class JdbcRecordHandlerTest, method readWithConstraint:
@Test
public void readWithConstraint() throws SQLException {
    ConstraintEvaluator constraintEvaluator = Mockito.mock(ConstraintEvaluator.class);
    Mockito.when(constraintEvaluator.apply(Mockito.anyString(), Mockito.any())).thenReturn(true);
    TableName inputTableName = new TableName("testSchema", "testTable");
    SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder();
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder("testCol1", org.apache.arrow.vector.types.Types.MinorType.INT.getType()).build());
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder("testCol2", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder("testPartitionCol", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    Schema fieldSchema = expectedSchemaBuilder.build();
    BlockAllocator allocator = new BlockAllocatorImpl();
    S3SpillLocation s3SpillLocation = S3SpillLocation.newBuilder().withIsDirectory(true).build();
    Split.Builder splitBuilder = Split.newBuilder(s3SpillLocation, null)
            .add("testPartitionCol", String.valueOf("testPartitionValue"));
    Constraints constraints = Mockito.mock(Constraints.class, Mockito.RETURNS_DEEP_STUBS);
    String[] schema = { "testCol1", "testCol2" };
    int[] columnTypes = { Types.INTEGER, Types.VARCHAR };
    Object[][] values = { { 1, "testVal1" }, { 2, "testVal2" } };
    AtomicInteger rowNumber = new AtomicInteger(-1);
    ResultSet resultSet = mockResultSet(schema, columnTypes, values, rowNumber);
    Mockito.when(this.preparedStatement.executeQuery()).thenReturn(resultSet);
    SpillConfig spillConfig = Mockito.mock(SpillConfig.class);
    Mockito.when(spillConfig.getSpillLocation()).thenReturn(s3SpillLocation);
    BlockSpiller s3Spiller = new S3BlockSpiller(this.amazonS3, spillConfig, allocator, fieldSchema, constraintEvaluator);
    ReadRecordsRequest readRecordsRequest = new ReadRecordsRequest(this.federatedIdentity, "testCatalog", "testQueryId",
            inputTableName, fieldSchema, splitBuilder.build(), constraints, 1024, 1024);
    Mockito.when(amazonS3.putObject(Mockito.anyString(), Mockito.anyString(), Mockito.any(), Mockito.any()))
            .thenAnswer((Answer<PutObjectResult>) invocation -> {
                ByteArrayInputStream byteArrayInputStream = (ByteArrayInputStream) invocation.getArguments()[2];
                int n = byteArrayInputStream.available();
                byte[] bytes = new byte[n];
                byteArrayInputStream.read(bytes, 0, n);
                String data = new String(bytes, StandardCharsets.UTF_8);
                Assert.assertTrue(data.contains("testVal1") || data.contains("testVal2") || data.contains("testPartitionValue"));
                return new PutObjectResult();
            });
    this.jdbcRecordHandler.readWithConstraint(s3Spiller, readRecordsRequest, queryStatusChecker);
}
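Here the evaluator is a pass-through stub, so the spiller keeps every row. A sketch, under the assumption that the test's fieldSchema, allocator, and spillConfig are in scope, of swapping in a real evaluator so constrained writes reject violating values:

// Sketch only: constrain testCol1 to the single value 1.
ValueSet testCol1Values = EquatableValueSet.newBuilder(allocator,
        org.apache.arrow.vector.types.Types.MinorType.INT.getType(), true, false)
        .add(1)
        .build();
Constraints realConstraints = new Constraints(Collections.singletonMap("testCol1", testCol1Values));
try (ConstraintEvaluator realEvaluator = new ConstraintEvaluator(allocator, fieldSchema, realConstraints)) {
    // Blocks constrained by this evaluator return false from offerValue for testCol1 != 1,
    // so offerValue-based row writers can skip those rows.
    BlockSpiller filteringSpiller = new S3BlockSpiller(this.amazonS3, spillConfig, allocator, fieldSchema, realEvaluator);
}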
Use of com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator in project aws-athena-query-federation by awslabs.
Class AbstractTableProviderTest, method readTableTest:
@Test
public void readTableTest() {
    GetTableRequest request = new GetTableRequest(identity, expectedQuery, expectedCatalog, expectedTableName);
    GetTableResponse response = provider.getTable(allocator, request);
    assertTrue(response.getSchema().getFields().size() > 1);
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(idField, EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false)
            .add(idValue)
            .build());
    Constraints constraints = new Constraints(constraintsMap);
    ConstraintEvaluator evaluator = new ConstraintEvaluator(allocator, response.getSchema(), constraints);
    S3SpillLocation spillLocation = S3SpillLocation.newBuilder()
            .withBucket("bucket")
            .withPrefix("prefix")
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();
    ReadRecordsRequest readRequest = new ReadRecordsRequest(identity, expectedCatalog, "queryId", expectedTableName,
            response.getSchema(), Split.newBuilder(spillLocation, keyFactory.create()).build(), constraints,
            100_000_000, 100_000_000);
    SpillConfig spillConfig = SpillConfig.newBuilder()
            .withSpillLocation(spillLocation)
            .withMaxBlockBytes(3_000_000)
            .withMaxInlineBlockBytes(0)
            .withRequestId("queryid")
            .withEncryptionKey(keyFactory.create())
            .build();
    setUpRead();
    BlockSpiller spiller = new S3BlockSpiller(amazonS3, spillConfig, allocator, response.getSchema(), evaluator);
    provider.readWithConstraint(spiller, readRequest, queryStatusChecker);
    validateRead(response.getSchema(), blockSpillReader, spiller.getSpillLocations(), spillConfig.getEncryptionKey());
}
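Unlike the BlockTest example, the evaluator here is never closed. ConstraintEvaluator is AutoCloseable (see the try-with-resources usage in constrainedBlockTest above), so the tail of the test could be scoped as in this sketch:

try (ConstraintEvaluator scopedEvaluator = new ConstraintEvaluator(allocator, response.getSchema(), constraints)) {
    BlockSpiller scopedSpiller = new S3BlockSpiller(amazonS3, spillConfig, allocator, response.getSchema(), scopedEvaluator);
    provider.readWithConstraint(scopedSpiller, readRequest, queryStatusChecker);
    validateRead(response.getSchema(), blockSpillReader, scopedSpiller.getSpillLocations(), spillConfig.getEncryptionKey());
}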
Use of com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator in project aws-athena-query-federation by awslabs.
Class MetricsMetadataHandler, method doGetSplits:
/**
 * Each 'metric' in CloudWatch is uniquely identified by the quad of Namespace, List<Dimension>, MetricName, and
 * Statistic. If the query is for the METRIC_TABLE we return a single split. If the query is for actual metrics
 * data, we begin forming the batches of metrics that will become the basis of GetMetricData requests during
 * readSplits.
 *
 * @see MetadataHandler
 */
@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest) throws Exception {
    validateTable(getSplitsRequest.getTableName());
    // Handle requests for the METRIC_TABLE, which needs only one split to list available metrics.
    if (METRIC_TABLE_NAME.equals(getSplitsRequest.getTableName().getTableName())) {
        // The request is just for metadata about what metrics exist.
        Split metricsSplit = Split.newBuilder(makeSpillLocation(getSplitsRequest), makeEncryptionKey()).build();
        return new GetSplitsResponse(getSplitsRequest.getCatalogName(), metricsSplit);
    }
    // Handle generating splits for reading actual metrics data.
    try (ConstraintEvaluator constraintEvaluator = new ConstraintEvaluator(blockAllocator,
            METRIC_DATA_TABLE.getSchema(), getSplitsRequest.getConstraints())) {
        ListMetricsRequest listMetricsRequest = new ListMetricsRequest();
        MetricUtils.pushDownPredicate(getSplitsRequest.getConstraints(), listMetricsRequest);
        listMetricsRequest.setNextToken(getSplitsRequest.getContinuationToken());
        String period = getPeriodFromConstraint(getSplitsRequest.getConstraints());
        Set<Split> splits = new HashSet<>();
        ListMetricsResult result = invoker.invoke(() -> metrics.listMetrics(listMetricsRequest));
        List<MetricStat> metricStats = new ArrayList<>(100);
        for (Metric nextMetric : result.getMetrics()) {
            for (String nextStatistic : STATISTICS) {
                if (MetricUtils.applyMetricConstraints(constraintEvaluator, nextMetric, nextStatistic)) {
                    metricStats.add(new MetricStat()
                            .withMetric(new Metric()
                                    .withNamespace(nextMetric.getNamespace())
                                    .withMetricName(nextMetric.getMetricName())
                                    .withDimensions(nextMetric.getDimensions()))
                            .withPeriod(Integer.valueOf(period))
                            .withStat(nextStatistic));
                }
            }
        }
        if (CollectionUtils.isNullOrEmpty(metricStats)) {
            logger.info("No metric stats present after filtering predicates.");
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
        }
        List<List<MetricStat>> partitions = Lists.partition(metricStats, calculateSplitSize(metricStats.size()));
        for (List<MetricStat> partition : partitions) {
            String serializedMetricStats = MetricStatSerDe.serialize(partition);
            splits.add(Split.newBuilder(makeSpillLocation(getSplitsRequest), makeEncryptionKey())
                    .add(MetricStatSerDe.SERIALIZED_METRIC_STATS_FIELD_NAME, serializedMetricStats)
                    .build());
        }
        String continuationToken = null;
        if (result.getNextToken() != null && !result.getNextToken().equalsIgnoreCase(listMetricsRequest.getNextToken())) {
            continuationToken = result.getNextToken();
        }
        return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, continuationToken);
    }
}
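Each data split built above carries its batch of MetricStat objects as a serialized split property. A sketch of the consuming side, assuming MetricStatSerDe exposes a deserialize counterpart to the serialize call used here:

// Hypothetical consumer of one of the splits produced by doGetSplits.
String serialized = split.getProperty(MetricStatSerDe.SERIALIZED_METRIC_STATS_FIELD_NAME);
List<MetricStat> batch = MetricStatSerDe.deserialize(serialized); // assumption: deserialize exists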