Use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.
From the class HbaseMetadataHandlerTest, method doGetSplits:
@Test
public void doGetSplits() throws IOException {
    // Mock four regions for schema1.table1 so the handler produces one split per region.
    List<HRegionInfo> regionServers = new ArrayList<>();
    regionServers.add(TestUtils.makeRegion(1, "schema1", "table1"));
    regionServers.add(TestUtils.makeRegion(2, "schema1", "table1"));
    regionServers.add(TestUtils.makeRegion(3, "schema1", "table1"));
    regionServers.add(TestUtils.makeRegion(4, "schema1", "table1"));
    when(mockClient.getTableRegions(any())).thenReturn(regionServers);

    List<String> partitionCols = new ArrayList<>();
    Block partitions = BlockUtils.newBlock(allocator, "partitionId", Types.MinorType.INT.getType(), 0);

    String continuationToken = null;
    GetSplitsRequest originalReq = new GetSplitsRequest(IDENTITY, QUERY_ID, DEFAULT_CATALOG, TABLE_NAME,
            partitions, partitionCols, new Constraints(new HashMap<>()), null);
    GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
    logger.info("doGetSplits: req[{}]", req);

    MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
    assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());

    GetSplitsResponse response = (GetSplitsResponse) rawResponse;
    continuationToken = response.getContinuationToken();
    logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());

    // One split per mocked region, delivered in a single batch (no continuation token).
    assertEquals("Continuation criteria violated", 4, response.getSplits().size());
    assertNull("Continuation criteria violated", response.getContinuationToken());
}
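The two final assertions pin down the continuation contract: all four region splits arrive in one batch, so the token is null. For a connector that does paginate, a caller would keep re-issuing the request until the token comes back null. A minimal sketch, reusing only the APIs shown above and assuming the same handler, allocator, and originalReq:

    // Hypothetical paging loop (sketch only): page through doGetSplits
    // until the continuation token is exhausted.
    String token = null;
    int totalSplits = 0;
    do {
        GetSplitsRequest pageReq = new GetSplitsRequest(originalReq, token);
        GetSplitsResponse pageResp = (GetSplitsResponse) handler.doGetSplits(allocator, pageReq);
        totalSplits += pageResp.getSplits().size();
        token = pageResp.getContinuationToken();
    } while (token != null); // a null token marks the final batch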
Use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.
From the class HbaseRecordHandlerTest, method doReadRecordsNoSpill:
@Test
public void doReadRecordsNoSpill() throws Exception {
    // Mock a scanner returning 100 rows, wired through the client's scanTable callback.
    List<Result> results = TestUtils.makeResults(100);
    ResultScanner mockScanner = mock(ResultScanner.class);
    when(mockScanner.iterator()).thenReturn(results.iterator());
    when(mockClient.scanTable(anyObject(), any(Scan.class), anyObject()))
            .thenAnswer((InvocationOnMock invocationOnMock) -> {
                ResultProcessor processor = (ResultProcessor) invocationOnMock.getArguments()[2];
                return processor.scan(mockScanner);
            });

    // Constrain family1:col3 to the single value 1L.
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("family1:col3", SortedRangeSet.copyOf(Types.MinorType.BIGINT.getType(),
            ImmutableList.of(Range.equal(allocator, Types.MinorType.BIGINT.getType(), 1L)), false));

    S3SpillLocation splitLoc = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();
    Split.Builder splitBuilder = Split.newBuilder(splitLoc, keyFactory.create())
            .add(HBASE_CONN_STR, "fake_con_str")
            .add(START_KEY_FIELD, "fake_start_key")
            .add(END_KEY_FIELD, "fake_end_key")
            .add(REGION_ID_FIELD, "fake_region_id")
            .add(REGION_NAME_FIELD, "fake_region_name");

    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG,
            "queryId-" + System.currentTimeMillis(), new TableName(DEFAULT_SCHEMA, TEST_TABLE),
            schemaForRead, splitBuilder.build(), new Constraints(constraintsMap),
            // 100GB limits: don't expect this to spill
            100_000_000_000L, 100_000_000_000L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);

    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecordCount());
    assertEquals(1, response.getRecords().getRowCount());
    logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(response.getRecords(), 0));
}
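The 100GB limits guarantee an inline ReadRecordsResponse. As a hedged counterpart, modeled on the test above and assuming RemoteReadRecordsResponse (the spilled response type in the same SDK package) is on the classpath, shrinking the two byte limits should force the handler to spill to S3:

    // Sketch: a small block size plus a zero inline threshold should force
    // spilling, so doReadRecords returns a RemoteReadRecordsResponse instead.
    ReadRecordsRequest spillRequest = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG,
            "queryId-" + System.currentTimeMillis(), new TableName(DEFAULT_SCHEMA, TEST_TABLE),
            schemaForRead, splitBuilder.build(), new Constraints(constraintsMap),
            1_500_000L, 0L);
    RecordResponse spillResponse = handler.doReadRecords(allocator, spillRequest);
    assertTrue(spillResponse instanceof RemoteReadRecordsResponse);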
Use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.
From the class BigQueryMetadataHandlerTest, method testDoGetSplits:
@Test
public void testDoGetSplits() throws Exception {
    BlockAllocator blockAllocator = new BlockAllocatorImpl();
    PowerMockito.mockStatic(BigQueryUtils.class);
    when(BigQueryUtils.getBigQueryClient()).thenReturn(bigQuery);
    when(BigQueryUtils.getEnvVar("concurrencyLimit")).thenReturn("10");

    GetSplitsRequest request = new GetSplitsRequest(federatedIdentity, QUERY_ID, CATALOG, TABLE_NAME,
            mock(Block.class), Collections.<String>emptyList(), new Constraints(new HashMap<>()), null);

    // Schema with a single integer column, countCol.
    List<Field> testSchemaFields = Arrays.asList(Field.of("countCol", LegacySQLTypeName.INTEGER));
    com.google.cloud.bigquery.Schema tableSchema = Schema.of(testSchemaFields);

    // Mock the table row count as 15.
    List<FieldValue> bigQueryRowValue = Arrays.asList(FieldValue.of(FieldValue.Attribute.PRIMITIVE, "15"));
    FieldValueList fieldValueList = FieldValueList.of(bigQueryRowValue, FieldList.of(testSchemaFields));
    List<FieldValueList> tableRows = Arrays.asList(fieldValueList);

    when(job.isDone()).thenReturn(false).thenReturn(true);
    Page<FieldValueList> pageNoSchema = new BigQueryPage<>(tableRows);
    TableResult result = new TableResult(tableSchema, tableRows.size(), pageNoSchema);
    when(job.getQueryResults()).thenReturn(result);

    QueryStatusChecker queryStatusChecker = mock(QueryStatusChecker.class);
    when(queryStatusChecker.isQueryRunning()).thenReturn(true);

    GetSplitsResponse response = bigQueryMetadataHandler.doGetSplits(blockAllocator, request);
    assertNotNull(response);
}
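assertNotNull is a weak check. If the handler is expected to emit at least one split for a 15-row table at concurrencyLimit 10 (an assumption about BigQueryMetadataHandler's split math, not something this test asserts), the check could be tightened along these lines:

    // Hedged follow-up assertions; the exact split count depends on the
    // handler's partitioning logic, so only a lower bound is checked here.
    assertEquals(MetadataRequestType.GET_SPLITS, response.getRequestType());
    assertFalse("expected at least one split", response.getSplits().isEmpty());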
Use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.
From the class ReadRecordsRequestSerDeTest, method beforeTest:
@Before
public void beforeTest() throws IOException {
    String yearCol = "year";
    String monthCol = "month";
    String dayCol = "day";
    Schema schema = SchemaBuilder.newBuilder()
            .addField(yearCol, new ArrowType.Int(32, true))
            .addField(monthCol, new ArrowType.Int(32, true))
            .addField(dayCol, new ArrowType.Int(32, true))
            .addField("col2", new ArrowType.Utf8())
            .addField("col3", Types.MinorType.FLOAT8.getType())
            .addField("col4", Types.MinorType.FLOAT8.getType())
            .addField("col5", Types.MinorType.FLOAT8.getType())
            .build();

    // One constraint per ValueSet implementation, so the SerDe covers all three.
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("col3", SortedRangeSet.copyOf(Types.MinorType.FLOAT8.getType(),
            ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.FLOAT8.getType(), -10000D)), false));
    constraintsMap.put("col4", EquatableValueSet.newBuilder(allocator, Types.MinorType.FLOAT8.getType(), false, true)
            .add(1.1D).build());
    constraintsMap.put("col5", new AllOrNoneValueSet(Types.MinorType.FLOAT8.getType(), false, true));
    Constraints constraints = new Constraints(constraintsMap);

    Block partitions = allocator.createBlock(schema);
    int numPartitions = 10;
    for (int i = 0; i < numPartitions; i++) {
        BlockUtils.setValue(partitions.getFieldVector(yearCol), i, 2016 + i);
        BlockUtils.setValue(partitions.getFieldVector(monthCol), i, (i % 12) + 1);
        BlockUtils.setValue(partitions.getFieldVector(dayCol), i, (i % 28) + 1);
    }
    partitions.setRowCount(numPartitions);

    SpillLocation spillLocation = S3SpillLocation.newBuilder()
            .withBucket("athena-virtuoso-test")
            .withPrefix("lambda-spill")
            .withQueryId("test-query-id")
            .withSplitId("test-split-id")
            .withIsDirectory(true)
            .build();
    EncryptionKey encryptionKey = new EncryptionKey("test-key".getBytes(), "test-nonce".getBytes());
    Split split = Split.newBuilder(spillLocation, encryptionKey)
            .add("year", "2017")
            .add("month", "11")
            .add("day", "1")
            .build();

    expected = new ReadRecordsRequest(federatedIdentity, "test-query-id", "test-catalog",
            new TableName("test-schema", "test-table"), schema, split, constraints,
            100_000_000_000L, 100_000_000_000L);

    String expectedSerDeFile = utils.getResourceOrFail("serde/v2", "ReadRecordsRequest.json");
    expectedSerDeText = utils.readAllAsString(expectedSerDeFile).trim();
}
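The three entries in constraintsMap exercise each ValueSet implementation the v2 SerDe must round-trip. A short annotated recap, reflecting my reading of the SDK's predicate semantics rather than anything the test itself asserts:

    // col3 -> SortedRangeSet:    range predicate, col3 > -10000.0, NULL excluded
    // col4 -> EquatableValueSet: built with isWhiteList = false, so col4 != 1.1, NULL allowed
    // col5 -> AllOrNoneValueSet: all = false with nullAllowed = true, so only NULL rows match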
Use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.
From the class BigQueryRecordHandlerTest, method getObjectFromFieldValue:
@Test
public void getObjectFromFieldValue() throws Exception {
    org.apache.arrow.vector.types.pojo.Schema testSchema = SchemaBuilder.newBuilder()
            .addDateDayField("datecol")
            .addDateMilliField("datetimecol")
            .addStringField("timestampcol")
            .build();

    try (ReadRecordsRequest request = new ReadRecordsRequest(federatedIdentity, BigQueryTestUtils.PROJECT_1_NAME,
            "queryId", new TableName("dataset1", "table1"), testSchema,
            Split.newBuilder(S3SpillLocation.newBuilder()
                    .withBucket(bucket)
                    .withPrefix(prefix)
                    .withSplitId(UUID.randomUUID().toString())
                    .withQueryId(UUID.randomUUID().toString())
                    .withIsDirectory(true)
                    .build(), keyFactory.create()).build(),
            new Constraints(Collections.EMPTY_MAP),
            // The byte limits are ignored when directly calling readWithConstraints.
            0, 0)) {

        // Always return true from the evaluator so that all rows are kept.
        ConstraintEvaluator evaluator = mock(ConstraintEvaluator.class);
        when(evaluator.apply(any(String.class), any(Object.class)))
                .thenAnswer((InvocationOnMock invocationOnMock) -> true);

        // Schema with columns datecol, datetimecol, and timestampcol.
        List<com.google.cloud.bigquery.Field> testSchemaFields = Arrays.asList(
                com.google.cloud.bigquery.Field.of("datecol", LegacySQLTypeName.DATE),
                com.google.cloud.bigquery.Field.of("datetimecol", LegacySQLTypeName.DATETIME),
                com.google.cloud.bigquery.Field.of("timestampcol", LegacySQLTypeName.TIMESTAMP));
        com.google.cloud.bigquery.Schema tableSchema = com.google.cloud.bigquery.Schema.of(testSchemaFields);

        // One mocked row holding a DATE, a DATETIME, and a TIMESTAMP value.
        List<FieldValue> firstRowValues = Arrays.asList(
                FieldValue.of(FieldValue.Attribute.PRIMITIVE, "2016-02-05"),
                FieldValue.of(FieldValue.Attribute.PRIMITIVE, "2021-10-30T10:10:10"),
                FieldValue.of(FieldValue.Attribute.PRIMITIVE, "2014-12-03T12:30:00.450Z"));
        FieldValueList firstRow = FieldValueList.of(firstRowValues, FieldList.of(testSchemaFields));
        List<FieldValueList> tableRows = Arrays.asList(firstRow);
        Page<FieldValueList> fieldValueList = new BigQueryPage<>(tableRows);
        TableResult result = new TableResult(tableSchema, tableRows.size(), fieldValueList);

        // Mock out the Google BigQuery Job.
        Job mockBigQueryJob = mock(Job.class);
        when(mockBigQueryJob.isDone()).thenReturn(false).thenReturn(true);
        when(mockBigQueryJob.getQueryResults()).thenReturn(result);
        when(bigQuery.create(any(JobInfo.class))).thenReturn(mockBigQueryJob);

        QueryStatusChecker queryStatusChecker = mock(QueryStatusChecker.class);
        when(queryStatusChecker.isQueryRunning()).thenReturn(true);

        // Execute the test.
        bigQueryRecordHandler.readWithConstraint(spillWriter, request, queryStatusChecker);

        PowerMockito.mockStatic(System.class);
        PowerMockito.when(System.getenv(anyString())).thenReturn("test");
        logger.info("Project Name: " + BigQueryUtils.getProjectName(request.getCatalogName()));
    }
}
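The first mocked column exercises the DATE-to-DateDay conversion. As a quick standalone sanity check of the epoch-day value an Arrow DateDay vector should end up holding (the handler's exact conversion path is an assumption here; only java.time is used):

    // 2016-02-05 is 16,836 days after the Unix epoch (1970-01-01), which is
    // the integer a DateDay vector would store for the first mocked row.
    long epochDays = java.time.LocalDate.parse("2016-02-05").toEpochDay();
    assertEquals(16836L, epochDays);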