Use of com.amazonaws.athena.connector.lambda.QueryStatusChecker in the project aws-athena-query-federation by awslabs.
From the class GlueMetadataHandlerTest, method setUp.
/**
 * Prepares the shared fixture: a {@link GlueMetadataHandler} whose overridden operations all throw
 * {@link UnsupportedOperationException}, a fresh block allocator, and a scripted answer for
 * {@code mockGlue.getTables(...)} that serves either the complete table list or a single page of it.
 *
 * @throws Exception declared by the original signature; nothing in the visible body throws a checked exception.
 */
@Before
public void setUp() throws Exception {
    logger.info("{}: enter", testName.getMethodName());

    handler = new GlueMetadataHandler(mockGlue, new LocalKeyFactory(), mock(AWSSecretsManager.class), mock(AmazonAthena.class), "glue-test", "spill-bucket", "spill-prefix") {
        @Override
        public GetTableLayoutResponse doGetTableLayout(BlockAllocator blockAllocator, GetTableLayoutRequest request) {
            throw new UnsupportedOperationException();
        }

        @Override
        public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker) throws Exception {
            throw new UnsupportedOperationException();
        }

        @Override
        public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest request) {
            throw new UnsupportedOperationException();
        }
    };
    allocator = new BlockAllocatorImpl();

    // doListTables pagination: every getTables call is answered from unPaginatedTables.
    when(mockGlue.getTables(any(GetTablesRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
        GetTablesRequest tablesRequest = (GetTablesRequest) invocation.getArguments()[0];

        // The handler must always address the expected catalog and database.
        assertEquals(accountId, tablesRequest.getCatalogId());
        assertEquals(schema, tablesRequest.getDatabaseName());

        String startToken = tablesRequest.getNextToken();
        Integer requestedMax = tablesRequest.getMaxResults();
        int pageSize = requestedMax == null ? UNLIMITED_PAGE_SIZE_VALUE : requestedMax;

        List<Table> servedTables;
        String followingToken;
        if (pageSize == UNLIMITED_PAGE_SIZE_VALUE) {
            // Simulate the full list of tables returned from Glue in one response.
            servedTables = unPaginatedTables;
            followingToken = null;
        } else {
            // Simulate a paginated response. Tables are ordered by name and the token is the name of the
            // first table of the next page, so one extra element is fetched to detect whether a next page exists.
            List<Table> window = unPaginatedTables.stream()
                    .sorted(Comparator.comparing(Table::getName))
                    .filter(table -> startToken == null || table.getName().compareTo(startToken) >= 0)
                    .limit(pageSize + 1)
                    .collect(Collectors.toList());
            boolean hasAnotherPage = window.size() > pageSize;
            followingToken = hasAnotherPage ? window.get(pageSize).getName() : null;
            servedTables = hasAnotherPage ? window.subList(0, pageSize) : window;
        }

        GetTablesResult mockResult = mock(GetTablesResult.class);
        when(mockResult.getNextToken()).thenReturn(followingToken);
        when(mockResult.getTableList()).thenReturn(servedTables);
        return mockResult;
    });
}
Use of com.amazonaws.athena.connector.lambda.QueryStatusChecker in the project aws-athena-query-federation by awslabs.
From the class BigQueryMetadataHandlerTest, method testDoGetSplits.
/**
 * Checks that {@code doGetSplits} produces a non-null response when the mocked BigQuery job
 * reports a single-row result whose only column, {@code countCol}, holds the value 15.
 *
 * @throws Exception if the handler call or closing the block allocator fails.
 */
@Test
public void testDoGetSplits() throws Exception {
    // Route the static BigQueryUtils lookups to the test doubles.
    PowerMockito.mockStatic(BigQueryUtils.class);
    when(BigQueryUtils.getBigQueryClient()).thenReturn(bigQuery);
    when(BigQueryUtils.getEnvVar("concurrencyLimit")).thenReturn("10");

    GetSplitsRequest request = new GetSplitsRequest(federatedIdentity, QUERY_ID, CATALOG, TABLE_NAME, mock(Block.class), Collections.<String>emptyList(), new Constraints(new HashMap<>()), null);

    // added schema with integer column countCol
    List<Field> testSchemaFields = Arrays.asList(Field.of("countCol", LegacySQLTypeName.INTEGER));
    com.google.cloud.bigquery.Schema tableSchema = Schema.of(testSchemaFields);

    // mocked table row count as 15
    List<FieldValue> bigQueryRowValue = Arrays.asList(FieldValue.of(FieldValue.Attribute.PRIMITIVE, "15"));
    FieldValueList fieldValueList = FieldValueList.of(bigQueryRowValue, FieldList.of(testSchemaFields));
    List<FieldValueList> tableRows = Arrays.asList(fieldValueList);

    // The job reports "not done" on the first poll and "done" afterwards.
    when(job.isDone()).thenReturn(false).thenReturn(true);
    Page<FieldValueList> pageNoSchema = new BigQueryPage<>(tableRows);
    TableResult result = new TableResult(tableSchema, tableRows.size(), pageNoSchema);
    when(job.getQueryResults()).thenReturn(result);

    // try-with-resources releases the allocator once the assertion has run.
    try (BlockAllocator blockAllocator = new BlockAllocatorImpl()) {
        GetSplitsResponse response = bigQueryMetadataHandler.doGetSplits(blockAllocator, request);
        assertNotNull(response);
    }
}
Use of com.amazonaws.athena.connector.lambda.QueryStatusChecker in the project aws-athena-query-federation by awslabs.
From the class BigQueryRecordHandlerTest, method getObjectFromFieldValue.
/**
 * Drives {@code readWithConstraint} against a mocked BigQuery job whose single row carries a DATE,
 * a DATETIME and a TIMESTAMP value, then logs the project name resolved for the request's catalog.
 * The test makes no assertions of its own; it passes as long as no exception is thrown.
 *
 * @throws Exception if the record handler or closing the request fails.
 */
@Test
public void getObjectFromFieldValue() throws Exception {
    org.apache.arrow.vector.types.pojo.Schema testSchema = SchemaBuilder.newBuilder()
            .addDateDayField("datecol")
            .addDateMilliField("datetimecol")
            .addStringField("timestampcol")
            .build();
    S3SpillLocation spillLocation = S3SpillLocation.newBuilder()
            .withBucket(bucket)
            .withPrefix(prefix)
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();

    // The two trailing size arguments are ignored when directly calling readWithConstraints.
    try (ReadRecordsRequest request = new ReadRecordsRequest(federatedIdentity, BigQueryTestUtils.PROJECT_1_NAME, "queryId", new TableName("dataset1", "table1"), testSchema, Split.newBuilder(spillLocation, keyFactory.create()).build(), new Constraints(Collections.emptyMap()), 0, 0)) {
        // added schema with columns datecol, datetimecol, timestampcol
        List<com.google.cloud.bigquery.Field> testSchemaFields = Arrays.asList(com.google.cloud.bigquery.Field.of("datecol", LegacySQLTypeName.DATE), com.google.cloud.bigquery.Field.of("datetimecol", LegacySQLTypeName.DATETIME), com.google.cloud.bigquery.Field.of("timestampcol", LegacySQLTypeName.TIMESTAMP));
        com.google.cloud.bigquery.Schema tableSchema = com.google.cloud.bigquery.Schema.of(testSchemaFields);

        // mocked table rows
        List<FieldValue> firstRowValues = Arrays.asList(FieldValue.of(FieldValue.Attribute.PRIMITIVE, "2016-02-05"), FieldValue.of(FieldValue.Attribute.PRIMITIVE, "2021-10-30T10:10:10"), FieldValue.of(FieldValue.Attribute.PRIMITIVE, "2014-12-03T12:30:00.450Z"));
        FieldValueList firstRow = FieldValueList.of(firstRowValues, FieldList.of(testSchemaFields));
        List<FieldValueList> tableRows = Arrays.asList(firstRow);
        Page<FieldValueList> fieldValueList = new BigQueryPage<>(tableRows);
        TableResult result = new TableResult(tableSchema, tableRows.size(), fieldValueList);

        // Mock out the Google BigQuery Job: "not done" on the first poll, "done" afterwards.
        Job mockBigQueryJob = mock(Job.class);
        when(mockBigQueryJob.isDone()).thenReturn(false).thenReturn(true);
        when(mockBigQueryJob.getQueryResults()).thenReturn(result);
        when(bigQuery.create(any(JobInfo.class))).thenReturn(mockBigQueryJob);

        // Report the query as still running so the handler is never told to stop early.
        QueryStatusChecker queryStatusChecker = mock(QueryStatusChecker.class);
        when(queryStatusChecker.isQueryRunning()).thenReturn(true);

        // Execute the test
        bigQueryRecordHandler.readWithConstraint(spillWriter, request, queryStatusChecker);

        // Every environment variable lookup yields "test" before the project name is resolved.
        PowerMockito.mockStatic(System.class);
        PowerMockito.when(System.getenv(anyString())).thenReturn("test");
        logger.info("Project Name: " + BigQueryUtils.getProjectName(request.getCatalogName()));
    }
}
Use of com.amazonaws.athena.connector.lambda.QueryStatusChecker in the project aws-athena-query-federation by awslabs.
From the class JdbcRecordHandlerTest, method readWithConstraint.
/**
 * Feeds a two-row mocked JDBC result set through {@code readWithConstraint} and checks, inside the
 * stubbed S3 {@code putObject} call, that whatever gets written contains one of the expected values.
 *
 * @throws SQLException declared because the mocked JDBC calls declare it.
 */
@Test
public void readWithConstraint() throws SQLException {
    // Evaluator that accepts every value it is asked about.
    ConstraintEvaluator acceptAll = Mockito.mock(ConstraintEvaluator.class);
    Mockito.when(acceptAll.apply(Mockito.anyString(), Mockito.any())).thenReturn(true);

    // Arrow schema: two data columns plus one partition column.
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol1", org.apache.arrow.vector.types.Types.MinorType.INT.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol2", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testPartitionCol", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    Schema fieldSchema = schemaBuilder.build();

    // Split carrying the partition value, spilling to a directory-style location.
    S3SpillLocation spillLocation = S3SpillLocation.newBuilder().withIsDirectory(true).build();
    Split split = Split.newBuilder(spillLocation, null)
            .add("testPartitionCol", "testPartitionValue")
            .build();

    // JDBC side: the prepared statement yields two rows of (INTEGER, VARCHAR).
    String[] columnNames = { "testCol1", "testCol2" };
    int[] jdbcTypes = { Types.INTEGER, Types.VARCHAR };
    Object[][] rows = { { 1, "testVal1" }, { 2, "testVal2" } };
    AtomicInteger cursor = new AtomicInteger(-1);
    ResultSet resultSet = mockResultSet(columnNames, jdbcTypes, rows, cursor);
    Mockito.when(this.preparedStatement.executeQuery()).thenReturn(resultSet);

    // Spiller wired to the mocked S3 client.
    BlockAllocator allocator = new BlockAllocatorImpl();
    SpillConfig spillConfig = Mockito.mock(SpillConfig.class);
    Mockito.when(spillConfig.getSpillLocation()).thenReturn(spillLocation);
    BlockSpiller s3Spiller = new S3BlockSpiller(this.amazonS3, spillConfig, allocator, fieldSchema, acceptAll);

    Constraints constraints = Mockito.mock(Constraints.class, Mockito.RETURNS_DEEP_STUBS);
    TableName inputTableName = new TableName("testSchema", "testTable");
    ReadRecordsRequest readRecordsRequest = new ReadRecordsRequest(this.federatedIdentity, "testCatalog", "testQueryId", inputTableName, fieldSchema, split, constraints, 1024, 1024);

    // Any object pushed to S3 must mention at least one of the values supplied above.
    Mockito.when(amazonS3.putObject(Mockito.anyString(), Mockito.anyString(), Mockito.any(), Mockito.any())).thenAnswer((Answer<PutObjectResult>) invocation -> {
        ByteArrayInputStream payload = (ByteArrayInputStream) invocation.getArguments()[2];
        byte[] raw = new byte[payload.available()];
        payload.read(raw, 0, raw.length);
        String text = new String(raw, StandardCharsets.UTF_8);
        boolean mentionsExpectedValue = text.contains("testVal1")
                || text.contains("testVal2")
                || text.contains("testPartitionValue");
        Assert.assertTrue(mentionsExpectedValue);
        return new PutObjectResult();
    });

    this.jdbcRecordHandler.readWithConstraint(s3Spiller, readRecordsRequest, queryStatusChecker);
}
Use of com.amazonaws.athena.connector.lambda.QueryStatusChecker in the project foundry-athena-query-federation-connector by palantir.
From the class PartitionFetcher, method getAndWritePartitions.
/**
 * Pages through the partitions reported by the metadata service and hands each one to the block writer.
 * Paging stops when a page carries no next-page token, or when the status checker reports that the
 * query is no longer running (checked before each page is requested).
 *
 * @param blockWriter        receives one row-writing callback per partition
 * @param request            layout request whose schema custom metadata is converted into the catalog locator
 * @param queryStatusChecker consulted before every page fetch
 */
private void getAndWritePartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker) {
    CatalogLocator locator = FoundryAthenaObjectMapper.objectMapper()
            .convertValue(request.getSchema().getCustomMetadata(), CatalogLocator.class);

    Optional<String> pageToken = Optional.empty();
    while (queryStatusChecker.isQueryRunning()) {
        GetPartitionsResponsePage page = metadataService.getPartitions(
                authProvider.getAuthHeader(),
                GetPartitionsRequest.builder().locator(locator).limit(PAGE_SIZE).pageToken(pageToken).build());

        page.getPartitions().forEach(partition -> blockWriter.writeRows((block, rowNum) -> {
            // reduce (unlike allMatch) visits every field, so each value writer is invoked
            // even after an earlier field has returned false.
            boolean everyFieldAccepted = partition.get().entrySet().stream()
                    .map(entry -> entry.getValue().accept(new PartitionValueWriter(block, entry.getKey(), rowNum)))
                    .reduce(true, Boolean::logicalAnd);
            // if all fields passed then we wrote 1 row
            if (everyFieldAccepted) {
                return 1;
            }
            return 0;
        }));

        // Advance to the next page; an absent token means this was the last one.
        pageToken = page.getNextPageToken();
        if (!pageToken.isPresent()) {
            return;
        }
    }
}
Aggregations