Search in sources :

Example 1 with QueryStatusChecker

use of com.amazonaws.athena.connector.lambda.QueryStatusChecker in project aws-athena-query-federation by awslabs.

the class GlueMetadataHandlerTest method setUp.

/**
 * Builds the fixture used by each test.
 *
 * <p>Creates a {@code GlueMetadataHandler} as an anonymous subclass whose table-layout,
 * partition and split operations all throw {@link UnsupportedOperationException}, creates a
 * fresh block allocator, and stubs {@code mockGlue.getTables(...)} so that it serves
 * {@code unPaginatedTables} either in full or one page at a time.
 */
@Before
public void setUp() throws Exception {
    logger.info("{}: enter", testName.getMethodName());

    handler = new GlueMetadataHandler(mockGlue, new LocalKeyFactory(), mock(AWSSecretsManager.class), mock(AmazonAthena.class), "glue-test", "spill-bucket", "spill-prefix") {

        @Override
        public GetTableLayoutResponse doGetTableLayout(BlockAllocator blockAllocator, GetTableLayoutRequest request) {
            throw new UnsupportedOperationException();
        }

        @Override
        public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker) throws Exception {
            throw new UnsupportedOperationException();
        }

        @Override
        public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest request) {
            throw new UnsupportedOperationException();
        }
    };

    allocator = new BlockAllocatorImpl();

    // Fake Glue's getTables so that doListTables pagination can be exercised.
    when(mockGlue.getTables(any(GetTablesRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
        GetTablesRequest tablesRequest = (GetTablesRequest) invocation.getArguments()[0];
        String startToken = tablesRequest.getNextToken();
        Integer requestedMax = tablesRequest.getMaxResults();
        int pageSize = (requestedMax != null) ? requestedMax : UNLIMITED_PAGE_SIZE_VALUE;

        // Every call must target the expected catalog and database.
        assertEquals(accountId, tablesRequest.getCatalogId());
        assertEquals(schema, tablesRequest.getDatabaseName());

        List<Table> tablesToReturn;
        String tokenToReturn = null;
        if (pageSize == UNLIMITED_PAGE_SIZE_VALUE) {
            // No page size requested: hand back the complete table list, no continuation token.
            tablesToReturn = unPaginatedTables;
        } else {
            // Tables are ordered by name and the token is the name of the first table of the
            // next page. One extra element is fetched to detect whether another page exists.
            List<Table> window = unPaginatedTables.stream()
                    .sorted(Comparator.comparing(Table::getName))
                    .filter(table -> startToken == null || table.getName().compareTo(startToken) >= 0)
                    .limit(pageSize + 1)
                    .collect(Collectors.toList());
            if (window.size() > pageSize) {
                tokenToReturn = window.get(pageSize).getName();
                tablesToReturn = window.subList(0, pageSize);
            } else {
                tablesToReturn = window;
            }
        }

        GetTablesResult tablesResult = mock(GetTablesResult.class);
        when(tablesResult.getTableList()).thenReturn(tablesToReturn);
        when(tablesResult.getNextToken()).thenReturn(tokenToReturn);
        return tablesResult;
    });
}
Also used : Table(com.amazonaws.services.glue.model.Table) GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) GetTablesResult(com.amazonaws.services.glue.model.GetTablesResult) LocalKeyFactory(com.amazonaws.athena.connector.lambda.security.LocalKeyFactory) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) QueryStatusChecker(com.amazonaws.athena.connector.lambda.QueryStatusChecker) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) InvocationOnMock(org.mockito.invocation.InvocationOnMock) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) BlockWriter(com.amazonaws.athena.connector.lambda.data.BlockWriter) GetTablesRequest(com.amazonaws.services.glue.model.GetTablesRequest) Before(org.junit.Before)

Example 2 with QueryStatusChecker

use of com.amazonaws.athena.connector.lambda.QueryStatusChecker in project aws-athena-query-federation by awslabs.

the class BigQueryMetadataHandlerTest method testDoGetSplits.

/**
 * Checks that {@code bigQueryMetadataHandler.doGetSplits(...)} returns a non-null response.
 *
 * <p>{@code BigQueryUtils} is statically mocked to supply the {@code bigQuery} client and a
 * {@code concurrencyLimit} of "10". The {@code job} fixture reports not-done once and then
 * done, and yields a single-row result whose {@code countCol} value is "15".
 *
 * <p>The block allocator is scoped with try-with-resources so that it is released even when
 * the call or the assertion fails.
 *
 * @throws Exception if the handler call or closing the allocator fails
 */
@Test
public void testDoGetSplits() throws Exception {
    PowerMockito.mockStatic(BigQueryUtils.class);
    when(BigQueryUtils.getBigQueryClient()).thenReturn(bigQuery);
    when(BigQueryUtils.getEnvVar("concurrencyLimit")).thenReturn("10");
    GetSplitsRequest request = new GetSplitsRequest(federatedIdentity, QUERY_ID, CATALOG, TABLE_NAME, mock(Block.class), Collections.<String>emptyList(), new Constraints(new HashMap<>()), null);
    // added schema with integer column countCol
    List<Field> testSchemaFields = Arrays.asList(Field.of("countCol", LegacySQLTypeName.INTEGER));
    com.google.cloud.bigquery.Schema tableSchema = Schema.of(testSchemaFields);
    // mocked table row count as 15
    List<FieldValue> bigQueryRowValue = Arrays.asList(FieldValue.of(FieldValue.Attribute.PRIMITIVE, "15"));
    FieldValueList fieldValueList = FieldValueList.of(bigQueryRowValue, FieldList.of(testSchemaFields));
    List<FieldValueList> tableRows = Arrays.asList(fieldValueList);
    when(job.isDone()).thenReturn(false).thenReturn(true);
    Page<FieldValueList> pageNoSchema = new BigQueryPage<>(tableRows);
    TableResult result = new TableResult(tableSchema, tableRows.size(), pageNoSchema);
    when(job.getQueryResults()).thenReturn(result);
    // doGetSplits takes no QueryStatusChecker, so no such mock is needed here.
    try (BlockAllocator blockAllocator = new BlockAllocatorImpl()) {
        GetSplitsResponse response = bigQueryMetadataHandler.doGetSplits(blockAllocator, request);
        assertNotNull(response);
    }
}
Also used : HashMap(java.util.HashMap) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) QueryStatusChecker(com.amazonaws.athena.connector.lambda.QueryStatusChecker) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) Block(com.amazonaws.athena.connector.lambda.data.Block) com.google.cloud.bigquery(com.google.cloud.bigquery) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 3 with QueryStatusChecker

use of com.amazonaws.athena.connector.lambda.QueryStatusChecker in project aws-athena-query-federation by awslabs.

the class BigQueryRecordHandlerTest method getObjectFromFieldValue.

/**
 * Runs {@code bigQueryRecordHandler.readWithConstraint(...)} against a mocked BigQuery job
 * whose single result row has DATE, DATETIME and TIMESTAMP columns.
 *
 * <p>The test contains no explicit assertions: it passes when the read, and the project-name
 * lookup that follows it, complete without throwing.
 *
 * @throws Exception if the record handler or closing the request fails
 */
@Test
public void getObjectFromFieldValue() throws Exception {
    org.apache.arrow.vector.types.pojo.Schema testSchema = SchemaBuilder.newBuilder().addDateDayField("datecol").addDateMilliField("datetimecol").addStringField("timestampcol").build();
    try (ReadRecordsRequest request = new ReadRecordsRequest(federatedIdentity, BigQueryTestUtils.PROJECT_1_NAME, "queryId", new TableName("dataset1", "table1"), testSchema, Split.newBuilder(S3SpillLocation.newBuilder().withBucket(bucket).withPrefix(prefix).withSplitId(UUID.randomUUID().toString()).withQueryId(UUID.randomUUID().toString()).withIsDirectory(true).build(), keyFactory.create()).build(), new Constraints(Collections.emptyMap()), // This is ignored when directly calling readWithConstraints.
    0, 0)) {
        // added schema with columns datecol, datetimecol, timestampcol
        List<com.google.cloud.bigquery.Field> testSchemaFields = Arrays.asList(com.google.cloud.bigquery.Field.of("datecol", LegacySQLTypeName.DATE), com.google.cloud.bigquery.Field.of("datetimecol", LegacySQLTypeName.DATETIME), com.google.cloud.bigquery.Field.of("timestampcol", LegacySQLTypeName.TIMESTAMP));
        com.google.cloud.bigquery.Schema tableSchema = com.google.cloud.bigquery.Schema.of(testSchemaFields);
        // mocked table rows
        List<FieldValue> firstRowValues = Arrays.asList(FieldValue.of(FieldValue.Attribute.PRIMITIVE, "2016-02-05"), FieldValue.of(FieldValue.Attribute.PRIMITIVE, "2021-10-30T10:10:10"), FieldValue.of(FieldValue.Attribute.PRIMITIVE, "2014-12-03T12:30:00.450Z"));
        FieldValueList firstRow = FieldValueList.of(firstRowValues, FieldList.of(testSchemaFields));
        List<FieldValueList> tableRows = Arrays.asList(firstRow);
        Page<FieldValueList> fieldValueList = new BigQueryPage<>(tableRows);
        TableResult result = new TableResult(tableSchema, tableRows.size(), fieldValueList);
        // Mock out the Google BigQuery Job: not done on the first poll, done on the second.
        Job mockBigQueryJob = mock(Job.class);
        when(mockBigQueryJob.isDone()).thenReturn(false).thenReturn(true);
        when(mockBigQueryJob.getQueryResults()).thenReturn(result);
        when(bigQuery.create(any(JobInfo.class))).thenReturn(mockBigQueryJob);
        // Report the query as still running.
        QueryStatusChecker queryStatusChecker = mock(QueryStatusChecker.class);
        when(queryStatusChecker.isQueryRunning()).thenReturn(true);
        // Execute the test
        bigQueryRecordHandler.readWithConstraint(spillWriter, request, queryStatusChecker);
        // Environment lookups are stubbed to return "test" before the project name is resolved.
        PowerMockito.mockStatic(System.class);
        PowerMockito.when(System.getenv(anyString())).thenReturn("test");
        logger.info("Project Name: " + BigQueryUtils.getProjectName(request.getCatalogName()));
    }
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) ConstraintEvaluator(com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator) Field(org.apache.arrow.vector.types.pojo.Field) com.google.cloud.bigquery(com.google.cloud.bigquery) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) QueryStatusChecker(com.amazonaws.athena.connector.lambda.QueryStatusChecker) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3Object(com.amazonaws.services.s3.model.S3Object) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 4 with QueryStatusChecker

use of com.amazonaws.athena.connector.lambda.QueryStatusChecker in project aws-athena-query-federation by awslabs.

the class JdbcRecordHandlerTest method readWithConstraint.

/**
 * Drives {@code jdbcRecordHandler.readWithConstraint(...)} with a two-row mocked JDBC result
 * set and a split that carries one partition value.
 *
 * <p>Verification happens inside the stubbed {@code amazonS3.putObject(...)} answer: each
 * uploaded payload must contain at least one of the expected cell or partition values.
 *
 * @throws SQLException if stubbing the prepared statement fails
 */
@Test
public void readWithConstraint() throws SQLException {
    // Evaluator that accepts every value.
    ConstraintEvaluator acceptAllEvaluator = Mockito.mock(ConstraintEvaluator.class);
    Mockito.when(acceptAllEvaluator.apply(Mockito.anyString(), Mockito.any())).thenReturn(true);

    // Schema: two data columns plus the partition column.
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol1", org.apache.arrow.vector.types.Types.MinorType.INT.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol2", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testPartitionCol", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    Schema fieldSchema = schemaBuilder.build();

    // Split carrying the partition value, with a directory-style spill location.
    S3SpillLocation spillLocation = S3SpillLocation.newBuilder().withIsDirectory(true).build();
    Split.Builder splitBuilder = Split.newBuilder(spillLocation, null).add("testPartitionCol", "testPartitionValue");

    // Mocked JDBC result set with two rows of (int, varchar).
    String[] columnNames = { "testCol1", "testCol2" };
    int[] jdbcTypes = { Types.INTEGER, Types.VARCHAR };
    Object[][] rows = { { 1, "testVal1" }, { 2, "testVal2" } };
    AtomicInteger cursor = new AtomicInteger(-1);
    ResultSet resultSet = mockResultSet(columnNames, jdbcTypes, rows, cursor);
    Mockito.when(this.preparedStatement.executeQuery()).thenReturn(resultSet);

    // Spiller that writes through the mocked S3 client.
    BlockAllocator allocator = new BlockAllocatorImpl();
    SpillConfig spillConfig = Mockito.mock(SpillConfig.class);
    Mockito.when(spillConfig.getSpillLocation()).thenReturn(spillLocation);
    BlockSpiller s3Spiller = new S3BlockSpiller(this.amazonS3, spillConfig, allocator, fieldSchema, acceptAllEvaluator);

    TableName tableName = new TableName("testSchema", "testTable");
    Constraints constraints = Mockito.mock(Constraints.class, Mockito.RETURNS_DEEP_STUBS);
    ReadRecordsRequest readRecordsRequest = new ReadRecordsRequest(this.federatedIdentity, "testCatalog", "testQueryId", tableName, fieldSchema, splitBuilder.build(), constraints, 1024, 1024);

    // Inspect whatever gets uploaded: it has to mention one of the expected values.
    Mockito.when(amazonS3.putObject(Mockito.anyString(), Mockito.anyString(), Mockito.any(), Mockito.any())).thenAnswer((Answer<PutObjectResult>) invocation -> {
        ByteArrayInputStream uploaded = (ByteArrayInputStream) invocation.getArguments()[2];
        byte[] payload = new byte[uploaded.available()];
        uploaded.read(payload, 0, payload.length);
        String text = new String(payload, StandardCharsets.UTF_8);
        Assert.assertTrue(text.contains("testVal1") || text.contains("testVal2") || text.contains("testPartitionValue"));
        return new PutObjectResult();
    });

    this.jdbcRecordHandler.readWithConstraint(s3Spiller, readRecordsRequest, queryStatusChecker);
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) Connection(java.sql.Connection) S3BlockSpiller(com.amazonaws.athena.connector.lambda.data.S3BlockSpiller) com.amazonaws.athena.connector.lambda.data.writers.extractors(com.amazonaws.athena.connector.lambda.data.writers.extractors) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) ConstraintEvaluator(com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator) Answer(org.mockito.stubbing.Answer) BlockSpiller(com.amazonaws.athena.connector.lambda.data.BlockSpiller) SQLException(java.sql.SQLException) JdbcCredentialProvider(com.amazonaws.athena.connectors.jdbc.connection.JdbcCredentialProvider) ByteArrayInputStream(java.io.ByteArrayInputStream) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) ResultSet(java.sql.ResultSet) Map(java.util.Map) TestBase(com.amazonaws.athena.connectors.jdbc.TestBase) AmazonS3(com.amazonaws.services.s3.AmazonS3) QueryStatusChecker(com.amazonaws.athena.connector.lambda.QueryStatusChecker) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) FederatedIdentity(com.amazonaws.athena.connector.lambda.security.FederatedIdentity) PutObjectResult(com.amazonaws.services.s3.model.PutObjectResult) GetSecretValueResult(com.amazonaws.services.secretsmanager.model.GetSecretValueResult) Before(org.junit.Before) AmazonAthena(com.amazonaws.services.athena.AmazonAthena) Split(com.amazonaws.athena.connector.lambda.domain.Split) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) AWSSecretsManager(com.amazonaws.services.secretsmanager.AWSSecretsManager) Test(org.junit.Test) DatabaseConnectionConfig(com.amazonaws.athena.connectors.jdbc.connection.DatabaseConnectionConfig) PreparedStatement(java.sql.PreparedStatement) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) 
StandardCharsets(java.nio.charset.StandardCharsets) FieldBuilder(com.amazonaws.athena.connector.lambda.data.FieldBuilder) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) Mockito(org.mockito.Mockito) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) SpillConfig(com.amazonaws.athena.connector.lambda.data.SpillConfig) Assert(org.junit.Assert) JdbcConnectionFactory(com.amazonaws.athena.connectors.jdbc.connection.JdbcConnectionFactory) Collections(java.util.Collections) GetSecretValueRequest(com.amazonaws.services.secretsmanager.model.GetSecretValueRequest) Types(java.sql.Types) PutObjectResult(com.amazonaws.services.s3.model.PutObjectResult) Schema(org.apache.arrow.vector.types.pojo.Schema) ConstraintEvaluator(com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) SpillConfig(com.amazonaws.athena.connector.lambda.data.SpillConfig) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ByteArrayInputStream(java.io.ByteArrayInputStream) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) ResultSet(java.sql.ResultSet) S3BlockSpiller(com.amazonaws.athena.connector.lambda.data.S3BlockSpiller) Split(com.amazonaws.athena.connector.lambda.domain.Split) S3BlockSpiller(com.amazonaws.athena.connector.lambda.data.S3BlockSpiller) BlockSpiller(com.amazonaws.athena.connector.lambda.data.BlockSpiller) Test(org.junit.Test)

Example 5 with QueryStatusChecker

use of com.amazonaws.athena.connector.lambda.QueryStatusChecker in project foundry-athena-query-federation-connector by palantir.

the class PartitionFetcher method getAndWritePartitions.

/**
 * Fetches partitions page by page from the metadata service and writes each one as a row.
 *
 * <p>Paging continues while {@code queryStatusChecker} reports the query as running and the
 * service keeps returning a next-page token. The method returns as soon as either stops.
 *
 * @param blockWriter receives one row-writing callback per partition
 * @param request layout request whose schema custom metadata is converted to a {@code CatalogLocator}
 * @param queryStatusChecker polled before each page is requested
 */
private void getAndWritePartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker) {
    CatalogLocator locator = FoundryAthenaObjectMapper.objectMapper()
            .convertValue(request.getSchema().getCustomMetadata(), CatalogLocator.class);

    Optional<String> pageToken = Optional.empty();
    while (queryStatusChecker.isQueryRunning()) {
        GetPartitionsResponsePage page = metadataService.getPartitions(
                authProvider.getAuthHeader(),
                GetPartitionsRequest.builder().locator(locator).limit(PAGE_SIZE).pageToken(pageToken).build());

        page.getPartitions().forEach(partition -> blockWriter.writeRows((block, rowNum) -> {
            // Every field is visited (reduce does not short-circuit); the row counts as
            // written only when all of the field writers report success.
            boolean everyFieldWritten = partition.get().entrySet().stream()
                    .map(entry -> entry.getValue().accept(new PartitionValueWriter(block, entry.getKey(), rowNum)))
                    .reduce(true, Boolean::logicalAnd);
            if (everyFieldWritten) {
                return 1;
            }
            return 0;
        }));

        // No further token means this was the last page.
        if (!page.getNextPageToken().isPresent()) {
            return;
        }
        pageToken = page.getNextPageToken();
    }
}
Also used : CatalogLocator(com.palantir.foundry.athena.api.CatalogLocator) GetPartitionsResponsePage(com.palantir.foundry.athena.api.GetPartitionsResponsePage) BlockWriter(com.amazonaws.athena.connector.lambda.data.BlockWriter) CatalogLocator(com.palantir.foundry.athena.api.CatalogLocator) FoundryAthenaMetadataServiceBlocking(com.palantir.foundry.athena.api.FoundryAthenaMetadataServiceBlocking) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) Optional(java.util.Optional) QueryStatusChecker(com.amazonaws.athena.connector.lambda.QueryStatusChecker) VisibleForTesting(com.google.common.annotations.VisibleForTesting) GetPartitionsRequest(com.palantir.foundry.athena.api.GetPartitionsRequest) GetPartitionsResponsePage(com.palantir.foundry.athena.api.GetPartitionsResponsePage)

Aggregations

QueryStatusChecker (com.amazonaws.athena.connector.lambda.QueryStatusChecker)10 ConstraintEvaluator (com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator)5 BlockAllocator (com.amazonaws.athena.connector.lambda.data.BlockAllocator)4 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)4 Schema (org.apache.arrow.vector.types.pojo.Schema)4 Test (org.junit.Test)4 Block (com.amazonaws.athena.connector.lambda.data.Block)3 BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl)3 BlockWriter (com.amazonaws.athena.connector.lambda.data.BlockWriter)3 SpillConfig (com.amazonaws.athena.connector.lambda.data.SpillConfig)3 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)3 GetTableLayoutRequest (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest)3 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)3 com.google.cloud.bigquery (com.google.cloud.bigquery)3 Before (org.junit.Before)3 InvocationOnMock (org.mockito.invocation.InvocationOnMock)3 PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest)3 S3BlockSpiller (com.amazonaws.athena.connector.lambda.data.S3BlockSpiller)2 SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder)2 GetSplitsRequest (com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest)2