Example 41 with GetSplitsRequest

use of com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest in project aws-athena-query-federation by awslabs.

the class SqlServerMetadataHandlerTest method doGetSplits.

@Test
public void doGetSplits() throws Exception {
    BlockAllocator blockAllocator = new BlockAllocatorImpl();
    Constraints constraints = Mockito.mock(Constraints.class);
    TableName tableName = new TableName("testSchema", "testTable");
    PreparedStatement viewCheckPreparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(sqlServerMetadataHandler.VIEW_CHECK_QUERY)).thenReturn(viewCheckPreparedStatement);
    ResultSet viewCheckQueryResultSet = mockResultSet(new String[] { "TYPE_DESC" }, new int[] { Types.VARCHAR }, new Object[][] { { "TABLE" } }, new AtomicInteger(-1));
    Mockito.when(viewCheckPreparedStatement.executeQuery()).thenReturn(viewCheckQueryResultSet);
    PreparedStatement rowCountPreparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(sqlServerMetadataHandler.ROW_COUNT_QUERY)).thenReturn(rowCountPreparedStatement);
    ResultSet rowCountResultSet = mockResultSet(new String[] { "ROW_COUNT" }, new int[] { Types.INTEGER }, new Object[][] { { 2 } }, new AtomicInteger(-1));
    Mockito.when(rowCountPreparedStatement.executeQuery()).thenReturn(rowCountResultSet);
    PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(sqlServerMetadataHandler.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement);
    String[] columns = { sqlServerMetadataHandler.PARTITION_NUMBER };
    int[] types = { Types.INTEGER };
    Object[][] values = { { 2 }, { 3 } };
    ResultSet resultSet = mockResultSet(columns, types, values, new AtomicInteger(-1));
    Mockito.when(preparedStatement.executeQuery()).thenReturn(resultSet);
    PreparedStatement partFuncPreparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(sqlServerMetadataHandler.GET_PARTITION_FUNCTION_QUERY)).thenReturn(partFuncPreparedStatement);
    ResultSet partFuncResultSet = mockResultSet(new String[] { "PARTITION FUNCTION", "PARTITIONING COLUMN" }, new int[] { Types.VARCHAR, Types.VARCHAR }, new Object[][] { { "pf", "pc" } }, new AtomicInteger(-1));
    Mockito.when(partFuncPreparedStatement.executeQuery()).thenReturn(partFuncResultSet);
    Mockito.when(this.connection.getMetaData().getSearchStringEscape()).thenReturn(null);
    Schema partitionSchema = this.sqlServerMetadataHandler.getPartitionSchema("testCatalogName");
    Set<String> partitionCols = partitionSchema.getFields().stream().map(Field::getName).collect(Collectors.toSet());
    GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, constraints, partitionSchema, partitionCols);
    GetTableLayoutResponse getTableLayoutResponse = this.sqlServerMetadataHandler.doGetTableLayout(blockAllocator, getTableLayoutRequest);
    BlockAllocator splitBlockAllocator = new BlockAllocatorImpl();
    GetSplitsRequest getSplitsRequest = new GetSplitsRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, getTableLayoutResponse.getPartitions(), new ArrayList<>(partitionCols), constraints, null);
    GetSplitsResponse getSplitsResponse = this.sqlServerMetadataHandler.doGetSplits(splitBlockAllocator, getSplitsRequest);
    Set<Map<String, String>> expectedSplits = new HashSet<>();
    expectedSplits.add(Map.ofEntries(Map.entry(sqlServerMetadataHandler.PARTITION_NUMBER, "1"), Map.entry("PARTITIONING_COLUMN", "pc"), Map.entry("PARTITION_FUNCTION", "pf")));
    expectedSplits.add(Map.ofEntries(Map.entry(sqlServerMetadataHandler.PARTITION_NUMBER, "2"), Map.entry("PARTITIONING_COLUMN", "pc"), Map.entry("PARTITION_FUNCTION", "pf")));
    expectedSplits.add(Map.ofEntries(Map.entry(sqlServerMetadataHandler.PARTITION_NUMBER, "3"), Map.entry("PARTITIONING_COLUMN", "pc"), Map.entry("PARTITION_FUNCTION", "pf")));
    Assert.assertEquals(expectedSplits.size(), getSplitsResponse.getSplits().size());
    Set<Map<String, String>> actualSplits = getSplitsResponse.getSplits().stream().map(Split::getProperties).collect(Collectors.toSet());
    Assert.assertEquals(expectedSplits, actualSplits);
}
Also used : GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) Schema(org.apache.arrow.vector.types.pojo.Schema) PreparedStatement(java.sql.PreparedStatement) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) ResultSet(java.sql.ResultSet) Map(java.util.Map) HashSet(java.util.HashSet) Test(org.junit.Test)
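
The test above leans on a mockResultSet helper from its test base class that is not shown in this excerpt. Below is a minimal sketch of what such a helper might look like, reconstructed only from how the calls above use it (column names, JDBC type codes, row values, and an AtomicInteger cursor started at -1); the class and method bodies here are illustrative, not the project's actual helper.

import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.concurrent.atomic.AtomicInteger;

import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;

public class ResultSetMockSketch {

    // Builds a Mockito-backed ResultSet over the supplied rows. The AtomicInteger is the
    // shared row cursor, which is why the tests pass it in initialized to -1. The JDBC
    // type codes are accepted to mirror the call sites but are unused in this sketch.
    public static ResultSet mockResultSet(String[] columns, int[] types, Object[][] rows, AtomicInteger cursor)
            throws SQLException {
        ResultSet resultSet = Mockito.mock(ResultSet.class);

        // next() advances the cursor and reports whether another row exists.
        Mockito.when(resultSet.next()).thenAnswer((InvocationOnMock invocation) ->
                cursor.incrementAndGet() < rows.length);

        // Label-based getters resolve the column name to a position and read the current row.
        Mockito.when(resultSet.getObject(Mockito.anyString())).thenAnswer((InvocationOnMock invocation) ->
                rows[cursor.get()][indexOf(columns, (String) invocation.getArguments()[0])]);
        Mockito.when(resultSet.getString(Mockito.anyString())).thenAnswer((InvocationOnMock invocation) -> {
            Object value = rows[cursor.get()][indexOf(columns, (String) invocation.getArguments()[0])];
            return value == null ? null : value.toString();
        });
        Mockito.when(resultSet.getInt(Mockito.anyString())).thenAnswer((InvocationOnMock invocation) ->
                ((Number) rows[cursor.get()][indexOf(columns, (String) invocation.getArguments()[0])]).intValue());

        return resultSet;
    }

    private static int indexOf(String[] columns, String label) {
        for (int i = 0; i < columns.length; i++) {
            if (columns[i].equalsIgnoreCase(label)) {
                return i;
            }
        }
        throw new IllegalArgumentException("Unknown column: " + label);
    }
}

With a helper along these lines, each mocked PreparedStatement returns a scripted ResultSet, so the handler's view-check, row-count, and partition queries can be exercised without a live SQL Server.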

Example 42 with GetSplitsRequest

use of com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest in project aws-athena-query-federation by awslabs.

the class ExampleMetadataHandlerTest method doGetSplits.

@Test
public void doGetSplits() {
    if (!enableTests) {
        // We do this because until you complete the tutorial these tests will fail. When you attempt to publish
        // using ../tools/publish.sh ...  it will set the publishing flag and force these tests. This is how we
        // avoid breaking the build but still have a useful tutorial. We are also duplicating this block
        // on purpose since this is a somewhat odd pattern.
        logger.info("doGetSplits: Tests are disabled, to enable them set the 'publishing' environment variable " + "using maven clean install -Dpublishing=true");
        return;
    }
    logger.info("doGetSplits: enter");
    String yearCol = "year";
    String monthCol = "month";
    String dayCol = "day";
    // This is the schema that ExampleMetadataHandler has laid out for a 'Partition' so we need to populate this
    // minimal set of info here.
    Schema schema = SchemaBuilder.newBuilder().addIntField(yearCol).addIntField(monthCol).addIntField(dayCol).build();
    List<String> partitionCols = new ArrayList<>();
    partitionCols.add(yearCol);
    partitionCols.add(monthCol);
    partitionCols.add(dayCol);
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    Block partitions = allocator.createBlock(schema);
    int num_partitions = 10;
    for (int i = 0; i < num_partitions; i++) {
        BlockUtils.setValue(partitions.getFieldVector(yearCol), i, 2016 + i);
        BlockUtils.setValue(partitions.getFieldVector(monthCol), i, (i % 12) + 1);
        BlockUtils.setValue(partitions.getFieldVector(dayCol), i, (i % 28) + 1);
    }
    partitions.setRowCount(num_partitions);
    String continuationToken = null;
    GetSplitsRequest originalReq = new GetSplitsRequest(fakeIdentity(), "queryId", "catalog_name", new TableName("schema", "table_name"), partitions, partitionCols, new Constraints(constraintsMap), continuationToken);
    int numContinuations = 0;
    do {
        GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
        logger.info("doGetSplits: req[{}]", req);
        MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
        assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
        GetSplitsResponse response = (GetSplitsResponse) rawResponse;
        continuationToken = response.getContinuationToken();
        logger.info("doGetSplits: continuationToken[{}] - splits[{}]", continuationToken, response.getSplits());
        for (Split nextSplit : response.getSplits()) {
            assertNotNull(nextSplit.getProperty("year"));
            assertNotNull(nextSplit.getProperty("month"));
            assertNotNull(nextSplit.getProperty("day"));
        }
        assertTrue(!response.getSplits().isEmpty());
        if (continuationToken != null) {
            numContinuations++;
        }
    } while (continuationToken != null);
    assertTrue(numContinuations == 0);
    logger.info("doGetSplits: exit");
}
Also used : GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) MetadataResponse(com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)
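
The final assertion, numContinuations == 0, reflects that the tutorial handler hands back every split in a single batch. For contrast, here is a rough, method-level sketch of how a doGetSplits override could page through the partitions Block with a continuation token. The batch size, the token format (the next row index as a string), and the use of the SDK's protected makeSpillLocation and makeEncryptionKey helpers are assumptions for illustration, not the tutorial's actual implementation, and the method is meant to sit inside a MetadataHandler subclass.

// Method-level sketch; assumed to live inside a MetadataHandler subclass so the protected
// makeSpillLocation/makeEncryptionKey helpers are in scope. Relevant imports:
// org.apache.arrow.vector.complex.reader.FieldReader, java.util.HashSet, java.util.Set,
// plus the connector-lambda types already listed above.
private static final int MAX_SPLITS_PER_BATCH = 3;   // illustrative page size

@Override
public GetSplitsResponse doGetSplits(BlockAllocator allocator, GetSplitsRequest request) {
    Block partitions = request.getPartitions();
    // Resume from the row encoded in the continuation token, or from row 0 on the first call.
    int startRow = request.hasContinuationToken() ? Integer.parseInt(request.getContinuationToken()) : 0;

    Set<Split> splits = new HashSet<>();
    int row = startRow;
    for (; row < partitions.getRowCount() && splits.size() < MAX_SPLITS_PER_BATCH; row++) {
        FieldReader year = partitions.getFieldReader("year");
        FieldReader month = partitions.getFieldReader("month");
        FieldReader day = partitions.getFieldReader("day");
        year.setPosition(row);
        month.setPosition(row);
        day.setPosition(row);

        // One split per partition row, carrying the partition values as split properties,
        // which is what the assertions on getProperty("year"/"month"/"day") rely on.
        splits.add(Split.newBuilder(makeSpillLocation(request), makeEncryptionKey())
                .add("year", String.valueOf(year.readInteger()))
                .add("month", String.valueOf(month.readInteger()))
                .add("day", String.valueOf(day.readInteger()))
                .build());
    }

    // If rows remain, return a token so Athena calls doGetSplits again with it;
    // a null token tells the engine this was the last batch.
    String continuationToken = row < partitions.getRowCount() ? String.valueOf(row) : null;
    return new GetSplitsResponse(request.getCatalogName(), splits, continuationToken);
}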

Example 43 with GetSplitsRequest

use of com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest in project aws-athena-query-federation by awslabs.

the class LambdaMetadataProvider method getSplits.

/**
 * This method builds and executes a GetSplitsRequest against the specified Lambda function.
 *
 * @param catalog the catalog name to be passed to Lambda
 * @param tableName the schema-qualified table name indicating the table for which splits should be retrieved
 * @param constraints the constraints to be applied to the request
 * @param partitions the block of partitions to be provided with the request
 * @param partitionCols the partition column names for the table in question
 * @param contToken a continuation token to be provided with the request, or null
 * @param metadataFunction the name of the Lambda function to call
 * @param identity the identity of the caller
 * @return the response
 */
public static GetSplitsResponse getSplits(String catalog, TableName tableName, Constraints constraints, Block partitions, List<String> partitionCols, String contToken, String metadataFunction, FederatedIdentity identity) {
    String queryId = generateQueryId();
    log.info("Submitting GetSplitsRequest with ID " + queryId);
    try (GetSplitsRequest request = new GetSplitsRequest(identity, queryId, catalog, tableName, partitions, partitionCols, constraints, contToken)) {
        log.info("Submitting request: {}", request);
        GetSplitsResponse response = (GetSplitsResponse) getService(metadataFunction, identity, catalog).call(request);
        log.info("Received response: {}", response);
        return response;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse)
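
A hypothetical call site for this helper follows; the Lambda function name, catalog, schema, and table are placeholders, and the partitions Block and FederatedIdentity are assumed to come from an earlier getTableLayout call and the invoking request.

// Hypothetical invocation; none of these names come from the project sources.
GetSplitsResponse splitsResponse = LambdaMetadataProvider.getSplits(
        "my_catalog",
        new TableName("my_schema", "my_table"),
        new Constraints(new HashMap<>()),      // empty constraint map: no predicate pushdown
        partitions,                            // Block returned by a prior getTableLayout call
        Collections.singletonList("year"),     // the table's partition columns
        null,                                  // first call, so no continuation token yet
        "my-connector-metadata-function",
        identity);

for (Split split : splitsResponse.getSplits()) {
    System.out.println("Split properties: " + split.getProperties());
}

Note that the helper builds the request in a try-with-resources block: federation requests hold Apache Arrow buffers (the partitions Block) that must be released when the call completes.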

Example 44 with GetSplitsRequest

use of com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest in project aws-athena-query-federation by awslabs.

the class ImpalaMuxMetadataHandlerTest method doGetSplits.

@Test
public void doGetSplits() {
    GetSplitsRequest getSplitsRequest = Mockito.mock(GetSplitsRequest.class);
    Mockito.when(getSplitsRequest.getCatalogName()).thenReturn("metaImpala");
    this.jdbcMetadataHandler.doGetSplits(this.allocator, getSplitsRequest);
    Mockito.verify(this.impalaMetadataHandler, Mockito.times(1)).doGetSplits(Mockito.eq(this.allocator), Mockito.eq(getSplitsRequest));
}
Also used : GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) Test(org.junit.Test)
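
This mux test only verifies delegation: a request whose catalog is "metaImpala" must be forwarded, unchanged, to the wrapped ImpalaMetadataHandler. A rough sketch of that routing pattern follows, using a hypothetical delegate interface and catalog map; the real ImpalaMuxMetadataHandler is built on the SDK's multiplexing JDBC handler, so treat this purely as an illustration of the shape being tested.

import java.util.Map;

import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
import com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest;
import com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse;

// Hypothetical delegate abstraction standing in for the concrete per-engine handlers.
interface DelegateMetadataHandler {
    GetSplitsResponse doGetSplits(BlockAllocator allocator, GetSplitsRequest request);
}

// Illustrative multiplexer: routes each request to the handler registered for the
// request's catalog name, which is exactly what the Mockito.verify above checks.
class CatalogRoutingMetadataHandler {
    private final Map<String, DelegateMetadataHandler> delegates;

    CatalogRoutingMetadataHandler(Map<String, DelegateMetadataHandler> delegates) {
        this.delegates = delegates;
    }

    public GetSplitsResponse doGetSplits(BlockAllocator allocator, GetSplitsRequest request) {
        DelegateMetadataHandler delegate = delegates.get(request.getCatalogName());
        if (delegate == null) {
            throw new RuntimeException("No metadata handler registered for catalog " + request.getCatalogName());
        }
        return delegate.doGetSplits(allocator, request);
    }
}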

Example 45 with GetSplitsRequest

use of com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest in project aws-athena-query-federation by awslabs.

the class MetricsMetadataHandlerTest method doGetMetricSamplesSplits.

@Test
public void doGetMetricSamplesSplits() throws Exception {
    logger.info("doGetMetricSamplesSplits: enter");
    String namespaceFilter = "MyNameSpace";
    String statistic = "p90";
    int numMetrics = 10;
    when(mockMetrics.listMetrics(any(ListMetricsRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
        ListMetricsRequest request = invocation.getArgumentAt(0, ListMetricsRequest.class);
        // assert that the namespace filter was indeed pushed down
        assertEquals(namespaceFilter, request.getNamespace());
        String nextToken = (request.getNextToken() == null) ? "valid" : null;
        List<Metric> metrics = new ArrayList<>();
        for (int i = 0; i < numMetrics; i++) {
            metrics.add(new Metric().withNamespace(namespaceFilter).withMetricName("metric-" + i));
        }
        return new ListMetricsResult().withNextToken(nextToken).withMetrics(metrics);
    });
    Schema schema = SchemaBuilder.newBuilder().addIntField("partitionId").build();
    Block partitions = allocator.createBlock(schema);
    BlockUtils.setValue(partitions.getFieldVector("partitionId"), 1, 1);
    partitions.setRowCount(1);
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(NAMESPACE_FIELD, EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false).add(namespaceFilter).build());
    constraintsMap.put(STATISTIC_FIELD, EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false).add(statistic).build());
    String continuationToken = null;
    GetSplitsRequest originalReq = new GetSplitsRequest(identity, "queryId", "catalog_name", new TableName(defaultSchema, "metric_samples"), partitions, Collections.singletonList("partitionId"), new Constraints(constraintsMap), continuationToken);
    int numContinuations = 0;
    do {
        GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
        logger.info("doGetMetricSamplesSplits: req[{}]", req);
        MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
        assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
        GetSplitsResponse response = (GetSplitsResponse) rawResponse;
        continuationToken = response.getContinuationToken();
        logger.info("doGetMetricSamplesSplits: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());
        assertEquals(3, response.getSplits().size());
        for (Split nextSplit : response.getSplits()) {
            assertNotNull(nextSplit.getProperty(SERIALIZED_METRIC_STATS_FIELD_NAME));
        }
        if (continuationToken != null) {
            numContinuations++;
        }
    } while (continuationToken != null);
    assertEquals(1, numContinuations);
    logger.info("doGetMetricSamplesSplits: exit");
}
Also used : GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) ListMetricsResult(com.amazonaws.services.cloudwatch.model.ListMetricsResult) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) InvocationOnMock(org.mockito.invocation.InvocationOnMock) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) MetadataResponse(com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) ListMetricsRequest(com.amazonaws.services.cloudwatch.model.ListMetricsRequest) Metric(com.amazonaws.services.cloudwatch.model.Metric) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)
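
The mocked listMetrics answer returns a nextToken only when the incoming request carries none, so the handler pages through CloudWatch twice, which lines up with the single continuation the test asserts. For reference, here is a plain sketch of that CloudWatch pagination pattern using the same SDK v1 types the test imports; the namespace is a placeholder and this is independent of the connector's own split logic.

import java.util.ArrayList;
import java.util.List;

import com.amazonaws.services.cloudwatch.AmazonCloudWatch;
import com.amazonaws.services.cloudwatch.AmazonCloudWatchClientBuilder;
import com.amazonaws.services.cloudwatch.model.ListMetricsRequest;
import com.amazonaws.services.cloudwatch.model.ListMetricsResult;
import com.amazonaws.services.cloudwatch.model.Metric;

public class ListMetricsPagingSketch {
    public static void main(String[] args) {
        AmazonCloudWatch cloudWatch = AmazonCloudWatchClientBuilder.defaultClient();

        List<Metric> allMetrics = new ArrayList<>();
        String nextToken = null;
        do {
            // Push the namespace filter down to CloudWatch, as the test asserts the handler
            // does, and resume from the previous page's token when one was returned.
            ListMetricsResult result = cloudWatch.listMetrics(new ListMetricsRequest()
                    .withNamespace("MyNameSpace")
                    .withNextToken(nextToken));
            allMetrics.addAll(result.getMetrics());
            nextToken = result.getNextToken();
        } while (nextToken != null);

        System.out.println("Fetched " + allMetrics.size() + " metrics");
    }
}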

Aggregations

Types co-occurring with GetSplitsRequest across these examples, with their usage counts:

GetSplitsRequest (com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest): 46
Test (org.junit.Test): 41
GetSplitsResponse (com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse): 32
Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints): 29
TableName (com.amazonaws.athena.connector.lambda.domain.TableName): 24
Schema (org.apache.arrow.vector.types.pojo.Schema): 24
GetTableLayoutRequest (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest): 17
BlockAllocator (com.amazonaws.athena.connector.lambda.data.BlockAllocator): 16
BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl): 16
GetTableLayoutResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse): 16
HashMap (java.util.HashMap): 16
HashSet (java.util.HashSet): 15
MetadataResponse (com.amazonaws.athena.connector.lambda.metadata.MetadataResponse): 14
Map (java.util.Map): 14
Block (com.amazonaws.athena.connector.lambda.data.Block): 13
ResultSet (java.sql.ResultSet): 12
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 12
PreparedStatement (java.sql.PreparedStatement): 9
ArrayList (java.util.ArrayList): 9
Split (com.amazonaws.athena.connector.lambda.domain.Split): 8