Example 26 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class SqlServerMetadataHandler method getPartitions.

/**
 * Check whether the input table is a view or not. If it's a view, it will not have any partition info, and
 * data will be fetched with a single split. If it's a table with no partitions, data will likewise be fetched
 * with a single split. If it's a partitioned table, we fetch the partition info and create splits equal to
 * the number of partitions for parallel processing.
 * @param blockWriter
 * @param getTableLayoutRequest
 * @param queryStatusChecker
 * @throws Exception
 */
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker) throws Exception {
    LOGGER.info("{}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    List<String> params = Arrays.asList(getTableLayoutRequest.getTableName().getTableName(), getTableLayoutRequest.getTableName().getSchemaName());
    // check whether the input table is a view or not
    String viewFlag = "N";
    try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider());
        PreparedStatement preparedStatement = new PreparedStatementBuilder().withConnection(connection).withQuery(VIEW_CHECK_QUERY).withParameters(params).build();
        ResultSet resultSet = preparedStatement.executeQuery()) {
        if (resultSet.next()) {
            viewFlag = "VIEW".equalsIgnoreCase(resultSet.getString("TYPE_DESC")) ? "Y" : "N";
        }
        LOGGER.info("viewFlag: {}", viewFlag);
    } catch (SQLException sqlException) {
        throw new SQLException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
    }
    List<String> parameters = Arrays.asList(getTableLayoutRequest.getTableName().getSchemaName() + "." + getTableLayoutRequest.getTableName().getTableName());
    try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) {
        try (PreparedStatement preparedStatement = new PreparedStatementBuilder().withConnection(connection).withQuery(GET_PARTITIONS_QUERY).withParameters(parameters).build();
            PreparedStatement preparedStatement2 = new PreparedStatementBuilder().withConnection(connection).withQuery(ROW_COUNT_QUERY).withParameters(parameters).build();
            ResultSet resultSet = preparedStatement.executeQuery();
            ResultSet resultSet2 = preparedStatement2.executeQuery()) {
            // check whether the table has partitions, using ROW_COUNT_QUERY
            if (resultSet2.next()) {
                rowCount = resultSet2.getInt("ROW_COUNT");
                LOGGER.info("rowCount: {}", rowCount);
            }
            // create a single split for a view or a non-partitioned table
            if ("Y".equals(viewFlag) || rowCount == 0) {
                LOGGER.debug("Getting as single Partition: ");
                blockWriter.writeRows((Block block, int rowNum) -> {
                    block.setValue(PARTITION_NUMBER, rowNum, ALL_PARTITIONS);
                    // we wrote 1 row so we return 1
                    return 1;
                });
            } else {
                LOGGER.debug("Getting data with diff Partitions: ");
                // get partition details from SQL Server metadata tables
                getPartitionFunction(params);
                // Include the first partition because it's not retrieved from GET_PARTITIONS_QUERY
                blockWriter.writeRows((Block block, int rowNum) -> {
                    block.setValue(PARTITION_NUMBER, rowNum, "1");
                    return 1;
                });
                while (resultSet.next()) {
                    final String partitionNumber = resultSet.getString(PARTITION_NUMBER);
                    // 1. Returns all partitions of the table; constraint push-down to filter partitions is not supported.
                    // 2. This API is not paginated; an ORDER BY and LIMIT clause with offsets could be used here.
                    blockWriter.writeRows((Block block, int rowNum) -> {
                        block.setValue(PARTITION_NUMBER, rowNum, partitionNumber);
                        // we wrote 1 row so we return 1
                        return 1;
                    });
                }
            }
        }
    } catch (SQLException sqlException) {
        throw new SQLException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
    }
}
Also used : SQLException(java.sql.SQLException) Connection(java.sql.Connection) ResultSet(java.sql.ResultSet) Block(com.amazonaws.athena.connector.lambda.data.Block) PreparedStatement(java.sql.PreparedStatement) PreparedStatementBuilder(com.amazonaws.athena.connectors.jdbc.manager.PreparedStatementBuilder)
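
The three query constants used above (VIEW_CHECK_QUERY, ROW_COUNT_QUERY, GET_PARTITIONS_QUERY) are defined elsewhere in SqlServerMetadataHandler and are not shown in this snippet. Below is a minimal sketch of what such SQL Server metadata queries could look like, assuming sys.objects and sys.dm_db_partition_stats as the sources; the actual query text in the connector may differ.

// Illustrative sketches only; the real constants in the connector may differ.
// Returns TYPE_DESC ("VIEW" or "USER_TABLE") for the given table and schema name.
static final String VIEW_CHECK_QUERY =
    "SELECT TYPE_DESC FROM sys.objects o JOIN sys.schemas s ON o.schema_id = s.schema_id " +
    "WHERE o.name = ? AND s.name = ?";
// Counts rows stored beyond partition 1; 0 means the table is not partitioned.
static final String ROW_COUNT_QUERY =
    "SELECT COUNT(*) AS ROW_COUNT FROM sys.dm_db_partition_stats " +
    "WHERE object_id = OBJECT_ID(?) AND partition_number > 1";
// Lists partition numbers above 1 (partition 1 is written by hand in getPartitions above).
static final String GET_PARTITIONS_QUERY =
    "SELECT DISTINCT partition_number AS PARTITION_NUMBER FROM sys.dm_db_partition_stats " +
    "WHERE object_id = OBJECT_ID(?) AND partition_number > 1 ORDER BY partition_number";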

Example 27 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class TimestreamMetadataHandlerTest method doGetSplits.

@Test
public void doGetSplits() throws Exception {
    logger.info("doGetSplits - enter");
    List<String> partitionCols = new ArrayList<>();
    Block partitions = BlockUtils.newBlock(allocator, "partition_id", Types.MinorType.INT.getType(), 0);
    String continuationToken = null;
    GetSplitsRequest originalReq = new GetSplitsRequest(identity, "query-id", defaultSchema, new TableName("database1", "table1"), partitions, partitionCols, new Constraints(new HashMap<>()), null);
    GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
    logger.info("doGetSplits: req[{}]", req);
    MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
    assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
    GetSplitsResponse response = (GetSplitsResponse) rawResponse;
    continuationToken = response.getContinuationToken();
    logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", new Object[] { continuationToken, response.getSplits().size() });
    assertTrue("Continuation criteria violated", response.getSplits().size() == 1);
    assertTrue("Continuation criteria violated", response.getContinuationToken() == null);
    logger.info("doGetSplits - exit");
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) ArrayList(java.util.ArrayList) MetadataResponse(com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) Test(org.junit.Test)
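
As a side note, the partitions Block built with BlockUtils.newBlock above holds a single INT row (the trailing varargs value 0). A minimal sketch of inspecting such a Block with a FieldReader (org.apache.arrow.vector.complex.reader.FieldReader, as used elsewhere in these examples), assuming Block#getFieldReader behaves as in the other connector tests:

// Hypothetical inspection sketch: walk the single-field partitions Block row by row.
FieldReader reader = partitions.getFieldReader("partition_id");
for (int row = 0; row < partitions.getRowCount(); row++) {
    reader.setPosition(row);
    // readInteger() returns the INT value stored at this row (0 in the test above)
    logger.info("partition_id[{}] = {}", row, reader.readInteger());
}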

Example 28 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class TimestreamMetadataHandlerTest method doGetTableLayout.

@Test
public void doGetTableLayout() throws Exception {
    logger.info("doGetTableLayout - enter");
    Schema schema = SchemaBuilder.newBuilder().build();
    GetTableLayoutRequest req = new GetTableLayoutRequest(identity, "query-id", defaultSchema, new TableName("database1", "table1"), new Constraints(new HashMap<>()), schema, Collections.EMPTY_SET);
    GetTableLayoutResponse res = handler.doGetTableLayout(allocator, req);
    logger.info("doGetTableLayout - {}", res);
    Block partitions = res.getPartitions();
    for (int row = 0; row < partitions.getRowCount() && row < 10; row++) {
        logger.info("doGetTableLayout:{} {}", row, BlockUtils.rowToString(partitions, row));
    }
    assertTrue(partitions.getRowCount() == 1);
    logger.info("doGetTableLayout - exit");
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) HashMap(java.util.HashMap) Schema(org.apache.arrow.vector.types.pojo.Schema) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) Block(com.amazonaws.athena.connector.lambda.data.Block) Test(org.junit.Test)

Example 29 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class TPCDSMetadataHandlerTest method doGetSplits.

@Test
public void doGetSplits() {
    logger.info("doGetSplits: enter");
    Schema schema = SchemaBuilder.newBuilder().addIntField("partitionId").build();
    Block partitions = BlockUtils.newBlock(allocator, "partitionId", Types.MinorType.INT.getType(), 1);
    String continuationToken = null;
    GetSplitsRequest originalReq = new GetSplitsRequest(identity, "queryId", "catalog_name", new TableName("tpcds1", "customer"), partitions, Collections.EMPTY_LIST, new Constraints(new HashMap<>()), continuationToken);
    int numContinuations = 0;
    do {
        GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
        logger.info("doGetSplits: req[{}]", req);
        MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
        assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
        GetSplitsResponse response = (GetSplitsResponse) rawResponse;
        continuationToken = response.getContinuationToken();
        logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());
        for (Split nextSplit : response.getSplits()) {
            assertNotNull(nextSplit.getProperty(SPLIT_NUMBER_FIELD));
            assertNotNull(nextSplit.getProperty(SPLIT_TOTAL_NUMBER_FIELD));
            assertNotNull(nextSplit.getProperty(SPLIT_SCALE_FACTOR_FIELD));
        }
        if (continuationToken != null) {
            numContinuations++;
        }
    } while (continuationToken != null);
    assertTrue(numContinuations == 0);
    logger.info("doGetSplits: exit");
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) Schema(org.apache.arrow.vector.types.pojo.Schema) MetadataResponse(com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) Split(com.amazonaws.athena.connector.lambda.domain.Split) Test(org.junit.Test)
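
The test above drives the consuming side of split pagination: it keeps calling doGetSplits until the continuation token comes back null (here the handler never continues, so numContinuations stays 0). For contrast, a minimal sketch of the producing side, assuming a hypothetical MAX_SPLITS_PER_REQUEST constant and a start offset encoded as the token; real handlers may page differently.

// Hypothetical paging sketch: return one page of splits per call and encode the
// next start offset in the continuation token; a null token signals the last page.
private GetSplitsResponse pageSplits(GetSplitsRequest req, List<Split> allSplits) {
    int start = req.getContinuationToken() == null ? 0 : Integer.parseInt(req.getContinuationToken());
    int end = Math.min(start + MAX_SPLITS_PER_REQUEST, allSplits.size());
    Set<Split> page = new HashSet<>(allSplits.subList(start, end));
    String nextToken = (end < allSplits.size()) ? String.valueOf(end) : null;
    return new GetSplitsResponse(req.getCatalogName(), page, nextToken);
}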

Example 30 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class TPCDSRecordHandlerTest method doReadRecordsSpill.

@Test
public void doReadRecordsSpill() throws Exception {
    logger.info("doReadRecordsSpill: enter");
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("c_current_cdemo_sk", SortedRangeSet.of(Range.range(allocator, Types.MinorType.BIGINT.getType(), 100L, true, 100_000_000L, true)));
    ReadRecordsRequest request = new ReadRecordsRequest(identity,
            "catalog",
            "queryId-" + System.currentTimeMillis(),
            new TableName("tpcds1", table.getName()),
            schemaForRead,
            Split.newBuilder(S3SpillLocation.newBuilder()
                            .withBucket(UUID.randomUUID().toString())
                            .withSplitId(UUID.randomUUID().toString())
                            .withQueryId(UUID.randomUUID().toString())
                            .withIsDirectory(true)
                            .build(), keyFactory.create())
                    .add(SPLIT_NUMBER_FIELD, "0")
                    .add(SPLIT_TOTAL_NUMBER_FIELD, "10000")
                    .add(SPLIT_SCALE_FACTOR_FIELD, "1")
                    .build(),
            new Constraints(constraintsMap),
            // ~1.5MB so we should see some spill
            1_500_000L,
            0);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);
    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() > 1);
        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) next;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                // assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
    logger.info("doReadRecordsSpill: exit");
}
Also used : RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Block(com.amazonaws.athena.connector.lambda.data.Block) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)
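
The spill path above is triggered by the ~1.5MB response cap passed in the request. When results fit under the cap, doReadRecords returns an in-memory ReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) instead. A minimal sketch of handling both cases, reusing the handler, allocator, request, logger, and spillReader from the test above:

RecordResponse raw = handler.doReadRecords(allocator, request);
if (raw instanceof ReadRecordsResponse) {
    // small result: a single inline Block rides back in the response
    try (ReadRecordsResponse inline = (ReadRecordsResponse) raw) {
        logger.info("inline rows: {}", inline.getRecords().getRowCount());
    }
} else {
    // large result: each Block was spilled to S3 and must be fetched via the spill reader
    try (RemoteReadRecordsResponse remote = (RemoteReadRecordsResponse) raw) {
        for (SpillLocation next : remote.getRemoteBlocks()) {
            try (Block block = spillReader.read((S3SpillLocation) next, remote.getEncryptionKey(), remote.getSchema())) {
                logger.info("spilled rows: {}", block.getRowCount());
            }
        }
    }
}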

Aggregations

Block (com.amazonaws.athena.connector.lambda.data.Block)113 Test (org.junit.Test)39 HashMap (java.util.HashMap)35 Schema (org.apache.arrow.vector.types.pojo.Schema)35 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)32 Split (com.amazonaws.athena.connector.lambda.domain.Split)31 GetSplitsResponse (com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse)28 FieldReader (org.apache.arrow.vector.complex.reader.FieldReader)28 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)27 SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation)23 HashSet (java.util.HashSet)23 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)20 Field (org.apache.arrow.vector.types.pojo.Field)17 GetSplitsRequest (com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest)13 PreparedStatement (java.sql.PreparedStatement)13 ResultSet (java.sql.ResultSet)13 ArrayList (java.util.ArrayList)13 MetadataResponse (com.amazonaws.athena.connector.lambda.metadata.MetadataResponse)12 Connection (java.sql.Connection)12 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)11