
Example 86 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class TeradataMetadataHandler method getPartitionDetails.

/**
 * Internal helper that fetches partition details and writes one partition row per result.
 * @param blockWriter used to write the partition rows into the Apache Arrow response
 * @param getPartitionsQuery the SQL used to list the table's partitions
 * @param parameters bind parameters for the prepared statement
 * @param connection the JDBC connection on which to execute the query
 * @throws SQLException
 */
private void getPartitionDetails(BlockWriter blockWriter, String getPartitionsQuery, List<String> parameters, Connection connection) throws SQLException {
    try (PreparedStatement preparedStatement = new PreparedStatementBuilder().withConnection(connection).withQuery(getPartitionsQuery).withParameters(parameters).build();
        ResultSet resultSet = preparedStatement.executeQuery()) {
        // Return a single partition if no partitions are defined
        if (!resultSet.next()) {
            blockWriter.writeRows((Block block, int rowNum) -> {
                block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, ALL_PARTITIONS);
                // we wrote 1 row so we return 1
                return 1;
            });
        } else {
            do {
                final String partitionName = resultSet.getString(BLOCK_PARTITION_COLUMN_NAME);
                // 1. Returns all partitions of the table; constraint push-down to filter partitions is not supported.
                // 2. This API is not paginated; an ORDER BY with LIMIT/OFFSET clauses could be used here.
                blockWriter.writeRows((Block block, int rowNum) -> {
                    block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, partitionName);
                    // we wrote 1 row so we return 1
                    return 1;
                });
            } while (resultSet.next());
        }
    } catch (RuntimeException runtimeException) {
        LOGGER.info("Exception occurred: {}", runtimeException.getMessage());
        if (runtimeException.getMessage().equalsIgnoreCase("Invalid Partition field.")) {
            blockWriter.writeRows((Block block, int rowNum) -> {
                block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, ALL_PARTITIONS);
                // we wrote 1 row so we return 1
                return 1;
            });
        }
    }
}
Also used : ResultSet(java.sql.ResultSet) Block(com.amazonaws.athena.connector.lambda.data.Block) PreparedStatement(java.sql.PreparedStatement) PreparedStatementBuilder(com.amazonaws.athena.connectors.jdbc.manager.PreparedStatementBuilder)
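
The lambda passed to writeRows above follows the connector SDK's BlockWriter.RowWriter contract: it receives the output Block and the next free row index, and returns the number of rows it wrote. A minimal standalone sketch of the same pattern, writing a fixed list of partition names (the list contents are illustrative only, not from the connector):

private void writePartitionNames(BlockWriter blockWriter) {
    // Illustrative data; a real handler would read these from the catalog.
    List<String> partitionNames = Arrays.asList("p1", "p2", "p3");
    for (String partitionName : partitionNames) {
        blockWriter.writeRows((Block block, int rowNum) -> {
            // setValue returns false if the value is filtered out by constraints
            block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, partitionName);
            // we wrote 1 row so we return 1
            return 1;
        });
    }
}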

Example 87 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class TeradataMetadataHandler method getPartitions.

/*
     * We first check whether the input table is a view. A view has no partition info, so its
     * data is fetched with a single split. The same applies to a table with no partitions.
     * For a partitioned table, we fetch the partition info and create splits equal to the
     * number of partitions for parallel processing.
     * Teradata partitions
     * @param blockWriter
     * @param getTableLayoutRequest
     * @param queryStatusChecker
     * @throws Exception
     *
     */
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker) throws Exception {
    LOGGER.info("{}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    final String getPartitionsQuery = "Select DISTINCT partition FROM " + getTableLayoutRequest.getTableName().getSchemaName() + "." + getTableLayoutRequest.getTableName().getTableName() + " where 1= ?";
    boolean viewFlag = false;
    // Check if input table is a view
    List<String> viewparameters = Arrays.asList(getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) {
        try (PreparedStatement preparedStatement = new PreparedStatementBuilder().withConnection(connection).withQuery(VIEW_CHECK_QUERY).withParameters(viewparameters).build();
            ResultSet resultSet = preparedStatement.executeQuery()) {
            if (resultSet.next()) {
                viewFlag = true;
            }
            LOGGER.debug("viewFlag: {}", viewFlag);
        } catch (SQLException sqlException) {
            throw new RuntimeException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
        }
    }
    // if the input table is a view, there will be a single split
    if (viewFlag) {
        blockWriter.writeRows((Block block, int rowNum) -> {
            block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, ALL_PARTITIONS);
            return 1;
        });
    } else {
        /*
             It is not a view, so check whether the query should run with a single split by reading
             the environment variable partitioncount. partitioncount is a configurable environment
             variable that limits the maximum number of partitions; if the table exceeds that value,
             only a single split is used. This use case handles the scenario where a table has a huge
             number of partitions and the query times out. With an appropriate predicate filter applied,
             the data can then be fetched without the query timing out.
            */
        boolean nonPartitionApproach = useNonPartitionApproach(getTableLayoutRequest);
        if (nonPartitionApproach) {
            blockWriter.writeRows((Block block, int rowNum) -> {
                block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, ALL_PARTITIONS);
                return 1;
            });
        } else {
            List<String> parameters = Arrays.asList(Integer.toString(1));
            try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) {
                getPartitionDetails(blockWriter, getPartitionsQuery, parameters, connection);
            } catch (SQLException sqlException) {
                throw new RuntimeException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
            }
        }
    }
}
Also used : SQLException(java.sql.SQLException) Connection(java.sql.Connection) ResultSet(java.sql.ResultSet) Block(com.amazonaws.athena.connector.lambda.data.Block) PreparedStatement(java.sql.PreparedStatement) PreparedStatementBuilder(com.amazonaws.athena.connectors.jdbc.manager.PreparedStatementBuilder)
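
useNonPartitionApproach is not shown in this example. A minimal sketch of what such a guard might look like, assuming the environment variable is named partitioncount (per the comment above) and that the partition count can be fetched with a COUNT(DISTINCT partition) query; the default limit of 500 is an assumption, not the connector's actual value:

private boolean useNonPartitionApproach(GetTableLayoutRequest request) throws SQLException {
    // Limit taken from the partitioncount environment variable; the default is an assumption
    int partitionCountLimit = Integer.parseInt(System.getenv().getOrDefault("partitioncount", "500"));
    String countQuery = "SELECT COUNT(DISTINCT partition) FROM " + request.getTableName().getSchemaName() + "." + request.getTableName().getTableName();
    try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider());
        PreparedStatement statement = connection.prepareStatement(countQuery);
        ResultSet resultSet = statement.executeQuery()) {
        // Fall back to a single split when the table exceeds the configured partition limit
        return resultSet.next() && resultSet.getInt(1) > partitionCountLimit;
    }
}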

Example 88 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class TimestreamRecordHandlerTest method doReadRecordsSpill.

@Test
public void doReadRecordsSpill() throws Exception {
    String expectedQuery = "SELECT measure_name, measure_value::double, az, time, hostname, region FROM \"my_schema\".\"my_table\" WHERE (\"az\" IN ('us-east-1a','us-east-1b'))";
    QueryResult mockResult = makeMockQueryResult(schemaForRead, 100_000);
    when(mockClient.query(any(QueryRequest.class))).thenAnswer((Answer<QueryResult>) invocationOnMock -> {
        QueryRequest request = (QueryRequest) invocationOnMock.getArguments()[0];
        assertEquals(expectedQuery, request.getQueryString().replace("\n", ""));
        return mockResult;
    });
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("az", EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, true).add("us-east-1a").add("us-east-1b").build());
    S3SpillLocation splitLoc = S3SpillLocation.newBuilder().withBucket(UUID.randomUUID().toString()).withSplitId(UUID.randomUUID().toString()).withQueryId(UUID.randomUUID().toString()).withIsDirectory(true).build();
    Split.Builder splitBuilder = Split.newBuilder(splitLoc, keyFactory.create());
    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG,
            "queryId-" + System.currentTimeMillis(), new TableName(DEFAULT_SCHEMA, TEST_TABLE),
            schemaForRead, splitBuilder.build(), new Constraints(constraintsMap),
            // ~1.5MB so we should see some spill
            1_500_000L, 0L);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);
    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() > 1);
        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) next;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                // assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
}
Also used : QueryResult(com.amazonaws.services.timestreamquery.model.QueryResult) TestUtils.makeMockQueryResult(com.amazonaws.athena.connectors.timestream.TestUtils.makeMockQueryResult) QueryRequest(com.amazonaws.services.timestreamquery.model.QueryRequest) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Block(com.amazonaws.athena.connector.lambda.data.Block) Split(com.amazonaws.athena.connector.lambda.domain.Split) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) Test(org.junit.Test)
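
The spillReader used to re-read the spilled blocks is created in the test's setup (not shown here). A minimal sketch of its construction, assuming the test's AmazonS3 client and BlockAllocator:

// Re-reads spilled blocks from S3 with the same allocator the test uses for Arrow memory;
// amazonS3 and allocator are assumed to come from the test's @Before setup.
S3BlockSpillReader spillReader = new S3BlockSpillReader(amazonS3, allocator);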

Example 89 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class TPCDSRecordHandler method readWithConstraint.

/**
 * Generates TPC-DS data for the given table and scale factor as defined by the requested Split.
 *
 * @see RecordHandler
 */
@Override
protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker) throws IOException {
    Split split = recordsRequest.getSplit();
    int splitNumber = Integer.parseInt(split.getProperty(SPLIT_NUMBER_FIELD));
    int totalNumSplits = Integer.parseInt(split.getProperty(SPLIT_TOTAL_NUMBER_FIELD));
    int scaleFactor = Integer.parseInt(split.getProperty(SPLIT_SCALE_FACTOR_FIELD));
    Table table = validateTable(recordsRequest.getTableName());
    Session session = Session.getDefaultSession().withScale(scaleFactor).withParallelism(totalNumSplits).withChunkNumber(splitNumber + 1).withTable(table).withNoSexism(true);
    Results results = constructResults(table, session);
    Iterator<List<List<String>>> itr = results.iterator();
    Map<Integer, CellWriter> writers = makeWriters(recordsRequest.getSchema(), table);
    while (itr.hasNext() && queryStatusChecker.isQueryRunning()) {
        List<String> row = itr.next().get(0);
        spiller.writeRows((Block block, int numRow) -> {
            boolean matched = true;
            for (Map.Entry<Integer, CellWriter> nextWriter : writers.entrySet()) {
                matched &= nextWriter.getValue().write(block, numRow, row.get(nextWriter.getKey()));
            }
            return matched ? 1 : 0;
        });
    }
}
Also used : Table(com.teradata.tpcds.Table) Results.constructResults(com.teradata.tpcds.Results.constructResults) Results(com.teradata.tpcds.Results) Block(com.amazonaws.athena.connector.lambda.data.Block) List(java.util.List) Split(com.amazonaws.athena.connector.lambda.domain.Split) HashMap(java.util.HashMap) Map(java.util.Map) Session(com.teradata.tpcds.Session)
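
CellWriter is an internal abstraction of TPCDSRecordHandler whose definition is not shown here. A minimal sketch of the shape the loop above requires (an assumption, not the handler's actual code):

// Hypothetical shape of the per-column writer used above: converts the raw TPC-DS
// string for one column, writes it into the Block, and returns false when the value
// is filtered out by a pushed-down constraint.
interface CellWriter {
    boolean write(Block block, int rowNum, String rawValue);
}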

Example 90 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class VerticaMetadataHandler method getPartitions.

/**
 * Used to get the partitions that must be read from the request table in order to satisfy the requested predicate.
 * Here we generate the SQL from the request and attach it as an additional column
 *
 * @param blockWriter Used to write rows (partitions) into the Apache Arrow response.
 * @param request Provides details of the catalog, database, and table being queried as well as any filter predicate.
 * @param queryStatusChecker A QueryStatusChecker that you can use to stop doing work for a query that has already terminated
 */
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker) throws SQLException {
    logger.info("in getPartitions: " + request);
    Schema schemaName = request.getSchema();
    TableName tableName = request.getTableName();
    Constraints constraints = request.getConstraints();
    // get the bucket where export results will be uploaded
    String s3ExportBucket = getS3ExportBucket();
    // Append a random suffix to the query id to support multiple federated queries within a single query
    String randomStr = UUID.randomUUID().toString();
    String queryID = request.getQueryId().replace("-", "").concat(randomStr);
    // Build the SQL query
    Connection connection = getConnection(request);
    DatabaseMetaData dbMetadata = connection.getMetaData();
    ResultSet definition = dbMetadata.getColumns(null, tableName.getSchemaName(), tableName.getTableName(), null);
    VerticaExportQueryBuilder queryBuilder = queryFactory.createVerticaExportQueryBuilder();
    String preparedSQLStmt = queryBuilder.withS3ExportBucket(s3ExportBucket).withQueryID(queryID).withColumns(definition, schemaName).fromTable(tableName.getSchemaName(), tableName.getTableName()).withConstraints(constraints, schemaName).build();
    logger.info("Vertica Export Statement: {}", preparedSQLStmt);
    // Build the Set AWS Region SQL
    String awsRegionSql = queryBuilder.buildSetAwsRegionSql(amazonS3.getRegion().toString());
    // write the prepared SQL statement to the partition column created in enhancePartitionSchema
    blockWriter.writeRows((Block block, int rowNum) -> {
        boolean matched;
        matched = block.setValue("preparedStmt", rowNum, preparedSQLStmt);
        matched &= block.setValue("queryId", rowNum, queryID);
        matched &= block.setValue("awsRegionSql", rowNum, awsRegionSql);
        // If all fields matched, we wrote 1 row during this call, so we return 1
        return matched ? 1 : 0;
    });
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) VerticaExportQueryBuilder(com.amazonaws.athena.connectors.vertica.query.VerticaExportQueryBuilder) Schema(org.apache.arrow.vector.types.pojo.Schema) Block(com.amazonaws.athena.connector.lambda.data.Block)
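
For the three setValue calls above to succeed, the preparedStmt, queryId, and awsRegionSql fields must already be declared in the partition schema (the comment points at enhancePartitionSchema). A minimal sketch of declaring them with the SDK's SchemaBuilder, assuming plain VARCHAR fields:

// Declare the partition columns that getPartitions() populates above.
// Field names come from the example; modeling them as VARCHARs is an assumption.
Schema partitionSchema = SchemaBuilder.newBuilder()
    .addStringField("preparedStmt")
    .addStringField("queryId")
    .addStringField("awsRegionSql")
    .build();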

Aggregations

Block (com.amazonaws.athena.connector.lambda.data.Block): 113
Test (org.junit.Test): 39
HashMap (java.util.HashMap): 35
Schema (org.apache.arrow.vector.types.pojo.Schema): 35
Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints): 32
Split (com.amazonaws.athena.connector.lambda.domain.Split): 31
GetSplitsResponse (com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse): 28
FieldReader (org.apache.arrow.vector.complex.reader.FieldReader): 28
TableName (com.amazonaws.athena.connector.lambda.domain.TableName): 27
SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation): 23
HashSet (java.util.HashSet): 23
ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet): 20
Field (org.apache.arrow.vector.types.pojo.Field): 17
GetSplitsRequest (com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest): 13
PreparedStatement (java.sql.PreparedStatement): 13
ResultSet (java.sql.ResultSet): 13
ArrayList (java.util.ArrayList): 13
MetadataResponse (com.amazonaws.athena.connector.lambda.metadata.MetadataResponse): 12
Connection (java.sql.Connection): 12
ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest): 11