Search in sources :

Example 1 with VerticaExportQueryBuilder

use of com.amazonaws.athena.connectors.vertica.query.VerticaExportQueryBuilder in project aws-athena-query-federation by awslabs.

the class VerticaMetadataHandlerTest method getPartitions.

/**
 * Checks that {@code doGetTableLayout} produces at least one partition row and that the row's
 * {@code preparedStmt} column holds the expected Vertica {@code EXPORT TO PARQUET} statement
 * for table {@code schema1.table1} with the day/month/year constraints set up below.
 *
 * @throws Exception if the mock setup or the handler call fails
 */
@Test
public void getPartitions() throws Exception {
    Schema tableSchema = SchemaBuilder.newBuilder().addIntField("day").addIntField("month").addIntField("year").addStringField("preparedStmt").addStringField("queryId").addStringField("awsRegionSql").build();
    Set<String> partitionCols = new HashSet<>();
    partitionCols.add("preparedStmt");
    partitionCols.add("queryId");
    partitionCols.add("awsRegionSql");
    // Constraints: day > 0, month > 0, year > 2000. The Arrow Types class is fully qualified
    // because the unqualified name Types is used for the JDBC type codes further down.
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("day", SortedRangeSet.copyOf(org.apache.arrow.vector.types.Types.MinorType.INT.getType(), ImmutableList.of(Range.greaterThan(allocator, org.apache.arrow.vector.types.Types.MinorType.INT.getType(), 0)), false));
    constraintsMap.put("month", SortedRangeSet.copyOf(org.apache.arrow.vector.types.Types.MinorType.INT.getType(), ImmutableList.of(Range.greaterThan(allocator, org.apache.arrow.vector.types.Types.MinorType.INT.getType(), 0)), false));
    constraintsMap.put("year", SortedRangeSet.copyOf(org.apache.arrow.vector.types.Types.MinorType.INT.getType(), ImmutableList.of(Range.greaterThan(allocator, org.apache.arrow.vector.types.Types.MinorType.INT.getType(), 2000)), false));
    // Declared outside the try so the finally block can release whichever of them was created.
    GetTableLayoutRequest req = null;
    GetTableLayoutResponse res = null;
    // Column metadata rows served by the mocked DatabaseMetaData.getColumns call.
    String[] schema = { "TABLE_SCHEM", "TABLE_NAME", "COLUMN_NAME", "TYPE_NAME" };
    Object[][] values = { { "testSchema", "testTable1", "day", "int" }, { "testSchema", "testTable1", "month", "int" }, { "testSchema", "testTable1", "year", "int" }, { "testSchema", "testTable1", "preparedStmt", "varchar" }, { "testSchema", "testTable1", "queryId", "varchar" }, { "testSchema", "testTable1", "awsRegionSql", "varchar" } };
    int[] types = { Types.INTEGER, Types.INTEGER, Types.INTEGER, Types.VARCHAR, Types.VARCHAR, Types.VARCHAR };
    AtomicInteger rowNumber = new AtomicInteger(-1);
    ResultSet resultSet = mockResultSet(schema, types, values, rowNumber);
    Mockito.when(connection.getMetaData().getColumns(null, "schema1", "table1", null)).thenReturn(resultSet);
    Mockito.when(queryFactory.createVerticaExportQueryBuilder()).thenReturn(new VerticaExportQueryBuilder(new ST("templateVerticaExportQuery")));
    Mockito.when(verticaMetadataHandlerMocked.getS3ExportBucket()).thenReturn("testS3Bucket");
    try {
        req = new GetTableLayoutRequest(this.federatedIdentity, "queryId", "default", new TableName("schema1", "table1"), new Constraints(constraintsMap), tableSchema, partitionCols);
        res = verticaMetadataHandlerMocked.doGetTableLayout(allocator, req);
        Block partitions = res.getPartitions();
        // Check the row count first so an empty block fails with a clear assertion rather than
        // with an error while reading a field of a row that does not exist.
        assertTrue(partitions.getRowCount() > 0);
        // The handler appends a random suffix to the query id, so the expected export directory
        // has to be built from the id actually written to the partition row.
        String actualQueryID = partitions.getFieldReader("queryId").readText().toString();
        String expectedExportSql = "EXPORT TO PARQUET(directory = 's3://testS3Bucket/" + actualQueryID + "', Compression='snappy', fileSizeMB=16, rowGroupSizeMB=16) " + "AS SELECT day,month,year,preparedStmt,queryId,awsRegionSql " + "FROM \"schema1\".\"table1\" " + "WHERE ((\"day\" > 0 )) AND ((\"month\" > 0 )) AND ((\"year\" > 2000 ))";
        Assert.assertEquals(expectedExportSql, partitions.getFieldReader("preparedStmt").readText().toString());
        // Log at most the first row for diagnostics.
        for (int row = 0; row < partitions.getRowCount() && row < 1; row++) {
            logger.info("doGetTableLayout:{} {}", row, BlockUtils.rowToString(partitions, row));
        }
        logger.info("doGetTableLayout: partitions[{}]", partitions.getRowCount());
    } finally {
        // Close each resource independently: a null left behind by an earlier failure, or an
        // exception from one close(), must not prevent the other resource from being released.
        if (req != null) {
            try {
                req.close();
            } catch (Exception ex) {
                logger.error("doGetTableLayout: ", ex);
            }
        }
        if (res != null) {
            try {
                res.close();
            } catch (Exception ex) {
                logger.error("doGetTableLayout: ", ex);
            }
        }
    }
    logger.info("doGetTableLayout - exit");
}
Also used : ST(org.stringtemplate.v4.ST) Schema(org.apache.arrow.vector.types.pojo.Schema) Matchers.anyString(org.mockito.Matchers.anyString) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) VerticaExportQueryBuilder(com.amazonaws.athena.connectors.vertica.query.VerticaExportQueryBuilder) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Example 2 with VerticaExportQueryBuilder

use of com.amazonaws.athena.connectors.vertica.query.VerticaExportQueryBuilder in project aws-athena-query-federation by awslabs.

the class VerticaMetadataHandler method getPartitions.

/**
 * Used to get the partitions that must be read from the requested table in order to satisfy the requested predicate.
 * Generates the Vertica export SQL from the request and writes it, together with the derived query id and the
 * "set AWS region" SQL, as additional columns of a single partition row.
 *
 * @param blockWriter Used to write rows (partitions) into the Apache Arrow response.
 * @param request Provides details of the catalog, database, and table being queried as well as any filter predicate.
 * @param queryStatusChecker A QueryStatusChecker that you can use to stop doing work for a query that has already
 *                           terminated; this implementation does not consult it.
 * @throws SQLException if reading the column metadata (or closing its result set) fails
 */
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker) throws SQLException {
    logger.info("in getPartitions: {}", request);
    Schema schema = request.getSchema();
    TableName tableName = request.getTableName();
    Constraints constraints = request.getConstraints();
    // get the bucket where export results will be uploaded
    String s3ExportBucket = getS3ExportBucket();
    // Appending a random UUID to the (dash-stripped) query id to support multiple federated queries within a single query
    String randomStr = UUID.randomUUID().toString();
    String queryID = request.getQueryId().replace("-", "").concat(randomStr);
    // Build the SQL query
    // NOTE(review): the connection is intentionally not closed here; whether getConnection hands out a
    // fresh or a shared connection is not visible from this method - confirm before adding a close.
    Connection connection = getConnection(request);
    DatabaseMetaData dbMetadata = connection.getMetaData();
    final String preparedSQLStmt;
    final String awsRegionSql;
    // The column-metadata result set is only needed while the statements are built, so it is
    // released as soon as they are available instead of being left open.
    try (ResultSet definition = dbMetadata.getColumns(null, tableName.getSchemaName(), tableName.getTableName(), null)) {
        VerticaExportQueryBuilder queryBuilder = queryFactory.createVerticaExportQueryBuilder();
        preparedSQLStmt = queryBuilder.withS3ExportBucket(s3ExportBucket).withQueryID(queryID).withColumns(definition, schema).fromTable(tableName.getSchemaName(), tableName.getTableName()).withConstraints(constraints, schema).build();
        logger.info("Vertica Export Statement: {}", preparedSQLStmt);
        // Build the Set AWS Region SQL
        awsRegionSql = queryBuilder.buildSetAwsRegionSql(amazonS3.getRegion().toString());
    }
    // write the prepared SQL statement, query id and region SQL to the partition columns
    // (presumably the ones added by enhancePartitionSchema, which is not visible here)
    blockWriter.writeRows((Block block, int rowNum) -> {
        boolean matched;
        matched = block.setValue("preparedStmt", rowNum, preparedSQLStmt);
        matched &= block.setValue("queryId", rowNum, queryID);
        matched &= block.setValue("awsRegionSql", rowNum, awsRegionSql);
        // If all fields matched then we wrote 1 row during this call so we return 1
        return matched ? 1 : 0;
    });
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) VerticaExportQueryBuilder(com.amazonaws.athena.connectors.vertica.query.VerticaExportQueryBuilder) Schema(org.apache.arrow.vector.types.pojo.Schema) Block(com.amazonaws.athena.connector.lambda.data.Block)

Aggregations

TableName (com.amazonaws.athena.connector.lambda.domain.TableName): 2; Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints): 2; VerticaExportQueryBuilder (com.amazonaws.athena.connectors.vertica.query.VerticaExportQueryBuilder): 2; Schema (org.apache.arrow.vector.types.pojo.Schema): 2; Block (com.amazonaws.athena.connector.lambda.data.Block): 1; ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet): 1; AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 1; Test (org.junit.Test): 1; Matchers.anyString (org.mockito.Matchers.anyString): 1; ST (org.stringtemplate.v4.ST): 1