
Example 76 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class PostGreSqlMetadataHandler method getPartitions.

@Override
public void getPartitions(final BlockWriter blockWriter, final GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker) {
    LOGGER.info("{}: Catalog {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) {
        List<String> parameters = Arrays.asList(getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
        try (PreparedStatement preparedStatement = new PreparedStatementBuilder().withConnection(connection).withQuery(GET_PARTITIONS_QUERY).withParameters(parameters).build();
            ResultSet resultSet = preparedStatement.executeQuery()) {
            // Return a single partition if no partitions are defined
            if (!resultSet.next()) {
                blockWriter.writeRows((Block block, int rowNum) -> {
                    block.setValue(BLOCK_PARTITION_SCHEMA_COLUMN_NAME, rowNum, ALL_PARTITIONS);
                    block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, ALL_PARTITIONS);
                    // we wrote 1 row so we return 1
                    return 1;
                });
            } else {
                do {
                    final String partitionSchemaName = resultSet.getString(PARTITION_SCHEMA_NAME);
                    final String partitionName = resultSet.getString(PARTITION_NAME);
                    // 1. Returns all partitions of the table; constraint push-down to filter partitions is not supported.
                    // 2. This API is not paginated; we could use ORDER BY and LIMIT clauses with offsets here.
                    blockWriter.writeRows((Block block, int rowNum) -> {
                        block.setValue(BLOCK_PARTITION_SCHEMA_COLUMN_NAME, rowNum, partitionSchemaName);
                        block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, partitionName);
                        // we wrote 1 row so we return 1
                        return 1;
                    });
                } while (resultSet.next());
            }
        }
    } catch (SQLException sqlException) {
        throw new RuntimeException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
    }
}
Also used : SQLException(java.sql.SQLException) Connection(java.sql.Connection) ResultSet(java.sql.ResultSet) Block(com.amazonaws.athena.connector.lambda.data.Block) PreparedStatement(java.sql.PreparedStatement) PreparedStatementBuilder(com.amazonaws.athena.connectors.jdbc.manager.PreparedStatementBuilder)
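
A minimal sketch (not from the connector source) of how the two partition columns populated above might be declared. The literal column names are assumptions standing in for BLOCK_PARTITION_SCHEMA_COLUMN_NAME and BLOCK_PARTITION_COLUMN_NAME; SchemaBuilder is the SDK helper from com.amazonaws.athena.connector.lambda.data.

import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
import org.apache.arrow.vector.types.pojo.Schema;

// Hypothetical helper: builds the two-column partition schema that the
// writeRows lambdas above fill in. The string literals are assumed values
// for the handler's column-name constants.
static Schema buildPartitionSchema() {
    return SchemaBuilder.newBuilder()
            .addStringField("partition_schema_name")
            .addStringField("partition_name")
            .build();
}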

Example 77 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class SaphanaMetadataHandler method getPartitions.

/**
 * We first check whether the input table is a view. A view has no partition info, so its data is
 * fetched with a single split. A table with no partitions is likewise fetched with a single split.
 * For a partitioned table, we fetch the partition info and create one split per partition for
 * parallel processing.
 * @param blockWriter
 * @param getTableLayoutRequest
 * @param queryStatusChecker
 * @throws Exception
 */
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker) throws Exception {
    LOGGER.debug("{}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    // check if the input table is a view
    boolean viewFlag = false;
    List<String> viewparameters = Arrays.asList(getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) {
        try (PreparedStatement preparedStatement = new PreparedStatementBuilder().withConnection(connection).withQuery(SaphanaConstants.VIEW_CHECK_QUERY).withParameters(viewparameters).build();
            ResultSet resultSet = preparedStatement.executeQuery()) {
            if (resultSet.next()) {
                viewFlag = true;
            }
            LOGGER.debug("viewFlag: {}", viewFlag);
        } catch (SQLException sqlException) {
            LOGGER.debug("Exception while querying view details for view {}", getTableLayoutRequest.getTableName().getTableName());
            throw new SQLException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
        }
    }
    // For view create a single split
    if (viewFlag) {
        blockWriter.writeRows((Block block, int rowNum) -> {
            block.setValue(SaphanaConstants.BLOCK_PARTITION_COLUMN_NAME, rowNum, SaphanaConstants.ALL_PARTITIONS);
            return 1;
        });
    } else {
        List<String> parameters = Arrays.asList(getTableLayoutRequest.getTableName().getTableName(), getTableLayoutRequest.getTableName().getSchemaName());
        try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) {
            try (PreparedStatement preparedStatement = new PreparedStatementBuilder().withConnection(connection).withQuery(SaphanaConstants.GET_PARTITIONS_QUERY).withParameters(parameters).build();
                ResultSet resultSet = preparedStatement.executeQuery()) {
                // Return a single partition if no partitions are defined
                if (!resultSet.next()) {
                    blockWriter.writeRows((Block block, int rowNum) -> {
                        block.setValue(SaphanaConstants.BLOCK_PARTITION_COLUMN_NAME, rowNum, SaphanaConstants.ALL_PARTITIONS);
                        // we wrote 1 row so we return 1
                        return 1;
                    });
                } else {
                    do {
                        final String partitionName = resultSet.getString(SaphanaConstants.BLOCK_PARTITION_COLUMN_NAME);
                        // 1. Returns all partitions of the table; constraint push-down to filter partitions is not supported.
                        // 2. This API is not paginated; we could use ORDER BY and LIMIT clauses with offsets here.
                        blockWriter.writeRows((Block block, int rowNum) -> {
                            block.setValue(SaphanaConstants.BLOCK_PARTITION_COLUMN_NAME, rowNum, partitionName);
                            // we wrote 1 row so we return 1
                            return 1;
                        });
                    } while (resultSet.next());
                }
            }
        } catch (SQLException sqlException) {
            throw new SQLException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
        }
    }
}
Also used : SQLException(java.sql.SQLException) Connection(java.sql.Connection) ResultSet(java.sql.ResultSet) Block(com.amazonaws.athena.connector.lambda.data.Block) PreparedStatement(java.sql.PreparedStatement) PreparedStatementBuilder(com.amazonaws.athena.connectors.jdbc.manager.PreparedStatementBuilder)
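
Both getPartitions implementations above hand BlockWriter.writeRows a lambda that writes one row and returns 1. Below is a minimal sketch, not connector code, of the same RowWriter contract used to emit several rows from a single call; the "partition_name" column and the helper itself are assumptions for illustration.

import com.amazonaws.athena.connector.lambda.data.Block;
import com.amazonaws.athena.connector.lambda.data.BlockWriter;
import java.util.List;

// Hypothetical helper: writes every partition name in one writeRows call.
// The RowWriter lambda begins at rowNum and must return the number of rows it wrote.
static void writeAllPartitions(BlockWriter blockWriter, List<String> partitionNames) {
    blockWriter.writeRows((Block block, int rowNum) -> {
        int rowsWritten = 0;
        for (String name : partitionNames) {
            // "partition_name" is an assumed column name for illustration
            block.setValue("partition_name", rowNum + rowsWritten, name);
            rowsWritten++;
        }
        return rowsWritten;
    });
}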

Example 78 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class SaphanaMetadataHandler method doGetSplits.

/**
 * @param blockAllocator
 * @param getSplitsRequest
 * @return
 */
@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest) {
    LOGGER.debug("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());
    int partitionContd = decodeContinuationToken(getSplitsRequest);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    // TODO consider splitting further depending on #rows or data size. Could use Hash key for splitting if no partitions.
    for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
        FieldReader locationReader = partitions.getFieldReader(SaphanaConstants.BLOCK_PARTITION_COLUMN_NAME);
        locationReader.setPosition(curPartition);
        SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
        LOGGER.debug("{}: Input partition is {}", getSplitsRequest.getQueryId(), locationReader.readText());
        Split.Builder splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey()).add(SaphanaConstants.BLOCK_PARTITION_COLUMN_NAME, String.valueOf(locationReader.readText()));
        splits.add(splitBuilder.build());
        if (splits.size() >= SaphanaConstants.MAX_SPLITS_PER_REQUEST) {
            // We exceeded the number of splits we want to return in a single request, so return and provide a continuation token.
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition + 1));
        }
    }
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
Also used : SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) Split(com.amazonaws.athena.connector.lambda.domain.Split) FieldReader(org.apache.arrow.vector.complex.reader.FieldReader) HashSet(java.util.HashSet)
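
doGetSplits above relies on decodeContinuationToken and encodeContinuationToken, whose bodies are not shown. A minimal sketch follows, assuming the token is simply the stringified index of the next partition to process (the convention the loop above implies); this is an assumed shape, not the connector's verbatim source.

import com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest;

// Assumed shape of the continuation-token helpers referenced above.
private int decodeContinuationToken(GetSplitsRequest request) {
    if (request.hasContinuationToken()) {
        // Resume from the partition index encoded in the token.
        return Integer.parseInt(request.getContinuationToken());
    }
    // No token: start from the first partition.
    return 0;
}

private String encodeContinuationToken(int partition) {
    return String.valueOf(partition);
}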

Example 79 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class SnowflakeMetadataHandler method getPartitions.

/**
 * Snowflake manual partition logic based upon number of records
 * @param blockWriter
 * @param getTableLayoutRequest
 * @param queryStatusChecker
 * @throws Exception
 */
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker) throws Exception {
    LOGGER.info("{}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    Map<String, String> properties = System.getenv();
    /**
     * Customized environment variable "pagecount" for pagination-based partitioning. It is currently set to 500000,
     * which means there will be 500000 rows per partition. The number of partitions is the total number of rows
     * divided by the pagecount value.
     */
    String pagecount = properties.get("pagecount");
    Long totalpagecount = Long.valueOf(pagecount);
    /**
     * Customized environment variable "partitionlimit" to cap the number of partitions.
     * This handles timeout issues caused by an excessive number of partitions.
     */
    String partitionlimit = properties.get("partitionlimit");
    Long totalPartitionlimit = Long.valueOf(partitionlimit);
    LOGGER.info(" Total Partition Limit" + totalPartitionlimit);
    LOGGER.info(" Total Page  Count" + totalpagecount);
    long offset = 0;
    double limit = 0;
    double totalRecordCount = 0;
    boolean viewFlag = checkForView(getTableLayoutRequest);
    // If the input table is a view, there will be a single split.
    if (viewFlag) {
        blockWriter.writeRows((Block block, int rowNum) -> {
            block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, ALL_PARTITIONS);
            return 1;
        });
    } else {
        LOGGER.info(COUNT_RECORDS_QUERY);
        List<String> parameters = Arrays.asList(getTableLayoutRequest.getTableName().getTableName());
        try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider());
            PreparedStatement preparedStatement = new PreparedStatementBuilder().withConnection(connection).withQuery(COUNT_RECORDS_QUERY).withParameters(parameters).build();
            ResultSet rs = preparedStatement.executeQuery()) {
            while (rs.next()) {
                totalRecordCount = rs.getInt(1);
            }
            double limitValue = totalRecordCount / totalpagecount;
            limit = (int) Math.ceil(limitValue);
            if (totalRecordCount > 0) {
                // If the computed partition count exceeds the partition limit, the whole table is treated as a single partition.
                if (limit > totalPartitionlimit) {
                    final String partitionVal = BLOCK_PARTITION_COLUMN_NAME + "-limit-" + totalRecordCount + "-offset-" + offset;
                    LOGGER.info("partitionVal {} ", partitionVal);
                    blockWriter.writeRows((Block block, int rowNum) -> {
                        block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, partitionVal);
                        return 1;
                    });
                } else {
                    /**
                     * Custom pagination-based partition logic is applied using LIMIT and OFFSET clauses.
                     * The partition values encode the limit and offset, e.g. p-limit-3000-offset-0.
                     */
                    for (int i = 1; i <= limit; i++) {
                        if (i > 1) {
                            offset = offset + totalpagecount;
                        }
                        final String partitionVal = BLOCK_PARTITION_COLUMN_NAME + "-limit-" + pagecount + "-offset-" + offset;
                        LOGGER.info("partitionVal {} ", partitionVal);
                        blockWriter.writeRows((Block block, int rowNum) -> {
                            block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, partitionVal);
                            return 1;
                        });
                    }
                }
            } else {
                LOGGER.info("No Records Found for table {}", getTableLayoutRequest.getTableName().getTableName());
            }
        } catch (SQLException sqlException) {
            throw new RuntimeException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
        } catch (Exception exception) {
            LOGGER.error("Error occurred while getting the results", exception);
        }
    }
}
Also used : SQLException(java.sql.SQLException) Connection(java.sql.Connection) PreparedStatement(java.sql.PreparedStatement) ResultSet(java.sql.ResultSet) Block(com.amazonaws.athena.connector.lambda.data.Block) PreparedStatementBuilder(com.amazonaws.athena.connectors.jdbc.manager.PreparedStatementBuilder)
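
A short worked example of the pagination math in the Snowflake handler above, with illustrative numbers (the row count and pagecount here are assumptions, not connector defaults): 1,200,000 rows at 500,000 rows per page yields ceil(1200000 / 500000) = 3 partitions with offsets 0, 500000 and 1000000.

// Illustrative only: reproduces the limit/offset computation from getPartitions above.
long pagecount = 500_000L;          // assumed value of the "pagecount" environment variable
long totalRecordCount = 1_200_000L; // assumed table size
long partitions = (long) Math.ceil((double) totalRecordCount / pagecount); // 3
long offset = 0;
for (int i = 1; i <= partitions; i++) {
    if (i > 1) {
        offset += pagecount;
    }
    // Prints: partition-limit-500000-offset-0, ...-offset-500000, ...-offset-1000000
    // ("partition" stands in for the BLOCK_PARTITION_COLUMN_NAME value, an assumption)
    System.out.println("partition-limit-" + pagecount + "-offset-" + offset);
}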

Example 80 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class SnowflakeMetadataHandler method doGetSplits.

@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest) {
    LOGGER.info("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());
    int partitionContd = decodeContinuationToken(getSplitsRequest);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    // TODO consider splitting further depending on #rows or data size. Could use Hash key for splitting if no partitions.
    for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
        FieldReader locationReader = partitions.getFieldReader(BLOCK_PARTITION_COLUMN_NAME);
        locationReader.setPosition(curPartition);
        SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
        LOGGER.info("{}: Input partition is {}", getSplitsRequest.getQueryId(), locationReader.readText());
        Split.Builder splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey()).add(BLOCK_PARTITION_COLUMN_NAME, String.valueOf(locationReader.readText()));
        splits.add(splitBuilder.build());
        if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
            // We exceeded the number of splits we want to return in a single request, so return and provide a continuation token.
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition + 1));
        }
    }
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
Also used : SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) Split(com.amazonaws.athena.connector.lambda.domain.Split) FieldReader(org.apache.arrow.vector.complex.reader.FieldReader) HashSet(java.util.HashSet)
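
The splits built above carry the partition value as a string property. Here is a hedged sketch (not the connector's actual record handler) of how that property could be read back and turned into a LIMIT/OFFSET clause when the per-split query is built; Split.getProperty is the SDK accessor, while the parsing of the "-limit-N-offset-M" suffix mirrors the Snowflake partition values shown earlier.

import com.amazonaws.athena.connector.lambda.domain.Split;

// Hypothetical helper: converts a split's partition property into a pagination clause.
static String buildLimitClause(Split split, String partitionColumnName) {
    String partitionInfo = split.getProperty(partitionColumnName);
    if (partitionInfo == null || !partitionInfo.contains("-limit-")) {
        // Single-partition case (e.g. a view or ALL_PARTITIONS): no pagination clause.
        return "";
    }
    // e.g. "partition-limit-500000-offset-1000000" -> ["partition", "500000", "1000000"]
    String[] parts = partitionInfo.split("-limit-|-offset-");
    return " LIMIT " + parts[1] + " OFFSET " + parts[2];
}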

Aggregations

Block (com.amazonaws.athena.connector.lambda.data.Block) 113
Test (org.junit.Test) 39
HashMap (java.util.HashMap) 35
Schema (org.apache.arrow.vector.types.pojo.Schema) 35
Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) 32
Split (com.amazonaws.athena.connector.lambda.domain.Split) 31
GetSplitsResponse (com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) 28
FieldReader (org.apache.arrow.vector.complex.reader.FieldReader) 28
TableName (com.amazonaws.athena.connector.lambda.domain.TableName) 27
SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) 23
HashSet (java.util.HashSet) 23
ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) 20
Field (org.apache.arrow.vector.types.pojo.Field) 17
GetSplitsRequest (com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) 13
PreparedStatement (java.sql.PreparedStatement) 13
ResultSet (java.sql.ResultSet) 13
ArrayList (java.util.ArrayList) 13
MetadataResponse (com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) 12
Connection (java.sql.Connection) 12
ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) 11