
Example 81 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class RedisRecordHandler method loadLiteralRow.

private void loadLiteralRow(RedisCommandsWrapper<String, String> syncCommands, String keyString, BlockSpiller spiller, List<Field> fieldList) {
    spiller.writeRows((Block block, int row) -> {
        if (fieldList.size() != 1) {
            throw new RuntimeException("Ambiguous field mapping, more than 1 field for literal value type.");
        }
        Field field = fieldList.get(0);
        Object value = ValueConverter.convert(field, syncCommands.get(keyString));
        boolean literalMatched = block.offerValue(KEY_COLUMN_NAME, row, keyString);
        literalMatched &= block.offerValue(field.getName(), row, value);
        return literalMatched ? 1 : 0;
    });
}
Also used : Field(org.apache.arrow.vector.types.pojo.Field) Block(com.amazonaws.athena.connector.lambda.data.Block)
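The convert call above coerces the raw Redis string into the Java type implied by the Arrow field before block.offerValue writes it; offerValue returns false when a value fails the query's constraints, which is why the lambda returns 0 for filtered-out rows. Below is a minimal sketch of such a conversion, assuming only a handful of Arrow minor types; it is not the connector's actual ValueConverter.

import org.apache.arrow.vector.types.Types;
import org.apache.arrow.vector.types.pojo.Field;

// Illustrative only: map a raw Redis string onto the Java type implied by the Arrow field.
static Object convert(Field field, String value) {
    if (value == null) {
        return null;
    }
    switch (Types.getMinorTypeForArrowType(field.getType())) {
        case VARCHAR:
            return value;
        case INT:
            return Integer.valueOf(value);
        case BIGINT:
            return Long.valueOf(value);
        case FLOAT8:
            return Double.valueOf(value);
        case BIT:
            return Boolean.valueOf(value);
        default:
            throw new IllegalArgumentException("Unsupported Arrow type: " + field.getType());
    }
}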

Example 82 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class SqlServerMetadataHandler method doGetSplits.

/**
 * Splits up the reads required to scan the requested batch of partition(s).
 * @param blockAllocator Tool for creating and managing Apache Arrow Blocks.
 * @param getSplitsRequest Provides details of the catalog, database, table, and partition(s) being queried.
 * @return A GetSplitsResponse containing the generated Splits and, when more work remains, a continuation token.
 */
@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest) {
    LOGGER.info("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());
    int partitionContd = decodeContinuationToken(getSplitsRequest);
    LOGGER.info("partitionContd: {}", partitionContd);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
        FieldReader locationReader = partitions.getFieldReader(PARTITION_NUMBER);
        locationReader.setPosition(curPartition);
        SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
        LOGGER.debug("{}: Input partition is {}", getSplitsRequest.getQueryId(), locationReader.readText());
        Split.Builder splitBuilder;
        // Included partition information to split if the table is partitioned
        if (partitionFunction != null && partitioningColumn != null) {
            splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                    .add(PARTITION_NUMBER, String.valueOf(locationReader.readText()))
                    .add(PARTITION_FUNCTION, partitionFunction)
                    .add(PARTITIONING_COLUMN, partitioningColumn);
        } else {
            splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                    .add(PARTITION_NUMBER, String.valueOf(locationReader.readText()));
        }
        splits.add(splitBuilder.build());
        if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
            // We exceeded the number of splits we want to return in a single request; return what we have with a continuation token.
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition));
        }
    }
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
Also used : SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) Split(com.amazonaws.athena.connector.lambda.domain.Split) FieldReader(org.apache.arrow.vector.complex.reader.FieldReader) HashSet(java.util.HashSet)
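This handler (and the ones below) relies on private continuation-token helpers that are not shown in the snippet. A minimal sketch of their conventional shape in these JDBC-based connectors follows, assuming the token is simply a stringified partition index; the project's actual bodies may differ.

// Sketch only, not copied from the project.
private int decodeContinuationToken(GetSplitsRequest request) {
    if (request.hasContinuationToken()) {
        // resume from the partition index encoded in the previous response
        return Integer.parseInt(request.getContinuationToken());
    }
    // no token means this is the first request: start at partition 0
    return 0;
}

private String encodeContinuationToken(int partition) {
    // sketch: the token is just the partition index as a string
    return String.valueOf(partition);
}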

Example 83 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class SynapseMetadataHandler method doGetSplits.

/**
 * Splits up the reads required to scan the requested batch of partition(s).
 * @param blockAllocator Tool for creating and managing Apache Arrow Blocks.
 * @param getSplitsRequest Provides details of the catalog, database, table, and partition(s) being queried.
 * @return A GetSplitsResponse containing the generated Splits and, when more work remains, a continuation token.
 */
@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest) {
    LOGGER.info("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());
    int partitionContd = decodeContinuationToken(getSplitsRequest);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
        FieldReader locationReader = partitions.getFieldReader(PARTITION_NUMBER);
        locationReader.setPosition(curPartition);
        SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
        LOGGER.debug("{}: Input partition is {}", getSplitsRequest.getQueryId(), locationReader.readText());
        Split.Builder splitBuilder;
        String partInfo = String.valueOf(locationReader.readText());
        // Included partition information to split if the table is partitioned
        if (partInfo.contains(":::")) {
            String[] partInfoAr = partInfo.split(":::");
            splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                    .add(PARTITION_NUMBER, partInfoAr[0])
                    .add(PARTITION_BOUNDARY_FROM, partInfoAr[1])
                    .add(PARTITION_BOUNDARY_TO, partInfoAr[2])
                    .add(PARTITION_COLUMN, partInfoAr[3]);
        } else {
            splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                    .add(PARTITION_NUMBER, partInfo);
        }
        splits.add(splitBuilder.build());
        if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
            // We exceeded the number of splits we want to return in a single request; return what we have with a continuation token.
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition));
        }
    }
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
Also used : SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) Split(com.amazonaws.athena.connector.lambda.domain.Split) FieldReader(org.apache.arrow.vector.complex.reader.FieldReader) HashSet(java.util.HashSet)
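The ":::" token packs up to four pieces of partition metadata into the single PARTITION_NUMBER value written by getPartitions (shown in the next example). A small round-trip illustration with made-up values:

// Hypothetical values; the encoding is produced by getPartitions below.
String partInfo = "2:::10:::200:::col1";
// ':' is not a regex metacharacter, so String.split(":::") is safe here.
String[] parts = partInfo.split(":::");
String partitionNumber = parts[0]; // "2"
String boundaryFrom = parts[1];    // "10"  (lower bound of the range partition)
String boundaryTo = parts[2];      // "200" (upper bound of the range partition)
String partitionColumn = parts[3]; // "col1"

Note that getPartitions writes a single space rather than an empty string for an open-ended boundary, so every token stays visibly present in the encoded value.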

Example 84 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class SynapseMetadataHandler method getPartitions.

/**
 * Partition metadata queries are read from template files and used to check whether the table is partitioned.
 * If the table has no partitions, its data is fetched with a single split.
 * If the table is partitioned, the partition info is fetched and one split is created per partition
 * for parallel processing.
 * @param blockWriter Used to write the partition rows into the Apache Arrow response.
 * @param getTableLayoutRequest Provides details of the catalog, database, and table being queried.
 * @param queryStatusChecker Used to check whether the query is still running.
 * @throws Exception
 */
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker) throws Exception {
    LOGGER.info("{}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    /**
     * Queries formed through String Template for retrieving Azure Synapse table partitions
     */
    STGroup stGroup = new STGroupDir("templates", '$', '$');
    ST getPartitionsSt = stGroup.getInstanceOf("getPartitions");
    getPartitionsSt.add("name", getTableLayoutRequest.getTableName().getTableName());
    getPartitionsSt.add("schemaname", getTableLayoutRequest.getTableName().getSchemaName());
    ST rowCountSt = stGroup.getInstanceOf("rowCount");
    rowCountSt.add("name", getTableLayoutRequest.getTableName().getTableName());
    rowCountSt.add("schemaname", getTableLayoutRequest.getTableName().getSchemaName());
    try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) {
        try (Statement st = connection.createStatement();
            Statement st2 = connection.createStatement();
            ResultSet resultSet = st.executeQuery(getPartitionsSt.render());
            ResultSet resultSet2 = st2.executeQuery(rowCountSt.render())) {
            // check whether the table has partitions using the row-count query
            if (resultSet2.next()) {
                rowCount = resultSet2.getInt("ROW_COUNT");
                LOGGER.info("rowCount: {}", rowCount);
            }
            // create a single split for view/non-partition table
            if (rowCount == 0) {
                LOGGER.debug("Getting as single Partition: ");
                blockWriter.writeRows((Block block, int rowNum) -> {
                    block.setValue(PARTITION_NUMBER, rowNum, ALL_PARTITIONS);
                    // we wrote 1 row so we return 1
                    return 1;
                });
            } else {
                LOGGER.debug("Getting data with diff Partitions: ");
                /*
                    Synapse supports range partitioning. The partition column and partition range values are
                    extracted from Synapse metadata tables, and partition boundaries are formed from those values.
                    Ex: if the partition column is 'col1' and the partition range values are 10, 200, null, then
                        the partition boundaries below will be created to form custom queries for the splits:
                        1::: :::10:::col1, 2:::10:::200:::col1, 3:::200::: :::col1
                 */
                while (resultSet.next()) {
                    final String partitionNumber = resultSet.getString(PARTITION_NUMBER);
                    LOGGER.debug("partitionNumber: {}", partitionNumber);
                    if ("1".equals(partitionNumber)) {
                        partitionBoundaryFrom = " ";
                        partitionColumn = resultSet.getString(PARTITION_COLUMN);
                        LOGGER.debug("partitionColumn: {}", partitionColumn);
                    } else {
                        partitionBoundaryFrom = partitionBoundaryTo;
                    }
                    partitionBoundaryTo = resultSet.getString("PARTITION_BOUNDARY_VALUE");
                    partitionBoundaryTo = (partitionBoundaryTo == null) ? " " : partitionBoundaryTo;
                    // 1. Returns all partitions of table, we are not supporting constraints push down to filter partitions.
                    // 2. This API is not paginated, we could use order by and limit clause with offsets here.
                    blockWriter.writeRows((Block block, int rowNum) -> {
                        // creating the partition boundaries
                        block.setValue(PARTITION_NUMBER, rowNum, partitionNumber + ":::" + partitionBoundaryFrom + ":::" + partitionBoundaryTo + ":::" + partitionColumn);
                        // we wrote 1 row so we return 1
                        return 1;
                    });
                }
            }
        }
    } catch (SQLException sqlException) {
        throw new SQLException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
    }
}
Also used : ST(org.stringtemplate.v4.ST) STGroupDir(org.stringtemplate.v4.STGroupDir) STGroup(org.stringtemplate.v4.STGroup) SQLException(java.sql.SQLException) PreparedStatement(java.sql.PreparedStatement) Statement(java.sql.Statement) Connection(java.sql.Connection) ResultSet(java.sql.ResultSet) Block(com.amazonaws.athena.connector.lambda.data.Block)
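The STGroupDir above resolves templates named getPartitions and rowCount from a templates directory, using '$' as both the start and stop delimiter. A self-contained sketch of the same rendering mechanics, with a hypothetical SQL body standing in for the connector's real template file:

import org.stringtemplate.v4.ST;

// Standalone illustration of the rendering used above. The SQL text is hypothetical;
// the connector's real query lives in a template file such as templates/getPartitions.st.
ST st = new ST(
        "SELECT p.partition_number FROM sys.partitions p "
        + "JOIN sys.tables t ON p.object_id = t.object_id "
        + "JOIN sys.schemas s ON t.schema_id = s.schema_id "
        + "WHERE t.name = '$name$' AND s.name = '$schemaname$'",
        '$', '$');
st.add("name", "orders");
st.add("schemaname", "dbo");
System.out.println(st.render()); // delimited placeholders replaced with the supplied values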

Example 85 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class TeradataMetadataHandler method doGetSplits.

@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest) {
    LOGGER.info("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());
    int partitionContd = decodeContinuationToken(getSplitsRequest);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    // TODO consider splitting further depending on #rows or data size. Could use Hash key for splitting if no partitions.
    for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
        FieldReader locationReader = partitions.getFieldReader(BLOCK_PARTITION_COLUMN_NAME);
        locationReader.setPosition(curPartition);
        SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
        LOGGER.info("{}: Input partition is {}", getSplitsRequest.getQueryId(), locationReader.readText());
        Split.Builder splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                .add(BLOCK_PARTITION_COLUMN_NAME, String.valueOf(locationReader.readText()));
        splits.add(splitBuilder.build());
        if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
            // We exceeded the number of splits we want to return in a single request; return what we have with a continuation token.
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition + 1));
        }
    }
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
Also used : SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) Split(com.amazonaws.athena.connector.lambda.domain.Split) FieldReader(org.apache.arrow.vector.complex.reader.FieldReader) HashSet(java.util.HashSet)
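Unlike the SQL Server and Synapse handlers above, this one encodes curPartition + 1, so a follow-up request resumes after the last emitted split rather than at it. Each property added to the Split.Builder travels with the split to the record handler, which reads it back to scope its scan to one partition; a hypothetical read-back on the record-handler side (illustrative variable names and SQL, not the connector's actual code):

// Illustrative only: recover the partition name stored when the split was built.
String partitionName = split.getProperty(BLOCK_PARTITION_COLUMN_NAME);
// A real handler would bind this through its SQL builder rather than concatenating strings.
String sql = "SELECT * FROM my_table WHERE PARTITION = '" + partitionName + "'";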

Aggregations

Block (com.amazonaws.athena.connector.lambda.data.Block): 113
Test (org.junit.Test): 39
HashMap (java.util.HashMap): 35
Schema (org.apache.arrow.vector.types.pojo.Schema): 35
Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints): 32
Split (com.amazonaws.athena.connector.lambda.domain.Split): 31
GetSplitsResponse (com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse): 28
FieldReader (org.apache.arrow.vector.complex.reader.FieldReader): 28
TableName (com.amazonaws.athena.connector.lambda.domain.TableName): 27
SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation): 23
HashSet (java.util.HashSet): 23
ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet): 20
Field (org.apache.arrow.vector.types.pojo.Field): 17
GetSplitsRequest (com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest): 13
PreparedStatement (java.sql.PreparedStatement): 13
ResultSet (java.sql.ResultSet): 13
ArrayList (java.util.ArrayList): 13
MetadataResponse (com.amazonaws.athena.connector.lambda.metadata.MetadataResponse): 12
Connection (java.sql.Connection): 12
ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest): 11