
Example 71 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

In class MySqlMetadataHandler, method doGetSplits.

@Override
public GetSplitsResponse doGetSplits(final BlockAllocator blockAllocator, final GetSplitsRequest getSplitsRequest) {
    LOGGER.info("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());
    int partitionContd = decodeContinuationToken(getSplitsRequest);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    // TODO consider splitting further depending on #rows or data size. Could use Hash key for splitting if no partitions.
    for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
        FieldReader locationReader = partitions.getFieldReader(BLOCK_PARTITION_COLUMN_NAME);
        locationReader.setPosition(curPartition);
        SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
        LOGGER.info("{}: Input partition is {}", getSplitsRequest.getQueryId(), locationReader.readText());
        Split.Builder splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey()).add(BLOCK_PARTITION_COLUMN_NAME, String.valueOf(locationReader.readText()));
        splits.add(splitBuilder.build());
        if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
            // We exceeded the number of splits we want to return in a single request; return what we have and provide a continuation token.
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition + 1));
        }
    }
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
Also used : SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) Split(com.amazonaws.athena.connector.lambda.domain.Split) FieldReader(org.apache.arrow.vector.complex.reader.FieldReader) HashSet(java.util.HashSet)
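The continuation-token helpers used above (decodeContinuationToken and encodeContinuationToken) are defined elsewhere in the handler and are not part of this snippet. A minimal sketch of what they could look like, assuming the token is simply the next partition row index rendered as a string (inferred from the call sites, not copied from the connector):

private int decodeContinuationToken(GetSplitsRequest request) {
    if (request.hasContinuationToken()) {
        // Resume the partition loop from the row index saved in the previous response.
        return Integer.parseInt(request.getContinuationToken());
    }
    // No token means this is the first GetSplits call, so start at the first partition row.
    return 0;
}

private String encodeContinuationToken(int partition) {
    // Record the next partition row index so a follow-up request can resume where we stopped.
    return String.valueOf(partition);
}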

Example 72 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

In class MySqlMetadataHandler, method getPartitions.

@Override
public void getPartitions(final BlockWriter blockWriter, final GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker) {
    LOGGER.info("{}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) {
        final String escape = connection.getMetaData().getSearchStringEscape();
        List<String> parameters = Arrays.asList(getTableLayoutRequest.getTableName().getTableName(), getTableLayoutRequest.getTableName().getSchemaName());
        try (PreparedStatement preparedStatement = new PreparedStatementBuilder().withConnection(connection).withQuery(GET_PARTITIONS_QUERY).withParameters(parameters).build();
            ResultSet resultSet = preparedStatement.executeQuery()) {
            // Write a single ALL_PARTITIONS row if the table has no partitions defined
            if (!resultSet.next()) {
                blockWriter.writeRows((Block block, int rowNum) -> {
                    block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, ALL_PARTITIONS);
                    LOGGER.info("Adding partition {}", ALL_PARTITIONS);
                    // we wrote 1 row so we return 1
                    return 1;
                });
            } else {
                do {
                    final String partitionName = resultSet.getString(PARTITION_COLUMN_NAME);
                    // 1. Returns all partitions of the table; constraint push-down to filter partitions is not supported.
                    // 2. This API is not paginated; an ORDER BY with LIMIT/OFFSET could be used here if pagination were needed.
                    blockWriter.writeRows((Block block, int rowNum) -> {
                        block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, partitionName);
                        LOGGER.info("Adding partition {}", partitionName);
                        // we wrote 1 row so we return 1
                        return 1;
                    });
                } while (resultSet.next() && queryStatusChecker.isQueryRunning());
            }
        }
    } catch (SQLException sqlException) {
        throw new RuntimeException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
    }
}
Also used : SQLException(java.sql.SQLException) Connection(java.sql.Connection) ResultSet(java.sql.ResultSet) Block(com.amazonaws.athena.connector.lambda.data.Block) PreparedStatement(java.sql.PreparedStatement) PreparedStatementBuilder(com.amazonaws.athena.connectors.jdbc.manager.PreparedStatementBuilder)
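GET_PARTITIONS_QUERY is a constant defined elsewhere in MySqlMetadataHandler; the snippet above only binds it with the table name and schema name. A hedged sketch of a query that would fit this call, reading partition names from MySQL's information_schema (the connector's actual query text may differ):

// Illustrative only; the real connector defines its own query text and parameter order.
static final String GET_PARTITIONS_QUERY = "SELECT DISTINCT partition_name FROM information_schema.partitions "
        + "WHERE table_name = ? AND table_schema = ? AND partition_name IS NOT NULL";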

Example 73 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

In class HiveMetadataHandler, method getPartitions.

/**
 * Used to get the Hive partitions that must be read from the requested table in order to satisfy the requested predicate.
 *
 * @param blockWriter Used to write rows (hive partitions) into the Apache Arrow response.
 * @param getTableLayoutRequest Provides details of the catalog, database, and table being queried as well as any filter predicate.
 * @param queryStatusChecker A QueryStatusChecker that you can use to stop doing work for a query that has already terminated
 * @throws SQLException A SQLException should be thrown for database connection failures, query syntax errors, and so on.
 */
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker) throws SQLException {
    LOGGER.info("{}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider());
        Statement stmt = connection.createStatement();
        PreparedStatement psmt = connection.prepareStatement(GET_METADATA_QUERY + getTableLayoutRequest.getTableName().getTableName().toUpperCase())) {
        boolean isTablePartitioned = false;
        ResultSet partitionResultset = stmt.executeQuery("show table extended like " + getTableLayoutRequest.getTableName().getTableName().toUpperCase());
        while (partitionResultset != null && partitionResultset.next()) {
            String partExists = partitionResultset.getString(1);
            if (partExists.toUpperCase().contains("PARTITIONED")) {
                String partValue = partExists.split(":")[1];
                if (partValue.toUpperCase().contains("TRUE")) {
                    isTablePartitioned = true;
                }
            }
        }
        LOGGER.debug("isTablePartitioned:" + isTablePartitioned);
        if (isTablePartitioned) {
            ResultSet partitionRs = stmt.executeQuery("show partitions " + getTableLayoutRequest.getTableName().getTableName().toUpperCase());
            Set<String> partition = new HashSet<>();
            while (partitionRs != null && partitionRs.next()) {
                partition.add(partitionRs.getString("Partition"));
            }
            if (!partition.isEmpty()) {
                Map<String, String> columnHashMap = getMetadataForGivenTable(psmt);
                addPartitions(partition, columnHashMap, blockWriter);
            }
        } else {
            blockWriter.writeRows((Block block, int rowNum) -> {
                block.setValue(HiveConstants.BLOCK_PARTITION_COLUMN_NAME, rowNum, HiveConstants.ALL_PARTITIONS);
                return 1;
            });
        }
    }
}
Also used : PreparedStatement(java.sql.PreparedStatement) Statement(java.sql.Statement) Connection(java.sql.Connection) ResultSet(java.sql.ResultSet) Block(com.amazonaws.athena.connector.lambda.data.Block) PreparedStatement(java.sql.PreparedStatement) HashSet(java.util.HashSet)
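The helpers getMetadataForGivenTable and addPartitions are defined elsewhere in HiveMetadataHandler. As a rough sketch of the shape addPartitions could take, writing each "show partitions" entry as one row of the partitions Block (the method name is reused from the call site, but this body is an assumption, not the connector's actual implementation):

private void addPartitions(Set<String> partitionNames, Map<String, String> columnTypes, BlockWriter blockWriter) {
    for (String partitionName : partitionNames) {
        // Each partition spec (for example "year=2021/month=07") becomes one row in the partitions Block.
        blockWriter.writeRows((Block block, int rowNum) -> {
            // columnTypes would normally be used to cast partition values to their column types; omitted in this sketch.
            block.setValue(HiveConstants.BLOCK_PARTITION_COLUMN_NAME, rowNum, partitionName);
            // One row written per callback invocation.
            return 1;
        });
    }
}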

Example 74 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

In class OracleMetadataHandler, method doGetSplits.

/**
 * @param blockAllocator Tool for creating and managing Apache Arrow Blocks.
 * @param getSplitsRequest Provides details of the catalog, database, table, and partitions being queried, as well as any filter predicate.
 * @return A GetSplitsResponse containing the splits for the requested partitions and, when more remain, a continuation token.
 */
@Override
public GetSplitsResponse doGetSplits(final BlockAllocator blockAllocator, final GetSplitsRequest getSplitsRequest) {
    LOGGER.debug("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());
    int partitionContd = decodeContinuationToken(getSplitsRequest);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    // TODO consider splitting further depending on #rows or data size. Could use Hash key for splitting if no partitions.
    for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
        FieldReader locationReader = partitions.getFieldReader(BLOCK_PARTITION_COLUMN_NAME);
        locationReader.setPosition(curPartition);
        SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
        LOGGER.info("{}: Input partition is {}", getSplitsRequest.getQueryId(), locationReader.readText());
        Split.Builder splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey()).add(BLOCK_PARTITION_COLUMN_NAME, String.valueOf(locationReader.readText()));
        splits.add(splitBuilder.build());
        if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
            // We exceeded the number of splits we want to return in a single request; return what we have and provide a continuation token.
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition + 1));
        }
    }
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
Also used : SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) Split(com.amazonaws.athena.connector.lambda.domain.Split) FieldReader(org.apache.arrow.vector.complex.reader.FieldReader) HashSet(java.util.HashSet)
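This method caps each response at MAX_SPLITS_PER_REQUEST and relies on the caller to come back with the continuation token. A hedged sketch of how a caller could drain every split across multiple requests (the helper name and the way the follow-up request is rebuilt are illustrative assumptions, based on the SDK's GetSplitsRequest copy constructor that carries a continuation token):

// Illustrative helper, not part of the connector: gathers all splits by following continuation tokens.
private Set<Split> collectAllSplits(OracleMetadataHandler handler, BlockAllocator allocator, GetSplitsRequest firstRequest) {
    Set<Split> allSplits = new HashSet<>();
    GetSplitsRequest request = firstRequest;
    while (true) {
        GetSplitsResponse response = handler.doGetSplits(allocator, request);
        allSplits.addAll(response.getSplits());
        if (response.getContinuationToken() == null) {
            // A null token means splits were produced for every partition row.
            return allSplits;
        }
        // Rebuild the request with the token so the handler resumes from the next partition row.
        request = new GetSplitsRequest(request, response.getContinuationToken());
    }
}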

Example 75 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

In class PropertyGraphHandler, method executeQuery.

/**
 * Used to read the row data associated with the provided Split.
 *
 * @param spiller            A BlockSpiller that should be used to write the row
 *                           data associated with this Split. The BlockSpiller
 *                           automatically handles chunking the response,
 *                           encrypting, and spilling to S3.
 * @param recordsRequest     Details of the read request, including: 1. The
 *                           Split 2. The Catalog, Database, and Table the read
 *                           request is for. 3. The filtering predicate (if any)
 *                           4. The columns required for projection.
 * @param queryStatusChecker A QueryStatusChecker that you can use to stop doing
 *                           work for a query that has already terminated
 * @throws Exception
 * @note Avoid writing >10 rows per-call to BlockSpiller.writeRow(...) because
 *       this will limit the BlockSpiller's ability to control Block size. The
 *       resulting increase in Block size may cause failures and reduced
 *       performance.
 */
public void executeQuery(ReadRecordsRequest recordsRequest, final QueryStatusChecker queryStatusChecker, final BlockSpiller spiller) throws Exception {
    logger.debug("readWithConstraint: enter - " + recordsRequest.getSplit());
    long numRows = 0;
    Client client = neptuneConnection.getNeptuneClientConnection();
    GraphTraversalSource graphTraversalSource = neptuneConnection.getTraversalSource(client);
    GraphTraversal graphTraversal = null;
    String labelName = recordsRequest.getTableName().getTableName();
    GeneratedRowWriter.RowWriterBuilder builder = GeneratedRowWriter.newBuilder(recordsRequest.getConstraints());
    String type = recordsRequest.getSchema().getCustomMetadata().get("componenttype");
    TableSchemaMetaType tableSchemaMetaType = TableSchemaMetaType.valueOf(type.toUpperCase());
    logger.debug("readWithConstraint: schema type is " + tableSchemaMetaType.toString());
    if (tableSchemaMetaType != null) {
        switch(tableSchemaMetaType) {
            case VERTEX:
                graphTraversal = graphTraversalSource.V().hasLabel(labelName);
                getQueryPartForContraintsMap(graphTraversal, recordsRequest);
                graphTraversal = graphTraversal.valueMap().with(WithOptions.tokens);
                for (final Field nextField : recordsRequest.getSchema().getFields()) {
                    VertexRowWriter.writeRowTemplate(builder, nextField);
                }
                break;
            case EDGE:
                graphTraversal = graphTraversalSource.E().hasLabel(labelName);
                getQueryPartForContraintsMap(graphTraversal, recordsRequest);
                graphTraversal = graphTraversal.elementMap();
                for (final Field nextField : recordsRequest.getSchema().getFields()) {
                    EdgeRowWriter.writeRowTemplate(builder, nextField);
                }
                break;
        }
    }
    // log string equivalent of gremlin query
    logger.debug("readWithConstraint: enter - " + GroovyTranslator.of("g").translate(graphTraversal.asAdmin().getBytecode()));
    final GraphTraversal graphTraversalFinal1 = graphTraversal;
    final GeneratedRowWriter rowWriter1 = builder.build();
    while (graphTraversalFinal1.hasNext() && queryStatusChecker.isQueryRunning()) {
        numRows++;
        spiller.writeRows((final Block block, final int rowNum) -> {
            final Map obj = (Map) graphTraversalFinal1.next();
            return (rowWriter1.writeRow(block, rowNum, (Object) obj) ? 1 : 0);
        });
    }
    logger.info("readWithConstraint: numRows[{}]", numRows);
}
Also used : GraphTraversalSource(org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource) TableSchemaMetaType(com.amazonaws.athena.connectors.neptune.propertygraph.Enums.TableSchemaMetaType) Field(org.apache.arrow.vector.types.pojo.Field) GeneratedRowWriter(com.amazonaws.athena.connector.lambda.data.writers.GeneratedRowWriter) GraphTraversal(org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal) Block(com.amazonaws.athena.connector.lambda.data.Block) Client(org.apache.tinkerpop.gremlin.driver.Client) Map(java.util.Map)
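VertexRowWriter.writeRowTemplate and EdgeRowWriter.writeRowTemplate register per-field extractors on the GeneratedRowWriter builder; those extractors are what writeRow invokes for each projected column. A hedged sketch of what such a registration could look like for a string-typed field, assuming the row context is the Map produced by the Gremlin valueMap()/elementMap() step (the name writeStringFieldTemplate and the value handling are illustrative, not the Neptune connector's code):

// Assumes the SDK's VarCharExtractor and NullableVarCharHolder from the data.writers extractor/holder packages.
private static void writeStringFieldTemplate(GeneratedRowWriter.RowWriterBuilder builder, Field field) {
    String fieldName = field.getName();
    builder.withExtractor(fieldName, (VarCharExtractor) (Object context, NullableVarCharHolder value) -> {
        // The context passed to writeRow is the Map emitted by the traversal for one vertex or edge.
        Map<?, ?> row = (Map<?, ?>) context;
        Object fieldValue = row.get(fieldName);
        value.isSet = fieldValue == null ? 0 : 1;
        value.value = fieldValue == null ? null : fieldValue.toString();
    });
}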

Aggregations

Block (com.amazonaws.athena.connector.lambda.data.Block): 113
Test (org.junit.Test): 39
HashMap (java.util.HashMap): 35
Schema (org.apache.arrow.vector.types.pojo.Schema): 35
Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints): 32
Split (com.amazonaws.athena.connector.lambda.domain.Split): 31
GetSplitsResponse (com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse): 28
FieldReader (org.apache.arrow.vector.complex.reader.FieldReader): 28
TableName (com.amazonaws.athena.connector.lambda.domain.TableName): 27
SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation): 23
HashSet (java.util.HashSet): 23
ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet): 20
Field (org.apache.arrow.vector.types.pojo.Field): 17
GetSplitsRequest (com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest): 13
PreparedStatement (java.sql.PreparedStatement): 13
ResultSet (java.sql.ResultSet): 13
ArrayList (java.util.ArrayList): 13
MetadataResponse (com.amazonaws.athena.connector.lambda.metadata.MetadataResponse): 12
Connection (java.sql.Connection): 12
ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest): 11