
Example 11 with Block

Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

The class HbaseMetadataHandlerTest, method doGetSplits.

@Test
public void doGetSplits() throws IOException {
    List<HRegionInfo> regionServers = new ArrayList<>();
    regionServers.add(TestUtils.makeRegion(1, "schema1", "table1"));
    regionServers.add(TestUtils.makeRegion(2, "schema1", "table1"));
    regionServers.add(TestUtils.makeRegion(3, "schema1", "table1"));
    regionServers.add(TestUtils.makeRegion(4, "schema1", "table1"));
    when(mockClient.getTableRegions(any())).thenReturn(regionServers);
    List<String> partitionCols = new ArrayList<>();
    Block partitions = BlockUtils.newBlock(allocator, "partitionId", Types.MinorType.INT.getType(), 0);
    String continuationToken = null;
    GetSplitsRequest originalReq = new GetSplitsRequest(IDENTITY, QUERY_ID, DEFAULT_CATALOG, TABLE_NAME, partitions, partitionCols, new Constraints(new HashMap<>()), null);
    GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
    logger.info("doGetSplits: req[{}]", req);
    MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
    assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
    GetSplitsResponse response = (GetSplitsResponse) rawResponse;
    continuationToken = response.getContinuationToken();
    logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", new Object[] { continuationToken, response.getSplits().size() });
    assertEquals("Continuation criteria violated", 4, response.getSplits().size());
    assertNull("Continuation criteria violated", response.getContinuationToken());
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) ArrayList(java.util.ArrayList) MetadataResponse(com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) Matchers.anyString(org.mockito.Matchers.anyString) Test(org.junit.Test)
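
TestUtils.makeRegion is referenced above but not shown. A minimal sketch of what such a helper might look like, assuming HBase's HRegionInfo(TableName, byte[], byte[]) constructor; the one-byte key scheme is purely illustrative:

private static HRegionInfo makeRegion(int id, String schema, String table) {
    // Give each region a distinct, non-overlapping one-byte key range.
    return new HRegionInfo(org.apache.hadoop.hbase.TableName.valueOf(schema, table),
            new byte[] { (byte) id }, new byte[] { (byte) (id + 1) });
}

With four regions returned by the mocked client, the handler is expected to emit one split per region, which is exactly what the two assertions verify.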

Example 12 with Block

Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

The class BigQueryRecordHandler, method outputResults.

/**
 * Iterates through all the results that come back from BigQuery and saves them to be read by the Athena connector.
 *
 * @param spiller        The {@link BlockSpiller} provided when readWithConstraints() is called.
 * @param recordsRequest The {@link ReadRecordsRequest} provided when readWithConstraints() is called.
 * @param result         The {@link TableResult} provided by {@link BigQuery} client after a query has completed executing.
 */
private void outputResults(BlockSpiller spiller, ReadRecordsRequest recordsRequest, TableResult result) {
    logger.info("Inside outputResults: ");
    String timeStampColsList = Objects.toString(recordsRequest.getSchema().getCustomMetadata().get("timeStampCols"), "");
    logger.info("timeStampColsList: " + timeStampColsList);
    if (result != null) {
        for (FieldValueList row : result.iterateAll()) {
            spiller.writeRows((Block block, int rowNum) -> {
                boolean isMatched = true;
                for (Field field : recordsRequest.getSchema().getFields()) {
                    FieldValue fieldValue = row.get(field.getName());
                    Object val = getObjectFromFieldValue(field.getName(), fieldValue, field.getFieldType().getType(), timeStampColsList.contains(field.getName()));
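                    // offerValue returns false when the value fails the block's constraints,
                    // so the writer returns 0 below and the spiller drops this row.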
                    isMatched &= block.offerValue(field.getName(), rowNum, val);
                    if (!isMatched) {
                        return 0;
                    }
                }
                return 1;
            });
        }
    }
}
Also used : Field(org.apache.arrow.vector.types.pojo.Field) Block(com.amazonaws.athena.connector.lambda.data.Block) FieldValueList(com.google.cloud.bigquery.FieldValueList) FieldValue(com.google.cloud.bigquery.FieldValue) BigQueryUtils.getObjectFromFieldValue(com.amazonaws.athena.connectors.google.bigquery.BigQueryUtils.getObjectFromFieldValue)
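
The pattern to note here is the return-value contract of BlockSpiller.writeRows: the lambda returns the number of rows actually written, so returning 0 after a failed offerValue filters the row out. A minimal sketch of the same pattern, with a single hypothetical column name:

spiller.writeRows((Block block, int rowNum) ->
        block.offerValue("my_col", rowNum, 42) ? 1 : 0);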

Example 13 with Block

Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

The class ReadRecordsRequestSerDeTest, method beforeTest.

@Before
public void beforeTest() throws IOException {
    String yearCol = "year";
    String monthCol = "month";
    String dayCol = "day";
    Schema schema = SchemaBuilder.newBuilder()
            .addField(yearCol, new ArrowType.Int(32, true))
            .addField(monthCol, new ArrowType.Int(32, true))
            .addField(dayCol, new ArrowType.Int(32, true))
            .addField("col2", new ArrowType.Utf8())
            .addField("col3", Types.MinorType.FLOAT8.getType())
            .addField("col4", Types.MinorType.FLOAT8.getType())
            .addField("col5", Types.MinorType.FLOAT8.getType())
            .build();
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("col3", SortedRangeSet.copyOf(Types.MinorType.FLOAT8.getType(), ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.FLOAT8.getType(), -10000D)), false));
    constraintsMap.put("col4", EquatableValueSet.newBuilder(allocator, Types.MinorType.FLOAT8.getType(), false, true).add(1.1D).build());
    constraintsMap.put("col5", new AllOrNoneValueSet(Types.MinorType.FLOAT8.getType(), false, true));
    Constraints constraints = new Constraints(constraintsMap);
    Block partitions = allocator.createBlock(schema);
    int numPartitions = 10;
    for (int i = 0; i < numPartitions; i++) {
        BlockUtils.setValue(partitions.getFieldVector(yearCol), i, 2016 + i);
        BlockUtils.setValue(partitions.getFieldVector(monthCol), i, (i % 12) + 1);
        BlockUtils.setValue(partitions.getFieldVector(dayCol), i, (i % 28) + 1);
    }
    partitions.setRowCount(numPartitions);
    SpillLocation spillLocation = S3SpillLocation.newBuilder()
            .withBucket("athena-virtuoso-test")
            .withPrefix("lambda-spill")
            .withQueryId("test-query-id")
            .withSplitId("test-split-id")
            .withIsDirectory(true)
            .build();
    EncryptionKey encryptionKey = new EncryptionKey("test-key".getBytes(), "test-nonce".getBytes());
    Split split = Split.newBuilder(spillLocation, encryptionKey)
            .add("year", "2017")
            .add("month", "11")
            .add("day", "1")
            .build();
    expected = new ReadRecordsRequest(federatedIdentity, "test-query-id", "test-catalog",
            new TableName("test-schema", "test-table"), schema, split, constraints,
            100_000_000_000L, 100_000_000_000L);
    String expectedSerDeFile = utils.getResourceOrFail("serde/v2", "ReadRecordsRequest.json");
    expectedSerDeText = utils.readAllAsString(expectedSerDeFile).trim();
}
Also used : SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) HashMap(java.util.HashMap) AllOrNoneValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.AllOrNoneValueSet) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) EncryptionKey(com.amazonaws.athena.connector.lambda.security.EncryptionKey) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Block(com.amazonaws.athena.connector.lambda.data.Block) Split(com.amazonaws.athena.connector.lambda.domain.Split) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) AllOrNoneValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.AllOrNoneValueSet) Before(org.junit.Before)
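
Once populated, the partitions Block can be read back through Arrow FieldReaders, which is how consumers of the block typically access it. A short sketch, reusing the block built above:

FieldReader yearReader = partitions.getFieldReader(yearCol);
for (int i = 0; i < partitions.getRowCount(); i++) {
    yearReader.setPosition(i);
    int year = yearReader.readInteger(); // yields 2016 + i, per the loop above
}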

Example 14 with Block

Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

The class UserDefinedFunctionResponseSerDeTest, method beforeTest.

@Before
public void beforeTest() throws IOException {
    String productField = "product";
    Schema outputSchema = SchemaBuilder.newBuilder()
            .addField(productField, Types.MinorType.INT.getType())
            .build();
    Block records = allocator.createBlock(outputSchema);
    int numRecords = 10;
    for (int i = 0; i < numRecords; i++) {
        BlockUtils.setValue(records.getFieldVector(productField), i, i * 2);
    }
    records.setRowCount(numRecords);
    expected = new UserDefinedFunctionResponse(records, "test-method");
    String expectedSerDeFile = utils.getResourceOrFail("serde/v2", "UserDefinedFunctionResponse.json");
    expectedSerDeText = utils.readAllAsString(expectedSerDeFile).trim();
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) Block(com.amazonaws.athena.connector.lambda.data.Block) UserDefinedFunctionResponse(com.amazonaws.athena.connector.lambda.udf.UserDefinedFunctionResponse) Before(org.junit.Before)
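
A quick sketch of how the response could be sanity-checked after construction (accessor names assumed from the SDK's UserDefinedFunctionResponse):

assertEquals("test-method", expected.getMethodName());
assertEquals(10, expected.getRecords().getRowCount());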

Example 15 with Block

Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

The class JdbcRecordHandler, method readWithConstraint.

@Override
public void readWithConstraint(BlockSpiller blockSpiller, ReadRecordsRequest readRecordsRequest, QueryStatusChecker queryStatusChecker) {
    LOGGER.info("{}: Catalog: {}, table {}, splits {}", readRecordsRequest.getQueryId(), readRecordsRequest.getCatalogName(), readRecordsRequest.getTableName(), readRecordsRequest.getSplit().getProperties());
    try (Connection connection = this.jdbcConnectionFactory.getConnection(getCredentialProvider())) {
        // Disable auto-commit: some databases require this before a result set can be streamed.
        connection.setAutoCommit(false);
        try (PreparedStatement preparedStatement = buildSplitSql(connection, readRecordsRequest.getCatalogName(), readRecordsRequest.getTableName(), readRecordsRequest.getSchema(), readRecordsRequest.getConstraints(), readRecordsRequest.getSplit());
            ResultSet resultSet = preparedStatement.executeQuery()) {
            Map<String, String> partitionValues = readRecordsRequest.getSplit().getProperties();
            GeneratedRowWriter.RowWriterBuilder rowWriterBuilder = GeneratedRowWriter.newBuilder(readRecordsRequest.getConstraints());
            for (Field next : readRecordsRequest.getSchema().getFields()) {
                if (next.getType() instanceof ArrowType.List) {
                    rowWriterBuilder.withFieldWriterFactory(next.getName(), makeFactory(next));
                } else {
                    rowWriterBuilder.withExtractor(next.getName(), makeExtractor(next, resultSet, partitionValues));
                }
            }
            GeneratedRowWriter rowWriter = rowWriterBuilder.build();
            int rowsReturnedFromDatabase = 0;
            while (resultSet.next()) {
                if (!queryStatusChecker.isQueryRunning()) {
                    return;
                }
                blockSpiller.writeRows((Block block, int rowNum) -> rowWriter.writeRow(block, rowNum, resultSet) ? 1 : 0);
                rowsReturnedFromDatabase++;
            }
            LOGGER.info("{} rows returned by database.", rowsReturnedFromDatabase);
            connection.commit();
        }
    } catch (SQLException sqlException) {
        throw new RuntimeException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
    }
}
Also used : SQLException(java.sql.SQLException) Connection(java.sql.Connection) PreparedStatement(java.sql.PreparedStatement) Field(org.apache.arrow.vector.types.pojo.Field) GeneratedRowWriter(com.amazonaws.athena.connector.lambda.data.writers.GeneratedRowWriter) ResultSet(java.sql.ResultSet) Block(com.amazonaws.athena.connector.lambda.data.Block) List(java.util.List) ArrayList(java.util.ArrayList)
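
makeExtractor (not shown) supplies the typed Extractor that GeneratedRowWriter compiles into its row-writing path; the extractor receives the ResultSet as its context. A minimal sketch of what one might look like for an INT column, with a hypothetical column name:

IntExtractor intExtractor = (Object context, NullableIntHolder dst) -> {
    ResultSet rs = (ResultSet) context;
    dst.value = rs.getInt("my_col");
    dst.isSet = rs.wasNull() ? 0 : 1; // flag NULLs so the writer leaves the slot unset
};
rowWriterBuilder.withExtractor("my_col", intExtractor);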

Aggregations

Block (com.amazonaws.athena.connector.lambda.data.Block) 113
Test (org.junit.Test) 39
HashMap (java.util.HashMap) 35
Schema (org.apache.arrow.vector.types.pojo.Schema) 35
Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) 32
Split (com.amazonaws.athena.connector.lambda.domain.Split) 31
GetSplitsResponse (com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) 28
FieldReader (org.apache.arrow.vector.complex.reader.FieldReader) 28
TableName (com.amazonaws.athena.connector.lambda.domain.TableName) 27
SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) 23
HashSet (java.util.HashSet) 23
ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) 20
Field (org.apache.arrow.vector.types.pojo.Field) 17
GetSplitsRequest (com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) 13
PreparedStatement (java.sql.PreparedStatement) 13
ResultSet (java.sql.ResultSet) 13
ArrayList (java.util.ArrayList) 13
MetadataResponse (com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) 12
Connection (java.sql.Connection) 12
ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) 11