Search in sources :

Example 6 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class ExampleMetadataHandlerTest method doGetSplits.

/**
 * The goal of this test is to test happy case for getting splits and also to exercise the continuation token
 * logic specifically.
 */
@Test
public void doGetSplits() {
    logger.info("doGetSplits: enter");
    String yearCol = "year";
    String monthCol = "month";
    String dayCol = "day";
    // This is the schema that ExampleMetadataHandler has layed out for a 'Partition' so we need to populate this
    // minimal set of info here.
    Schema schema = SchemaBuilder.newBuilder().addField(yearCol, new ArrowType.Int(16, false)).addField(monthCol, new ArrowType.Int(16, false)).addField(dayCol, new ArrowType.Int(16, false)).addField(ExampleMetadataHandler.PARTITION_LOCATION, new ArrowType.Utf8()).addField(ExampleMetadataHandler.SERDE, new ArrowType.Utf8()).build();
    List<String> partitionCols = new ArrayList<>();
    partitionCols.add(yearCol);
    partitionCols.add(monthCol);
    partitionCols.add(dayCol);
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(dayCol, SortedRangeSet.copyOf(Types.MinorType.INT.getType(), ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 20)), false));
    Block partitions = allocator.createBlock(schema);
    int num_partitions = 100;
    for (int i = 0; i < num_partitions; i++) {
        BlockUtils.setValue(partitions.getFieldVector(yearCol), i, 2016 + i);
        BlockUtils.setValue(partitions.getFieldVector(monthCol), i, (i % 12) + 1);
        BlockUtils.setValue(partitions.getFieldVector(dayCol), i, (i % 28) + 1);
        BlockUtils.setValue(partitions.getFieldVector(ExampleMetadataHandler.PARTITION_LOCATION), i, String.valueOf(i));
        BlockUtils.setValue(partitions.getFieldVector(ExampleMetadataHandler.SERDE), i, "TextInputType");
    }
    partitions.setRowCount(num_partitions);
    String continuationToken = null;
    GetSplitsRequest originalReq = new GetSplitsRequest(IdentityUtil.fakeIdentity(), "queryId", "catalog_name", new TableName("schema", "table_name"), partitions, partitionCols, new Constraints(constraintsMap), continuationToken);
    int numContinuations = 0;
    do {
        GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
        ObjectMapperUtil.assertSerialization(req);
        logger.info("doGetSplits: req[{}]", req);
        metadataHandler.setEncryption(numContinuations % 2 == 0);
        logger.info("doGetSplits: Toggle encryption " + (numContinuations % 2 == 0));
        MetadataResponse rawResponse = metadataHandler.doGetSplits(allocator, req);
        ObjectMapperUtil.assertSerialization(rawResponse);
        assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
        GetSplitsResponse response = (GetSplitsResponse) rawResponse;
        continuationToken = response.getContinuationToken();
        logger.info("doGetSplits: continuationToken[{}] - numSplits[{}] - maxSplits[{}]", new Object[] { continuationToken, response.getSplits().size(), MAX_SPLITS_PER_REQUEST });
        for (Split nextSplit : response.getSplits()) {
            if (numContinuations % 2 == 0) {
                assertNotNull(nextSplit.getEncryptionKey());
            } else {
                assertNull(nextSplit.getEncryptionKey());
            }
            assertNotNull(nextSplit.getProperty(SplitProperties.LOCATION.getId()));
            assertNotNull(nextSplit.getProperty(SplitProperties.SERDE.getId()));
            assertNotNull(nextSplit.getProperty(SplitProperties.SPLIT_PART.getId()));
        }
        assertTrue("Continuation criteria violated", (response.getSplits().size() == MAX_SPLITS_PER_REQUEST && response.getContinuationToken() != null) || response.getSplits().size() < MAX_SPLITS_PER_REQUEST);
        if (continuationToken != null) {
            numContinuations++;
        }
    } while (continuationToken != null);
    assertTrue(numContinuations > 0);
    logger.info("doGetSplits: exit");
}
Also used : GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) ArrayList(java.util.ArrayList) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) MetadataResponse(com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Example 7 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class ExampleMetadataHandlerTest method doGetTableLayout.

/**
 * 200,000,000 million partitions pruned down to 38,000 and transmitted in 25 seconds
 *
 * @throws Exception
 */
@Test
public void doGetTableLayout() throws Exception {
    logger.info("doGetTableLayout - enter");
    Schema tableSchema = SchemaBuilder.newBuilder().addIntField("day").addIntField("month").addIntField("year").build();
    Set<String> partitionCols = new HashSet<>();
    partitionCols.add("day");
    partitionCols.add("month");
    partitionCols.add("year");
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("day", SortedRangeSet.copyOf(Types.MinorType.INT.getType(), ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 20)), false));
    constraintsMap.put("month", SortedRangeSet.copyOf(Types.MinorType.INT.getType(), ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 2)), false));
    constraintsMap.put("year", SortedRangeSet.copyOf(Types.MinorType.INT.getType(), ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 1900)), false));
    GetTableLayoutRequest req = null;
    GetTableLayoutResponse res = null;
    try {
        req = new GetTableLayoutRequest(IdentityUtil.fakeIdentity(), "queryId", "default", new TableName("schema1", "table1"), new Constraints(constraintsMap), tableSchema, partitionCols);
        ObjectMapperUtil.assertSerialization(req);
        res = metadataHandler.doGetTableLayout(allocator, req);
        ObjectMapperUtil.assertSerialization(res);
        logger.info("doGetTableLayout - {}", res);
        Block partitions = res.getPartitions();
        for (int row = 0; row < partitions.getRowCount() && row < 10; row++) {
            logger.info("doGetTableLayout:{} {}", row, BlockUtils.rowToString(partitions, row));
        }
        assertTrue(partitions.getRowCount() > 0);
        logger.info("doGetTableLayout: partitions[{}]", partitions.getRowCount());
    } finally {
        try {
            req.close();
            res.close();
        } catch (Exception ex) {
            logger.error("doGetTableLayout: ", ex);
        }
    }
    logger.info("doGetTableLayout - exit");
}
Also used : HashMap(java.util.HashMap) Schema(org.apache.arrow.vector.types.pojo.Schema) LambdaFunctionException(com.amazonaws.services.lambda.invoke.LambdaFunctionException) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) Block(com.amazonaws.athena.connector.lambda.data.Block) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 8 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class BlockCryptoTest method test.

@Test
public void test() {
    Schema schema = SchemaBuilder.newBuilder().addField("col1", new ArrowType.Int(32, true)).addField("col2", new ArrowType.Utf8()).build();
    Block expected = allocator.createBlock(schema);
    BlockUtils.setValue(expected.getFieldVector("col1"), 1, 100);
    BlockUtils.setValue(expected.getFieldVector("col2"), 1, "VarChar");
    BlockUtils.setValue(expected.getFieldVector("col1"), 1, 101);
    BlockUtils.setValue(expected.getFieldVector("col2"), 1, "VarChar1");
    expected.setRowCount(2);
    AesGcmBlockCrypto crypto = new AesGcmBlockCrypto(new BlockAllocatorImpl());
    EncryptionKey key = keyFactory.create();
    byte[] cypher = crypto.encrypt(key, expected);
    Block actual = crypto.decrypt(key, cypher, schema);
    assertEquals(expected, actual);
}
Also used : BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) Block(com.amazonaws.athena.connector.lambda.data.Block) Test(org.junit.Test)

Example 9 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class BlockSerializationTest method serializationTest.

@Test
public void serializationTest() throws IOException {
    logger.info("serializationTest - enter");
    Block expected = BlockUtils.newBlock(otherAllocator, "col1", Types.MinorType.INT.getType(), 21);
    ObjectMapper serializer = ObjectMapperFactory.create(new BlockAllocatorImpl());
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    serializer.writeValue(out, expected);
    Block actual = serializer.readValue(new ByteArrayInputStream(out.toByteArray()), Block.class);
    assertEquals(expected, actual);
    logger.info("serializationTest - exit");
}
Also used : BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) ByteArrayInputStream(java.io.ByteArrayInputStream) Block(com.amazonaws.athena.connector.lambda.data.Block) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)

Example 10 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class GetTableLayoutResponseSerDeTest method beforeTest.

@Before
public void beforeTest() throws IOException {
    String yearCol = "year";
    String monthCol = "month";
    String dayCol = "day";
    Schema schema = SchemaBuilder.newBuilder().addField(yearCol, new ArrowType.Int(32, true)).addField(monthCol, new ArrowType.Int(32, true)).addField(dayCol, new ArrowType.Int(32, true)).addField("col3", new ArrowType.Utf8()).build();
    Block partitions = allocator.createBlock(schema);
    int num_partitions = 10;
    for (int i = 0; i < num_partitions; i++) {
        BlockUtils.setValue(partitions.getFieldVector(yearCol), i, 2016 + i);
        BlockUtils.setValue(partitions.getFieldVector(monthCol), i, (i % 12) + 1);
        BlockUtils.setValue(partitions.getFieldVector(dayCol), i, (i % 28) + 1);
    }
    partitions.setRowCount(num_partitions);
    expected = new GetTableLayoutResponse("test-catalog", new TableName("test-schema", "test-table"), partitions);
    String expectedSerDeFile = utils.getResourceOrFail("serde/v2", "GetTableLayoutResponse.json");
    expectedSerDeText = utils.readAllAsString(expectedSerDeFile).trim();
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) Block(com.amazonaws.athena.connector.lambda.data.Block) Before(org.junit.Before)

Aggregations

Block (com.amazonaws.athena.connector.lambda.data.Block)113 Test (org.junit.Test)39 HashMap (java.util.HashMap)35 Schema (org.apache.arrow.vector.types.pojo.Schema)35 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)32 Split (com.amazonaws.athena.connector.lambda.domain.Split)31 GetSplitsResponse (com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse)28 FieldReader (org.apache.arrow.vector.complex.reader.FieldReader)28 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)27 SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation)23 HashSet (java.util.HashSet)23 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)20 Field (org.apache.arrow.vector.types.pojo.Field)17 GetSplitsRequest (com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest)13 PreparedStatement (java.sql.PreparedStatement)13 ResultSet (java.sql.ResultSet)13 ArrayList (java.util.ArrayList)13 MetadataResponse (com.amazonaws.athena.connector.lambda.metadata.MetadataResponse)12 Connection (java.sql.Connection)12 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)11