
Example 1 with S3SpillLocation

Use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in the project aws-athena-query-federation by awslabs.

From the class S3BlockSpillerTest, method spillTest.

@Test
public void spillTest() throws IOException {
    logger.info("spillTest: enter");
    logger.info("spillTest: starting write test");
    final ByteHolder byteHolder = new ByteHolder();
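    // Capture whatever the spiller uploads to S3 so the read half of the test can replay it.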
    when(mockS3.putObject(eq(bucket), anyString(), anyObject(), anyObject())).thenAnswer(new Answer<Object>() {

        @Override
        public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
            InputStream inputStream = (InputStream) invocationOnMock.getArguments()[2];
            byteHolder.setBytes(ByteStreams.toByteArray(inputStream));
            return mock(PutObjectResult.class);
        }
    });
    SpillLocation blockLocation = blockWriter.write(expected);
    if (blockLocation instanceof S3SpillLocation) {
        assertEquals(bucket, ((S3SpillLocation) blockLocation).getBucket());
        assertEquals(prefix + "/" + requestId + "/" + splitId + ".0", ((S3SpillLocation) blockLocation).getKey());
    }
    SpillLocation blockLocation2 = blockWriter.write(expected);
    if (blockLocation2 instanceof S3SpillLocation) {
        assertEquals(bucket, ((S3SpillLocation) blockLocation2).getBucket());
        assertEquals(prefix + "/" + requestId + "/" + splitId + ".1", ((S3SpillLocation) blockLocation2).getKey());
    }
    verify(mockS3, times(1)).putObject(eq(bucket), eq(prefix + "/" + requestId + "/" + splitId + ".0"), anyObject(), anyObject());
    verify(mockS3, times(1)).putObject(eq(bucket), eq(prefix + "/" + requestId + "/" + splitId + ".1"), anyObject(), anyObject());
    verifyNoMoreInteractions(mockS3);
    reset(mockS3);
    logger.info("spillTest: Starting read test.");
    when(mockS3.getObject(eq(bucket), eq(prefix + "/" + requestId + "/" + splitId + ".1"))).thenAnswer(new Answer<Object>() {

        @Override
        public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
            S3Object mockObject = mock(S3Object.class);
            when(mockObject.getObjectContent()).thenReturn(new S3ObjectInputStream(new ByteArrayInputStream(byteHolder.getBytes()), null));
            return mockObject;
        }
    });
    Block block = blockWriter.read((S3SpillLocation) blockLocation2, spillConfig.getEncryptionKey(), expected.getSchema());
    assertEquals(expected, block);
    verify(mockS3, times(1)).getObject(eq(bucket), eq(prefix + "/" + requestId + "/" + splitId + ".1"));
    verifyNoMoreInteractions(mockS3);
    logger.info("spillTest: exit");
}
Also used : SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) PutObjectResult(com.amazonaws.services.s3.model.PutObjectResult) ByteArrayInputStream(java.io.ByteArrayInputStream) S3ObjectInputStream(com.amazonaws.services.s3.model.S3ObjectInputStream) InputStream(java.io.InputStream) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3Object(com.amazonaws.services.s3.model.S3Object) Matchers.anyObject(org.mockito.Matchers.anyObject) Test(org.junit.Test)
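A note on the keys asserted above: spillTest expects each spilled block to land under <prefix>/<requestId>/<splitId>.<blockNumber>, with the block number incrementing on every write. A minimal sketch of that layout (all identifiers below are placeholders, not values from the test):

String bucket = "example-spill-bucket";
String prefix = "athena-spill";
String requestId = "query-1234";
String splitId = "split-0";
// First and second spilled blocks for the same split, matching the assertions above:
String firstKey = prefix + "/" + requestId + "/" + splitId + ".0";
String secondKey = prefix + "/" + requestId + "/" + splitId + ".1";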

Example 2 with S3SpillLocation

Use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in the project aws-athena-query-federation by awslabs.

From the class HbaseRecordHandlerTest, method doReadRecordsNoSpill.

@Test
public void doReadRecordsNoSpill() throws Exception {
    List<Result> results = TestUtils.makeResults(100);
    ResultScanner mockScanner = mock(ResultScanner.class);
    when(mockScanner.iterator()).thenReturn(results.iterator());
    when(mockClient.scanTable(anyObject(), any(Scan.class), anyObject())).thenAnswer((InvocationOnMock invocationOnMock) -> {
        ResultProcessor processor = (ResultProcessor) invocationOnMock.getArguments()[2];
        return processor.scan(mockScanner);
    });
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("family1:col3", SortedRangeSet.copyOf(Types.MinorType.BIGINT.getType(), ImmutableList.of(Range.equal(allocator, Types.MinorType.BIGINT.getType(), 1L)), false));
    S3SpillLocation splitLoc = S3SpillLocation.newBuilder().withBucket(UUID.randomUUID().toString()).withSplitId(UUID.randomUUID().toString()).withQueryId(UUID.randomUUID().toString()).withIsDirectory(true).build();
    Split.Builder splitBuilder = Split.newBuilder(splitLoc, keyFactory.create()).add(HBASE_CONN_STR, "fake_con_str").add(START_KEY_FIELD, "fake_start_key").add(END_KEY_FIELD, "fake_end_key").add(REGION_ID_FIELD, "fake_region_id").add(REGION_NAME_FIELD, "fake_region_name");
    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG, "queryId-" + System.currentTimeMillis(), new TableName(DEFAULT_SCHEMA, TEST_TABLE), schemaForRead, splitBuilder.build(), new Constraints(constraintsMap),
    // 100GB limits, so we don't expect this request to spill
    100_000_000_000L, 100_000_000_000L);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);
    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecordCount());
    assertTrue(response.getRecords().getRowCount() == 1);
    logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(response.getRecords(), 0));
}
Also used : ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) HashMap(java.util.HashMap) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Result(org.apache.hadoop.hbase.client.Result) PutObjectResult(com.amazonaws.services.s3.model.PutObjectResult) GetTableResult(com.amazonaws.services.glue.model.GetTableResult) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Scan(org.apache.hadoop.hbase.client.Scan) ResultProcessor(com.amazonaws.athena.connectors.hbase.connection.ResultProcessor) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)
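The instanceof assertion is what makes this a no-spill test: with 100GB limits the rows come back inline in a ReadRecordsResponse, whereas a spilling read returns a RemoteReadRecordsResponse (see Example 4). A hedged sketch of how a caller would branch on the two response types, using only accessors that appear in the examples on this page (the log messages are placeholders):

RecordResponse raw = handler.doReadRecords(allocator, request);
if (raw instanceof ReadRecordsResponse) {
    // No spill: rows are returned inline in a single block.
    ReadRecordsResponse inline = (ReadRecordsResponse) raw;
    logger.info("inline rows: {}", inline.getRecordCount());
} else if (raw instanceof RemoteReadRecordsResponse) {
    // Spill: the response only carries S3SpillLocations that must be read back separately.
    RemoteReadRecordsResponse remote = (RemoteReadRecordsResponse) raw;
    logger.info("spilled blocks: {}", remote.getRemoteBlocks().size());
}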

Example 3 with S3SpillLocation

Use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in the project aws-athena-query-federation by awslabs.

From the class JdbcRecordHandlerTest, method readWithConstraint.

@Test
public void readWithConstraint() throws SQLException {
    ConstraintEvaluator constraintEvaluator = Mockito.mock(ConstraintEvaluator.class);
    Mockito.when(constraintEvaluator.apply(Mockito.anyString(), Mockito.any())).thenReturn(true);
    TableName inputTableName = new TableName("testSchema", "testTable");
    SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder();
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder("testCol1", org.apache.arrow.vector.types.Types.MinorType.INT.getType()).build());
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder("testCol2", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder("testPartitionCol", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    Schema fieldSchema = expectedSchemaBuilder.build();
    BlockAllocator allocator = new BlockAllocatorImpl();
    S3SpillLocation s3SpillLocation = S3SpillLocation.newBuilder().withIsDirectory(true).build();
    Split.Builder splitBuilder = Split.newBuilder(s3SpillLocation, null).add("testPartitionCol", String.valueOf("testPartitionValue"));
    Constraints constraints = Mockito.mock(Constraints.class, Mockito.RETURNS_DEEP_STUBS);
    String[] schema = { "testCol1", "testCol2" };
    int[] columnTypes = { Types.INTEGER, Types.VARCHAR };
    Object[][] values = { { 1, "testVal1" }, { 2, "testVal2" } };
    AtomicInteger rowNumber = new AtomicInteger(-1);
    ResultSet resultSet = mockResultSet(schema, columnTypes, values, rowNumber);
    Mockito.when(this.preparedStatement.executeQuery()).thenReturn(resultSet);
    SpillConfig spillConfig = Mockito.mock(SpillConfig.class);
    Mockito.when(spillConfig.getSpillLocation()).thenReturn(s3SpillLocation);
    BlockSpiller s3Spiller = new S3BlockSpiller(this.amazonS3, spillConfig, allocator, fieldSchema, constraintEvaluator);
    ReadRecordsRequest readRecordsRequest = new ReadRecordsRequest(this.federatedIdentity, "testCatalog", "testQueryId", inputTableName, fieldSchema, splitBuilder.build(), constraints, 1024, 1024);
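    // Intercept the spill write and verify that the spilled payload contains the mocked row values.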
    Mockito.when(amazonS3.putObject(Mockito.anyString(), Mockito.anyString(), Mockito.any(), Mockito.any())).thenAnswer((Answer<PutObjectResult>) invocation -> {
        ByteArrayInputStream byteArrayInputStream = (ByteArrayInputStream) invocation.getArguments()[2];
        int n = byteArrayInputStream.available();
        byte[] bytes = new byte[n];
        byteArrayInputStream.read(bytes, 0, n);
        String data = new String(bytes, StandardCharsets.UTF_8);
        Assert.assertTrue(data.contains("testVal1") || data.contains("testVal2") || data.contains("testPartitionValue"));
        return new PutObjectResult();
    });
    this.jdbcRecordHandler.readWithConstraint(s3Spiller, readRecordsRequest, queryStatusChecker);
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) Connection(java.sql.Connection) S3BlockSpiller(com.amazonaws.athena.connector.lambda.data.S3BlockSpiller) com.amazonaws.athena.connector.lambda.data.writers.extractors(com.amazonaws.athena.connector.lambda.data.writers.extractors) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) ConstraintEvaluator(com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator) Answer(org.mockito.stubbing.Answer) BlockSpiller(com.amazonaws.athena.connector.lambda.data.BlockSpiller) SQLException(java.sql.SQLException) JdbcCredentialProvider(com.amazonaws.athena.connectors.jdbc.connection.JdbcCredentialProvider) ByteArrayInputStream(java.io.ByteArrayInputStream) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) ResultSet(java.sql.ResultSet) Map(java.util.Map) TestBase(com.amazonaws.athena.connectors.jdbc.TestBase) AmazonS3(com.amazonaws.services.s3.AmazonS3) QueryStatusChecker(com.amazonaws.athena.connector.lambda.QueryStatusChecker) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) FederatedIdentity(com.amazonaws.athena.connector.lambda.security.FederatedIdentity) PutObjectResult(com.amazonaws.services.s3.model.PutObjectResult) GetSecretValueResult(com.amazonaws.services.secretsmanager.model.GetSecretValueResult) Before(org.junit.Before) AmazonAthena(com.amazonaws.services.athena.AmazonAthena) Split(com.amazonaws.athena.connector.lambda.domain.Split) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) AWSSecretsManager(com.amazonaws.services.secretsmanager.AWSSecretsManager) Test(org.junit.Test) DatabaseConnectionConfig(com.amazonaws.athena.connectors.jdbc.connection.DatabaseConnectionConfig) PreparedStatement(java.sql.PreparedStatement) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) StandardCharsets(java.nio.charset.StandardCharsets) FieldBuilder(com.amazonaws.athena.connector.lambda.data.FieldBuilder) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) Mockito(org.mockito.Mockito) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) SpillConfig(com.amazonaws.athena.connector.lambda.data.SpillConfig) Assert(org.junit.Assert) JdbcConnectionFactory(com.amazonaws.athena.connectors.jdbc.connection.JdbcConnectionFactory) Collections(java.util.Collections) GetSecretValueRequest(com.amazonaws.services.secretsmanager.model.GetSecretValueRequest) Types(java.sql.Types)

Example 4 with S3SpillLocation

Use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in the project aws-athena-query-federation by awslabs.

From the class NeptuneRecordHandlerTest, method doReadRecordsSpill.

@Test
public void doReadRecordsSpill() throws Exception {
    S3SpillLocation splitLoc = S3SpillLocation.newBuilder().withBucket(UUID.randomUUID().toString()).withSplitId(UUID.randomUUID().toString()).withQueryId(UUID.randomUUID().toString()).withIsDirectory(true).build();
    allocator = new BlockAllocatorImpl();
    // Greater Than filter
    HashMap<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("property1", SortedRangeSet.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 9)));
    buildGraphTraversal();
    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG, QUERY_ID, TABLE_NAME, schemaPGVertexForRead, Split.newBuilder(splitLoc, keyFactory.create()).build(), new Constraints(constraintsMap),
    // ~1.5MB so we should see some spill
    1_500_000L, 0L);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);
    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() == 1);
        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) next;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
}
Also used : RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) Block(com.amazonaws.athena.connector.lambda.data.Block) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)
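The spillReader used in the loop above comes from the test's setup, which this excerpt does not include. As a hedged sketch, such a reader is typically built from the same S3 client and allocator the handler uses; the S3BlockSpillReader constructor below is an assumption, not something confirmed by the code on this page:

// Assumed setup, not shown in the excerpt above.
S3BlockSpillReader spillReader = new S3BlockSpillReader(amazonS3, allocator);
// Each spilled block is then fetched exactly as in the loop above:
// spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())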

Example 5 with S3SpillLocation

Use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in the project aws-athena-query-federation by awslabs.

From the class NeptuneRecordHandlerTest, method invokeAndAssert.

/**
 * Invokes a read request with the given schema and constraints, then asserts on the returned row count.
 *
 * @param schemaPG            Schema to read with
 * @param constraintMap       Constraint map for the Gremlin query
 * @param expectedRecordCount Expected row count from the Gremlin query response
 */
private void invokeAndAssert(Schema schemaPG, HashMap<String, ValueSet> constraintMap, Integer expectedRecordCount) throws Exception {
    S3SpillLocation spillLoc = S3SpillLocation.newBuilder().withBucket(UUID.randomUUID().toString()).withSplitId(UUID.randomUUID().toString()).withQueryId(UUID.randomUUID().toString()).withIsDirectory(true).build();
    allocator = new BlockAllocatorImpl();
    buildGraphTraversal();
    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG, QUERY_ID, TABLE_NAME, schemaPG, Split.newBuilder(spillLoc, null).build(), new Constraints(constraintMap), 100_000_000_000L, 100_000_000_000L);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);
    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    assertTrue(response.getRecords().getRowCount() == expectedRecordCount);
    logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(response.getRecords(), 0));
}
Also used : ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse)
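A hypothetical call site for this helper (not part of the excerpt), mirroring the constraint-building pattern from Example 4; the expected count of 1 is a placeholder that depends on the mocked traversal:

allocator = new BlockAllocatorImpl();
HashMap<String, ValueSet> constraintsMap = new HashMap<>();
constraintsMap.put("property1", SortedRangeSet.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 9)));
invokeAndAssert(schemaPGVertexForRead, constraintsMap, 1);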

Aggregations

S3SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation): 28
Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints): 22
ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest): 22
Test (org.junit.Test): 22
ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet): 20
RecordResponse (com.amazonaws.athena.connector.lambda.records.RecordResponse): 20
HashMap (java.util.HashMap): 20
Matchers.anyString (org.mockito.Matchers.anyString): 19
RemoteReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse): 15
InvocationOnMock (org.mockito.invocation.InvocationOnMock): 15
SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation): 13
ReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse): 13
Block (com.amazonaws.athena.connector.lambda.data.Block): 12
Split (com.amazonaws.athena.connector.lambda.domain.Split): 12
ArrayList (java.util.ArrayList): 12
TableName (com.amazonaws.athena.connector.lambda.domain.TableName): 11
EquatableValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet): 8
PutObjectResult (com.amazonaws.services.s3.model.PutObjectResult): 8
ByteArrayInputStream (java.io.ByteArrayInputStream): 8
Schema (org.apache.arrow.vector.types.pojo.Schema): 8
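The counts above reflect the dominant pattern across the 28 usages: build an S3SpillLocation, wrap it in a Split, and hand it to a ReadRecordsRequest. A minimal sketch using only builder calls that appear in the examples above (the bucket name and ids are placeholders):

S3SpillLocation spillLoc = S3SpillLocation.newBuilder()
        .withBucket("example-spill-bucket")            // placeholder bucket
        .withSplitId(UUID.randomUUID().toString())
        .withQueryId(UUID.randomUUID().toString())
        .withIsDirectory(true)
        .build();
// A null EncryptionKey, as in Examples 3 and 5, means the spilled data is not encrypted.
Split split = Split.newBuilder(spillLoc, null).build();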