Search in sources:

Example 56 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class EquatableValueSet method union.

/**
 * Builds a new single-column block containing every value of {@code left} followed by each
 * value of {@code right} that {@code isPresent} does not find in the left value block.
 *
 * @param allocator allocator used to create the result block
 * @param left      value set whose values are copied unconditionally; also supplies the result type
 * @param right     value set whose values are appended only when absent from {@code left}
 * @return the newly created block, with its row count set to the number of values written
 */
private static Block union(BlockAllocator allocator, EquatableValueSet left, EquatableValueSet right) {
    final Block merged = BlockUtils.newEmptyBlock(allocator, DEFAULT_COLUMN, left.getType());
    final FieldVector target = merged.getFieldVector(DEFAULT_COLUMN);

    // Phase 1: copy all values of the left-hand set verbatim.
    final Block leftValues = left.getValues();
    final FieldReader leftReader = leftValues.getFieldReader(DEFAULT_COLUMN);
    int written = 0;
    for (int row = 0; row < leftValues.getRowCount(); row++) {
        leftReader.setPosition(row);
        BlockUtils.setValue(target, written, leftReader.readObject());
        written++;
    }

    // Phase 2: append only those right-hand values that the left-hand set does not contain.
    final Block rightValues = right.getValues();
    final FieldReader rightReader = rightValues.getFieldReader(DEFAULT_COLUMN);
    for (int row = 0; row < rightValues.getRowCount(); row++) {
        rightReader.setPosition(row);
        if (isPresent(rightReader.readObject(), left.valueBlock)) {
            continue;
        }
        BlockUtils.setValue(target, written, rightReader.readObject());
        written++;
    }

    merged.setRowCount(written);
    return merged;
}
Also used : Block(com.amazonaws.athena.connector.lambda.data.Block) FieldVector(org.apache.arrow.vector.FieldVector) FieldReader(org.apache.arrow.vector.complex.reader.FieldReader)

Example 57 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class MarkerFactory method close.

/**
 * Closes every block held in {@code sharedMarkerBlocks} and then clears both
 * {@code sharedMarkerBlocks} and {@code markerLeases}.
 *
 * <p>A failure while closing one block does not stop the remaining blocks from being closed,
 * and both collections are always cleared. The first failure is rethrown once cleanup has
 * finished; any later failures are attached to it as suppressed exceptions.
 *
 * @throws Exception the first exception raised while closing a block, if any
 */
@Override
public void close() throws Exception {
    Exception firstFailure = null;
    try {
        for (Block next : sharedMarkerBlocks.values()) {
            try {
                next.close();
            } catch (Exception ex) {
                // Keep going so one bad block cannot leak all the others.
                if (firstFailure == null) {
                    firstFailure = ex;
                } else {
                    firstFailure.addSuppressed(ex);
                }
            }
        }
    } finally {
        sharedMarkerBlocks.clear();
        markerLeases.clear();
    }
    if (firstFailure != null) {
        throw firstFailure;
    }
}
Also used : Block(com.amazonaws.athena.connector.lambda.data.Block)

Example 58 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project foundry-athena-query-federation-connector by palantir.

the class FoundryRecordHandlerTest method assertThatSpillLocationContainsData.

/**
 * Decrypts {@code bytes} with the test encryption key and schema, then asserts that the data
 * read back via {@code TestUtils.readBlockDataAsColumns} contains exactly the elements of
 * {@code expected}, in order.
 *
 * @param bytes    encrypted block bytes to decrypt
 * @param expected the expected contents to compare against
 */
private void assertThatSpillLocationContainsData(byte[] bytes, List<List<Object>> expected) {
    AesGcmBlockCrypto crypto = new AesGcmBlockCrypto(allocator);
    Block decrypted = crypto.decrypt(TestConstants.ENCRYPTION_KEY, bytes, TestConstants.SCHEMA);
    assertThat(TestUtils.readBlockDataAsColumns(TestConstants.SCHEMA, decrypted))
            .containsExactlyElementsOf(expected);
}
Also used : Block(com.amazonaws.athena.connector.lambda.data.Block) AesGcmBlockCrypto(com.amazonaws.athena.connector.lambda.security.AesGcmBlockCrypto)

Example 59 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project foundry-athena-query-federation-connector by palantir.

the class FoundryRecordHandler method doReadRecords.

/**
 * Reads the records for the split in {@code request} and returns them either inline or as
 * spilled blocks.
 *
 * <p>The slice is fetched through {@code recordService.fetchSlice} and read as an Arrow stream.
 * After the first batch is loaded, if the reader reports a further batch or the block is larger
 * than {@code spillConfig.getMaxInlineBlockSize()}, each loaded batch is handed to the spiller
 * (for as long as the query status checker reports the query as running) and a
 * {@link RemoteReadRecordsResponse} is returned. Otherwise the single block is returned directly
 * in a {@link ReadRecordsResponse}.
 *
 * @param allocator used to create the spiller and a temporary block from which the underlying
 *                  buffer allocator and allocator id are taken
 * @param request   the read request; its schema, split, constraints, catalog name and query id
 *                  are used
 * @return a remote (spilled) response or an inline response, as described above
 * @throws Exception if fetching, reading or spilling the data fails
 */
@Override
@SuppressWarnings("MustBeClosedChecker")
public RecordResponse doReadRecords(BlockAllocator allocator, ReadRecordsRequest request) throws Exception {
    log.info("doReadRecords: {}:{}", request.getSchema(), request.getSplit().getSpillLocation());
    log.debug("Reading records with constraints: {}", request.getConstraints());
    SpillConfig spillConfig = getSpillConfig(request);
    S3Spiller spiller = new S3Spiller(amazonS3, spillConfig, allocator);
    List<String> columnNames = request.getSchema().getFields().stream().map(Field::getName).collect(Collectors.toList());
    // create a temporary block to obtain a handle to the BufferAllocator and allocator id
    BufferAllocator bufferAllocator;
    String allocatorId;
    try (Block block = allocator.createBlock(request.getSchema())) {
        bufferAllocator = block.getFieldVectors().get(0).getAllocator();
        allocatorId = block.getAllocatorId();
    }
    throttlingInvoker.setBlockSpiller(spiller);
    try (QueryStatusChecker queryStatusChecker = new QueryStatusChecker(athena, athenaInvoker, request.getQueryId());
        InputStream is = throttlingInvoker.invoke(() -> recordService.fetchSlice(
                foundryAuthProvider.getAuthHeader(),
                FetchSliceRequest.builder()
                        .slice(Slices.INSTANCE.fromSplit(request.getSplit()))
                        .columnNames(columnNames)
                        .maxBatchSize(SafeLong.of(spillConfig.getMaxBlockBytes()))
                        .build()))) {
        // we do not auto-close the reader to avoid releasing the buffers before serialization in the case
        // the block is held in memory
        PeekableArrowStreamReader reader = new PeekableArrowStreamReader(is, bufferAllocator);
        VectorSchemaRoot vectorSchemaRoot = reader.getVectorSchemaRoot();
        Block block = new Block(allocatorId, request.getSchema(), vectorSchemaRoot);
        reader.loadNextBatch();
        // spill if we have more blocks to read or the current block is too large to return
        if (reader.hasNextBatch() || block.getSize() > spillConfig.getMaxInlineBlockSize()) {
            try {
                do {
                    spiller.spillBlock(block);
                } while (queryStatusChecker.isQueryRunning() && reader.loadNextBatch());
            } finally {
                // On the spill path the reader is no longer needed whether spilling succeeded or
                // failed, so always release it; previously a failure mid-spill leaked the reader.
                reader.close();
            }
            return new RemoteReadRecordsResponse(request.getCatalogName(), request.getSchema(), spiller.getSpillLocations(), spillConfig.getEncryptionKey());
        } else {
            // no more batches so immediately return the block
            return new ReadRecordsResponse(request.getCatalogName(), block);
        }
    }
}
Also used : VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) SpillConfig(com.amazonaws.athena.connector.lambda.data.SpillConfig) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) QueryStatusChecker(com.amazonaws.athena.connector.lambda.QueryStatusChecker) InputStream(java.io.InputStream) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) BufferAllocator(org.apache.arrow.memory.BufferAllocator)

Example 60 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class ExampleRecordHandlerTest method doReadRecordsSpill.

/**
 * Issues a read request that is expected to spill, once with an encryption key and once
 * without, and checks that the response is a {@link RemoteReadRecordsResponse} with more than
 * one block, each of which can be read back from its spill location.
 */
@Test
public void doReadRecordsSpill() throws Exception {
    logger.info("doReadRecordsSpill: enter");
    // Pass 0 uses a generated encryption key; pass 1 uses none.
    for (int pass = 0; pass < 2; pass++) {
        EncryptionKey encryptionKey = (pass % 2 == 0) ? keyFactory.create() : null;
        logger.info("doReadRecordsSpill: Using encryptionKey[" + encryptionKey + "]");

        Map<String, ValueSet> constraintsMap = new HashMap<>();
        constraintsMap.put(
                "col3",
                SortedRangeSet.copyOf(
                        Types.MinorType.FLOAT8.getType(),
                        ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.FLOAT8.getType(), -10000D)),
                        false));
        constraintsMap.put(
                "unknown",
                EquatableValueSet.newBuilder(allocator, Types.MinorType.FLOAT8.getType(), false, true)
                        .add(1.1D)
                        .build());
        constraintsMap.put("unknown2", new AllOrNoneValueSet(Types.MinorType.FLOAT8.getType(), false, true));

        ReadRecordsRequest request = new ReadRecordsRequest(
                IdentityUtil.fakeIdentity(),
                "catalog",
                "queryId-" + System.currentTimeMillis(),
                new TableName("schema", "table"),
                schemaForRead,
                Split.newBuilder(makeSpillLocation(), encryptionKey)
                        .add("year", "10")
                        .add("month", "10")
                        .add("day", "10")
                        .build(),
                new Constraints(constraintsMap),
                // ~1.5MB so we should see some spill
                1_600_000L,
                1000L);
        ObjectMapperUtil.assertSerialization(request);

        RecordResponse rawResponse = recordService.readRecords(request);
        ObjectMapperUtil.assertSerialization(rawResponse);
        assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

        try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
            logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
            assertTrue(response.getNumberBlocks() > 1);

            int blockNum = 0;
            for (SpillLocation location : response.getRemoteBlocks()) {
                S3SpillLocation spillLocation = (S3SpillLocation) location;
                try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                    logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum, block.getRowCount());
                    blockNum++;
                    // assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
                    logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                    assertNotNull(BlockUtils.rowToString(block, 0));
                }
            }
        }
    }
    logger.info("doReadRecordsSpill: exit");
}
Also used : RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) HashMap(java.util.HashMap) AllOrNoneValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.AllOrNoneValueSet) EncryptionKey(com.amazonaws.athena.connector.lambda.security.EncryptionKey) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) Block(com.amazonaws.athena.connector.lambda.data.Block) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)

Aggregations

Block (com.amazonaws.athena.connector.lambda.data.Block)113 Test (org.junit.Test)39 HashMap (java.util.HashMap)35 Schema (org.apache.arrow.vector.types.pojo.Schema)35 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)32 Split (com.amazonaws.athena.connector.lambda.domain.Split)31 GetSplitsResponse (com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse)28 FieldReader (org.apache.arrow.vector.complex.reader.FieldReader)28 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)27 SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation)23 HashSet (java.util.HashSet)23 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)20 Field (org.apache.arrow.vector.types.pojo.Field)17 GetSplitsRequest (com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest)13 PreparedStatement (java.sql.PreparedStatement)13 ResultSet (java.sql.ResultSet)13 ArrayList (java.util.ArrayList)13 MetadataResponse (com.amazonaws.athena.connector.lambda.metadata.MetadataResponse)12 Connection (java.sql.Connection)12 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)11