Search in sources:

Example 6 with SpillLocation

use of com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation in project aws-athena-query-federation by awslabs.

the class NeptuneRecordHandlerTest method doReadRecordsSpill.

/**
 * Issues a read with a greater-than filter on {@code property1} and verifies that the
 * handler answers with a {@link RemoteReadRecordsResponse} holding exactly one block,
 * and that the first row of every remote block can be read back and rendered.
 */
@Test
public void doReadRecordsSpill() throws Exception {
    S3SpillLocation splitLoc = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();
    allocator = new BlockAllocatorImpl();

    // Greater Than filter
    HashMap<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("property1", SortedRangeSet.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 9)));

    buildGraphTraversal();

    // Request pieces are created in the same order the constructor arguments are evaluated.
    Split split = Split.newBuilder(splitLoc, keyFactory.create()).build();
    Constraints constraints = new Constraints(constraintsMap);
    ReadRecordsRequest request = new ReadRecordsRequest(
            IDENTITY,
            DEFAULT_CATALOG,
            QUERY_ID,
            TABLE_NAME,
            schemaPGVertexForRead,
            split,
            constraints,
            // ~1.5MB so we should see some spill
            1_500_000L,
            0L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() == 1);

        int blockNum = 0;
        for (SpillLocation location : response.getRemoteBlocks()) {
            S3SpillLocation s3Location = (S3SpillLocation) location;
            try (Block block = spillReader.read(s3Location, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum, block.getRowCount());
                blockNum++;
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
}
Also used : RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Block(com.amazonaws.athena.connector.lambda.data.Block) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)

Example 7 with SpillLocation

use of com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation in project aws-athena-query-federation by awslabs.

the class PostGreSqlMetadataHandler method doGetSplits.

/**
 * Builds the splits for the requested table from the partitions block carried by the request.
 *
 * <p>If the partitions block has exactly one row and both the partition-schema and
 * partition-name columns equal {@code ALL_PARTITIONS}, one split is created per clause
 * returned by {@code getSplitClauses}; reaching {@code MAX_SPLITS_PER_REQUEST} in that mode
 * raises a {@link RuntimeException}. If no split was produced that way, one split is created
 * per partition row, starting at the row decoded from the continuation token; once
 * {@code MAX_SPLITS_PER_REQUEST} splits are collected the response is returned together with
 * a continuation token for the following row.
 *
 * @param blockAllocator not used by this method
 * @param getSplitsRequest the request supplying the table name, partitions and continuation token
 * @return the collected splits, with a continuation token when more partition rows remain
 */
@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest) {
    LOGGER.info("{}: Catalog {}, table {}",
            getSplitsRequest.getQueryId(),
            getSplitsRequest.getTableName().getSchemaName(),
            getSplitsRequest.getTableName().getTableName());

    int firstRow = decodeContinuationToken(getSplitsRequest);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    boolean splitterUsed = false;

    if (partitions.getRowCount() == 1) {
        FieldReader schemaReader = partitions.getFieldReader(BLOCK_PARTITION_SCHEMA_COLUMN_NAME);
        schemaReader.setPosition(0);
        FieldReader partitionReader = partitions.getFieldReader(BLOCK_PARTITION_COLUMN_NAME);
        partitionReader.setPosition(0);

        if (ALL_PARTITIONS.equals(schemaReader.readText().toString())
                && ALL_PARTITIONS.equals(partitionReader.readText().toString())) {
            for (String splitClause : getSplitClauses(getSplitsRequest.getTableName())) {
                // Every split must have a unique location if we wish to spill to avoid failures
                splits.add(Split.newBuilder(makeSpillLocation(getSplitsRequest), makeEncryptionKey())
                        .add(BLOCK_PARTITION_SCHEMA_COLUMN_NAME, String.valueOf(schemaReader.readText()))
                        .add(BLOCK_PARTITION_COLUMN_NAME, String.valueOf(splitClause))
                        .build());
                if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
                    throw new RuntimeException("Max splits supported with splitter " + MAX_SPLITS_PER_REQUEST);
                }
                splitterUsed = true;
            }
        }
    }

    // The splitter produced the splits, so the per-partition pass is skipped.
    if (splitterUsed) {
        return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
    }

    for (int row = firstRow; row < partitions.getRowCount(); row++) {
        FieldReader schemaReader = partitions.getFieldReader(BLOCK_PARTITION_SCHEMA_COLUMN_NAME);
        schemaReader.setPosition(row);
        FieldReader partitionReader = partitions.getFieldReader(BLOCK_PARTITION_COLUMN_NAME);
        partitionReader.setPosition(row);

        // Every split must have a unique location if we wish to spill to avoid failures
        SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
        LOGGER.info("{}: Input partition is {}", getSplitsRequest.getQueryId(), String.valueOf(partitionReader.readText()));
        splits.add(Split.newBuilder(spillLocation, makeEncryptionKey())
                .add(BLOCK_PARTITION_SCHEMA_COLUMN_NAME, String.valueOf(schemaReader.readText()))
                .add(BLOCK_PARTITION_COLUMN_NAME, String.valueOf(partitionReader.readText()))
                .build());

        if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
            // We exceeded the number of split we want to return in a single request, return and provide a continuation token.
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(row + 1));
        }
    }

    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
Also used : SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) Split(com.amazonaws.athena.connector.lambda.domain.Split) FieldReader(org.apache.arrow.vector.complex.reader.FieldReader) HashSet(java.util.HashSet)

Example 8 with SpillLocation

use of com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation in project aws-athena-query-federation by awslabs.

the class TPCDSRecordHandlerTest method doReadRecordsSpill.

/**
 * Issues a read constrained to a range of {@code c_current_cdemo_sk} values and verifies
 * that the handler answers with a {@link RemoteReadRecordsResponse} holding more than one
 * block, and that the first row of every remote block can be read back and rendered.
 */
@Test
public void doReadRecordsSpill() throws Exception {
    logger.info("doReadRecordsSpill: enter");

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("c_current_cdemo_sk",
            SortedRangeSet.of(Range.range(allocator, Types.MinorType.BIGINT.getType(), 100L, true, 100_000_000L, true)));

    // Request pieces are created in the same order the constructor arguments are evaluated.
    String queryId = "queryId-" + System.currentTimeMillis();
    TableName tableName = new TableName("tpcds1", table.getName());
    Split split = Split.newBuilder(
                    S3SpillLocation.newBuilder()
                            .withBucket(UUID.randomUUID().toString())
                            .withSplitId(UUID.randomUUID().toString())
                            .withQueryId(UUID.randomUUID().toString())
                            .withIsDirectory(true)
                            .build(),
                    keyFactory.create())
            .add(SPLIT_NUMBER_FIELD, "0")
            .add(SPLIT_TOTAL_NUMBER_FIELD, "10000")
            .add(SPLIT_SCALE_FACTOR_FIELD, "1")
            .build();
    Constraints constraints = new Constraints(constraintsMap);
    ReadRecordsRequest request = new ReadRecordsRequest(
            identity,
            "catalog",
            queryId,
            tableName,
            schemaForRead,
            split,
            constraints,
            // ~1.5MB so we should see some spill
            1_500_000L,
            0);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() > 1);

        int blockNum = 0;
        for (SpillLocation location : response.getRemoteBlocks()) {
            S3SpillLocation s3Location = (S3SpillLocation) location;
            try (Block block = spillReader.read(s3Location, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum, block.getRowCount());
                blockNum++;
                // Disabled check kept from the original:
                // assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }

    logger.info("doReadRecordsSpill: exit");
}
Also used : RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Block(com.amazonaws.athena.connector.lambda.data.Block) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)

Example 9 with SpillLocation

use of com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation in project aws-athena-query-federation by awslabs.

the class AwsCmdbMetadataHandler method doGetSplits.

/**
 * Returns a single split for the requested table, after checking that a TableProvider
 * is registered for that table.
 *
 * @throws RuntimeException if no TableProvider is registered for the requested table
 * @see MetadataHandler
 */
@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest) {
    // The provider itself is not needed here; only its presence is checked.
    if (tableProviders.get(getSplitsRequest.getTableName()) == null) {
        throw new RuntimeException("Unknown table " + getSplitsRequest.getTableName());
    }

    // Every split needs a unique spill location.
    Split onlySplit = Split.newBuilder(makeSpillLocation(getSplitsRequest), makeEncryptionKey()).build();
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), onlySplit);
}
Also used : SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) EncryptionKey(com.amazonaws.athena.connector.lambda.security.EncryptionKey) TableProvider(com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider) Split(com.amazonaws.athena.connector.lambda.domain.Split)

Example 10 with SpillLocation

use of com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation in project aws-athena-query-federation by awslabs.

the class AbstractTableProviderTest method validateRead.

/**
 * Reads every spilled block at the given locations, logs and validates each row via
 * {@code validateRow}, and finally asserts that the total number of rows seen equals
 * {@code getExpectedRows()}.
 *
 * @param schema schema passed to the reader for each block
 * @param reader reader used to load each spilled block
 * @param locations spill locations to read; each is cast to {@link S3SpillLocation}
 * @param encryptionKey key passed to the reader for each block
 * @throws RuntimeException wrapping any exception raised while reading or validating a block
 */
protected void validateRead(Schema schema, S3BlockSpillReader reader, List<SpillLocation> locations, EncryptionKey encryptionKey) {
    int blocksSeen = 0;
    int rowsSeen = 0;

    for (SpillLocation location : locations) {
        // The cast stays outside the try so a ClassCastException is not wrapped.
        S3SpillLocation s3Location = (S3SpillLocation) location;
        try (Block block = reader.read(s3Location, encryptionKey, schema)) {
            logger.info("validateRead: blockNum[{}] and recordCount[{}]", blocksSeen, block.getRowCount());
            blocksSeen++;

            int row = 0;
            while (row < block.getRowCount()) {
                logger.info("validateRead: {}", BlockUtils.rowToString(block, row));
                rowsSeen++;
                validateRow(block, row);
                row++;
            }
        } catch (Exception ex) {
            throw new RuntimeException(ex);
        }
    }

    assertEquals(getExpectedRows(), rowsSeen);
}
Also used : SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Block(com.amazonaws.athena.connector.lambda.data.Block)

Aggregations

SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation)32 Block (com.amazonaws.athena.connector.lambda.data.Block)23 Split (com.amazonaws.athena.connector.lambda.domain.Split)21 GetSplitsResponse (com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse)17 HashSet (java.util.HashSet)16 S3SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation)15 FieldReader (org.apache.arrow.vector.complex.reader.FieldReader)13 HashMap (java.util.HashMap)11 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)9 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)9 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)9 RemoteReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse)9 Test (org.junit.Test)9 RecordResponse (com.amazonaws.athena.connector.lambda.records.RecordResponse)8 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)7 Matchers.anyString (org.mockito.Matchers.anyString)7 EquatableValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet)5 EncryptionKey (com.amazonaws.athena.connector.lambda.security.EncryptionKey)5 InvocationOnMock (org.mockito.invocation.InvocationOnMock)4 PutObjectResult (com.amazonaws.services.s3.model.PutObjectResult)3