
Example 16 with S3SpillLocation

Use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in project aws-athena-query-federation by awslabs.

From the class CloudwatchRecordHandlerTest, method doReadRecordsSpill:

@Test
public void doReadRecordsSpill() throws Exception {
    logger.info("doReadRecordsSpill: enter");
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("time", SortedRangeSet.of(Range.range(allocator, Types.MinorType.BIGINT.getType(), 100L, true, 100_000_000L, true)));
    ReadRecordsRequest request = new ReadRecordsRequest(identity, "catalog", "queryId-" + System.currentTimeMillis(),
            new TableName("schema", "table"), schemaForRead,
            Split.newBuilder(S3SpillLocation.newBuilder()
                            .withBucket(UUID.randomUUID().toString())
                            .withSplitId(UUID.randomUUID().toString())
                            .withQueryId(UUID.randomUUID().toString())
                            .withIsDirectory(true)
                            .build(),
                    keyFactory.create())
                    .add(CloudwatchMetadataHandler.LOG_STREAM_FIELD, "table")
                    .build(),
            new Constraints(constraintsMap),
            // ~1.5MB so we should see some spill
            1_500_000L, 0);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);
    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() > 1);
        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) next;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                // assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
    logger.info("doReadRecordsSpill: exit");
}
Also used : RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Block(com.amazonaws.athena.connector.lambda.data.Block) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Example 17 with S3SpillLocation

Use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in project aws-athena-query-federation by awslabs.

From the class MetricsRecordHandlerTest, method readMetricSamplesWithConstraint:

@Test
public void readMetricSamplesWithConstraint() throws Exception {
    logger.info("readMetricSamplesWithConstraint: enter");
    String namespace = "namespace";
    String metricName = "metricName";
    String statistic = "p90";
    String period = "60";
    String dimName = "dimName";
    String dimValue = "dimValue";
    List<Dimension> dimensions = Collections.singletonList(new Dimension().withName(dimName).withValue(dimValue));
    int numMetrics = 10;
    int numSamples = 10;
    AtomicLong numCalls = new AtomicLong(0);
    when(mockMetrics.getMetricData(any(GetMetricDataRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
        numCalls.incrementAndGet();
        return mockMetricData(invocation, numMetrics, numSamples);
    });
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(NAMESPACE_FIELD, makeStringEquals(allocator, namespace));
    constraintsMap.put(STATISTIC_FIELD, makeStringEquals(allocator, statistic));
    constraintsMap.put(DIMENSION_NAME_FIELD, makeStringEquals(allocator, dimName));
    constraintsMap.put(DIMENSION_VALUE_FIELD, makeStringEquals(allocator, dimValue));
    S3SpillLocation spillLocation = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();
    List<MetricStat> metricStats = new ArrayList<>();
    metricStats.add(new MetricStat()
            .withMetric(new Metric()
                    .withNamespace(namespace)
                    .withMetricName(metricName)
                    .withDimensions(dimensions))
            .withPeriod(60)
            .withStat(statistic));
    Split split = Split.newBuilder(spillLocation, keyFactory.create())
            .add(MetricStatSerDe.SERIALIZED_METRIC_STATS_FIELD_NAME, MetricStatSerDe.serialize(metricStats))
            .add(METRIC_NAME_FIELD, metricName)
            .add(NAMESPACE_FIELD, namespace)
            .add(STATISTIC_FIELD, statistic)
            .add(PERIOD_FIELD, period)
            .build();
    ReadRecordsRequest request = new ReadRecordsRequest(identity, "catalog", "queryId-" + System.currentTimeMillis(),
            METRIC_SAMPLES_TABLE_NAME, METRIC_DATA_TABLE.getSchema(), split, new Constraints(constraintsMap),
            100_000_000_000L,
            // 100GB don't expect this to spill
            100_000_000_000L);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);
    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("readMetricSamplesWithConstraint: rows[{}]", response.getRecordCount());
    assertEquals(numCalls.get() * numMetrics * numSamples, response.getRecords().getRowCount());
    logger.info("readMetricSamplesWithConstraint: {}", BlockUtils.rowToString(response.getRecords(), 0));
    logger.info("readMetricSamplesWithConstraint: exit");
}
Also used : HashMap(java.util.HashMap) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) MetricStat(com.amazonaws.services.cloudwatch.model.MetricStat) ArrayList(java.util.ArrayList) Matchers.anyString(org.mockito.Matchers.anyString) Dimension(com.amazonaws.services.cloudwatch.model.Dimension) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) AtomicLong(java.util.concurrent.atomic.AtomicLong) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetMetricDataRequest(com.amazonaws.services.cloudwatch.model.GetMetricDataRequest) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Metric(com.amazonaws.services.cloudwatch.model.Metric) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)
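
The makeStringEquals helper used to populate the constraints map above is a test utility that is not shown in this listing. Below is a minimal sketch of what such a helper could look like, assuming it simply wraps EquatableValueSet (which appears in the Aggregations at the end of this page) around a single VARCHAR value; the actual helper in aws-athena-query-federation may differ.

// Hypothetical helper, for illustration only: builds a white-list ValueSet that
// matches exactly one VARCHAR value. Uses EquatableValueSet from the Athena
// federation SDK and Types from Apache Arrow.
private static ValueSet makeStringEquals(BlockAllocator allocator, String value) {
    return EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false)
            .add(value)
            .build();
}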

Example 18 with S3SpillLocation

Use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in project aws-athena-query-federation by awslabs.

From the class S3BlockSpiller, method write:

/**
 * Writes (aka spills) a Block.
 */
protected SpillLocation write(Block block) {
    try {
        S3SpillLocation spillLocation = makeSpillLocation();
        EncryptionKey encryptionKey = spillConfig.getEncryptionKey();
        logger.info("write: Started encrypting block for write to {}", spillLocation);
        byte[] bytes = blockCrypto.encrypt(encryptionKey, block);
        totalBytesSpilled.addAndGet(bytes.length);
        logger.info("write: Started spilling block of size {} bytes", bytes.length);
        amazonS3.putObject(spillLocation.getBucket(), spillLocation.getKey(), new ByteArrayInputStream(bytes), new ObjectMetadata());
        logger.info("write: Completed spilling block of size {} bytes", bytes.length);
        return spillLocation;
    } catch (RuntimeException ex) {
        asyncException.compareAndSet(null, ex);
        logger.warn("write: Encountered error while writing block.", ex);
        throw ex;
    }
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) EncryptionKey(com.amazonaws.athena.connector.lambda.security.EncryptionKey) ObjectMetadata(com.amazonaws.services.s3.model.ObjectMetadata)
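
The SpillLocation returned by write is what a connector later surfaces as the remote blocks of a RemoteReadRecordsResponse, which Example 16 reads back with a spill reader. Here is a minimal read-back sketch, assuming an AmazonS3 client (s3), a BlockAllocator (allocator), and the RemoteReadRecordsResponse from Example 16 are in scope; the S3BlockSpillReader constructor shape is an assumption, since the test above only shows its read call.

// Resolve each remote block the same way Example 16's test does.
S3BlockSpillReader spillReader = new S3BlockSpillReader(s3, allocator);
for (SpillLocation next : response.getRemoteBlocks()) {
    S3SpillLocation spillLocation = (S3SpillLocation) next;
    try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
        logger.info("readBack: rows[{}]", block.getRowCount());
    }
}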

Example 19 with S3SpillLocation

Use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in project foundry-athena-query-federation-connector by palantir.

From the class S3Spiller, method makeSpillLocation:

private S3SpillLocation makeSpillLocation() {
    S3SpillLocation splitSpillLocation = (S3SpillLocation) spillConfig.getSpillLocation();
    if (!splitSpillLocation.isDirectory()) {
        throw new SafeRuntimeException("Split's SpillLocation must be a directory because multiple blocks may be spilled.");
    }
    String blockKey = splitSpillLocation.getKey() + "." + spillNumber.getAndIncrement();
    return new S3SpillLocation(splitSpillLocation.getBucket(), blockKey, false);
}
Also used : SafeRuntimeException(com.palantir.logsafe.exceptions.SafeRuntimeException) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation)
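
For the directory check above to pass, the Split's spill location must be created with withIsDirectory(true), as the tests in Examples 16 and 17 do. A minimal sketch of such a directory-style location follows; the bucket name is a placeholder.

// Directory-style spill location for a split; "my-spill-bucket" is a placeholder.
S3SpillLocation splitSpillLocation = S3SpillLocation.newBuilder()
        .withBucket("my-spill-bucket")
        .withSplitId(UUID.randomUUID().toString())
        .withQueryId(UUID.randomUUID().toString())
        .withIsDirectory(true)
        .build();
// makeSpillLocation then appends "." + spillNumber to this key, so each spilled
// block gets its own S3 object under the split's directory.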

Example 20 with S3SpillLocation

Use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in project foundry-athena-query-federation-connector by palantir.

From the class S3Spiller, method write:

private SpillLocation write(Block block) {
    try {
        S3SpillLocation spillLocation = makeSpillLocation();
        EncryptionKey encryptionKey = spillConfig.getEncryptionKey();
        log.info("write: Started encrypting block for write to {}", spillLocation);
        byte[] bytes = blockCrypto.encrypt(encryptionKey, block);
        totalBytesSpilled.addAndGet(bytes.length);
        log.info("write: Started spilling block of size {} bytes", bytes.length);
        ObjectMetadata objectMetadata = new ObjectMetadata();
        objectMetadata.setContentLength(bytes.length);
        amazonS3.putObject(spillLocation.getBucket(), spillLocation.getKey(), new ByteArrayInputStream(bytes), objectMetadata);
        log.info("write: Completed spilling block of size {} bytes", bytes.length);
        return spillLocation;
    } catch (RuntimeException ex) {
        asyncException.compareAndSet(null, ex);
        log.warn("write: Encountered error while writing block.", ex);
        throw ex;
    }
}
Also used : SafeRuntimeException(com.palantir.logsafe.exceptions.SafeRuntimeException) ByteArrayInputStream(java.io.ByteArrayInputStream) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) EncryptionKey(com.amazonaws.athena.connector.lambda.security.EncryptionKey) ObjectMetadata(com.amazonaws.services.s3.model.ObjectMetadata)

Aggregations

S3SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation)28 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)22 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)22 Test (org.junit.Test)22 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)20 RecordResponse (com.amazonaws.athena.connector.lambda.records.RecordResponse)20 HashMap (java.util.HashMap)20 Matchers.anyString (org.mockito.Matchers.anyString)19 RemoteReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse)15 InvocationOnMock (org.mockito.invocation.InvocationOnMock)15 SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation)13 ReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse)13 Block (com.amazonaws.athena.connector.lambda.data.Block)12 Split (com.amazonaws.athena.connector.lambda.domain.Split)12 ArrayList (java.util.ArrayList)12 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)11 EquatableValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet)8 PutObjectResult (com.amazonaws.services.s3.model.PutObjectResult)8 ByteArrayInputStream (java.io.ByteArrayInputStream)8 Schema (org.apache.arrow.vector.types.pojo.Schema)8