Use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in project aws-athena-query-federation by awslabs.
From the class CloudwatchRecordHandlerTest, method doReadRecordsSpill.
/**
 * Issues a read constrained on the "time" column with a small size limit and
 * verifies that the handler answers with a {@link RemoteReadRecordsResponse}
 * made of more than one block, each of which can be read back through the
 * spill reader.
 */
@Test
public void doReadRecordsSpill() throws Exception {
    logger.info("doReadRecordsSpill: enter");

    // Inclusive BIGINT range [100, 100_000_000] on the "time" column.
    Map<String, ValueSet> timeConstraints = new HashMap<>();
    timeConstraints.put("time", SortedRangeSet.of(
            Range.range(allocator, Types.MinorType.BIGINT.getType(), 100L, true, 100_000_000L, true)));

    String queryId = "queryId-" + System.currentTimeMillis();
    TableName tableName = new TableName("schema", "table");

    // The split's spill location is a directory with random bucket/split/query ids.
    S3SpillLocation splitSpillLocation = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();
    Split split = Split.newBuilder(splitSpillLocation, keyFactory.create())
            .add(CloudwatchMetadataHandler.LOG_STREAM_FIELD, "table")
            .build();

    ReadRecordsRequest request = new ReadRecordsRequest(
            identity,
            "catalog",
            queryId,
            tableName,
            schemaForRead,
            split,
            new Constraints(timeConstraints),
            // ~1.5MB so we should see some spill
            1_500_000L,
            0);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() > 1);

        int blockIndex = 0;
        for (SpillLocation remoteBlock : response.getRemoteBlocks()) {
            S3SpillLocation s3Location = (S3SpillLocation) remoteBlock;
            try (Block spilled = spillReader.read(s3Location, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockIndex, spilled.getRowCount());
                blockIndex++;

                // Every spilled block must yield a printable first row.
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(spilled, 0));
                assertNotNull(BlockUtils.rowToString(spilled, 0));
            }
        }
    }

    logger.info("doReadRecordsSpill: exit");
}
Use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in project aws-athena-query-federation by awslabs.
From the class MetricsRecordHandlerTest, method readMetricSamplesWithConstraint.
/**
 * Reads metric samples through the handler with equality constraints on
 * namespace, statistic and dimension name/value, using a mocked
 * {@code getMetricData}, and checks that the inline response holds
 * {@code calls * numMetrics * numSamples} rows.
 */
@Test
public void readMetricSamplesWithConstraint() throws Exception {
    logger.info("readMetricSamplesWithConstraint: enter");

    String namespace = "namespace";
    String metricName = "metricName";
    String statistic = "p90";
    String period = "60";
    String dimName = "dimName";
    String dimValue = "dimValue";

    Dimension dimension = new Dimension().withName(dimName).withValue(dimValue);
    List<Dimension> dimensions = Collections.singletonList(dimension);

    int numMetrics = 10;
    int numSamples = 10;

    // Count how many times the mocked client is asked for metric data.
    AtomicLong metricDataCalls = new AtomicLong(0);
    when(mockMetrics.getMetricData(any(GetMetricDataRequest.class))).thenAnswer(invocation -> {
        metricDataCalls.incrementAndGet();
        return mockMetricData(invocation, numMetrics, numSamples);
    });

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(NAMESPACE_FIELD, makeStringEquals(allocator, namespace));
    constraintsMap.put(STATISTIC_FIELD, makeStringEquals(allocator, statistic));
    constraintsMap.put(DIMENSION_NAME_FIELD, makeStringEquals(allocator, dimName));
    constraintsMap.put(DIMENSION_VALUE_FIELD, makeStringEquals(allocator, dimValue));

    S3SpillLocation spillLocation = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();

    Metric metric = new Metric()
            .withNamespace(namespace)
            .withMetricName(metricName)
            .withDimensions(dimensions);
    List<MetricStat> metricStats = new ArrayList<>();
    metricStats.add(new MetricStat().withMetric(metric).withPeriod(60).withStat(statistic));

    Split split = Split.newBuilder(spillLocation, keyFactory.create())
            .add(MetricStatSerDe.SERIALIZED_METRIC_STATS_FIELD_NAME, MetricStatSerDe.serialize(metricStats))
            .add(METRIC_NAME_FIELD, metricName)
            .add(NAMESPACE_FIELD, namespace)
            .add(STATISTIC_FIELD, statistic)
            .add(PERIOD_FIELD, period)
            .build();

    ReadRecordsRequest request = new ReadRecordsRequest(
            identity,
            "catalog",
            "queryId-" + System.currentTimeMillis(),
            METRIC_SAMPLES_TABLE_NAME,
            METRIC_DATA_TABLE.getSchema(),
            split,
            new Constraints(constraintsMap),
            // 100GB don't expect this to spill
            100_000_000_000L,
            100_000_000_000L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);

    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("readMetricSamplesWithConstraint: rows[{}]", response.getRecordCount());

    assertEquals(metricDataCalls.get() * numMetrics * numSamples, response.getRecords().getRowCount());
    logger.info("readMetricSamplesWithConstraint: {}", BlockUtils.rowToString(response.getRecords(), 0));

    logger.info("readMetricSamplesWithConstraint: exit");
}
Use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in project aws-athena-query-federation by awslabs.
From the class S3BlockSpiller, method write.
/**
 * Writes (aka spills) a Block.
 *
 * <p>The block is encrypted with the spill configuration's encryption key and
 * the resulting bytes are uploaded to the bucket/key of a freshly generated
 * spill location. The encrypted size is added to {@code totalBytesSpilled}
 * before the upload is attempted.
 *
 * @param block the block to encrypt and upload
 * @return the location the encrypted block was written to
 * @throws RuntimeException any runtime failure raised while encrypting or
 *         uploading; the first such failure is also recorded in
 *         {@code asyncException} before being rethrown
 */
protected SpillLocation write(Block block) {
    try {
        S3SpillLocation spillLocation = makeSpillLocation();
        EncryptionKey encryptionKey = spillConfig.getEncryptionKey();

        logger.info("write: Started encrypting block for write to {}", spillLocation);
        byte[] bytes = blockCrypto.encrypt(encryptionKey, block);
        totalBytesSpilled.addAndGet(bytes.length);

        logger.info("write: Started spilling block of size {} bytes", bytes.length);
        // Declare the payload size up front: when an InputStream is uploaded
        // without a content length, the S3 client has to buffer the stream
        // itself to discover its size. The size is already known here.
        ObjectMetadata objectMetadata = new ObjectMetadata();
        objectMetadata.setContentLength(bytes.length);
        amazonS3.putObject(spillLocation.getBucket(), spillLocation.getKey(), new ByteArrayInputStream(bytes), objectMetadata);
        logger.info("write: Completed spilling block of size {} bytes", bytes.length);

        return spillLocation;
    } catch (RuntimeException ex) {
        // Keep only the first failure; later ones must not overwrite it.
        asyncException.compareAndSet(null, ex);
        logger.warn("write: Encountered error while writing block.", ex);
        throw ex;
    }
}
Use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in project foundry-athena-query-federation-connector by palantir.
From the class S3Spiller, method makeSpillLocation.
/**
 * Derives the location for the next spilled block from the split's spill
 * location: the split's key suffixed with "." and the next value of
 * {@code spillNumber}, in the same bucket, marked as a non-directory.
 *
 * @return the location for the next block
 * @throws SafeRuntimeException if the split's spill location is not a directory
 */
private S3SpillLocation makeSpillLocation() {
    S3SpillLocation baseLocation = (S3SpillLocation) spillConfig.getSpillLocation();

    if (baseLocation.isDirectory()) {
        // Each call consumes one sequence number, giving every block its own key.
        String blockKey = baseLocation.getKey() + "." + spillNumber.getAndIncrement();
        return new S3SpillLocation(baseLocation.getBucket(), blockKey, false);
    }

    throw new SafeRuntimeException("Split's SpillLocation must be a directory because multiple blocks may be spilled.");
}
Use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in project foundry-athena-query-federation-connector by palantir.
From the class S3Spiller, method write.
/**
 * Encrypts the given block and uploads it to a newly generated spill location.
 *
 * <p>The encrypted size is added to {@code totalBytesSpilled} and set as the
 * upload's content length.
 *
 * @param block the block to encrypt and upload
 * @return the location the encrypted block was written to
 * @throws RuntimeException any runtime failure from encryption or upload; the
 *         first such failure is also stored in {@code asyncException}
 */
private SpillLocation write(Block block) {
    try {
        S3SpillLocation target = makeSpillLocation();
        EncryptionKey key = spillConfig.getEncryptionKey();

        log.info("write: Started encrypting block for write to {}", target);
        byte[] encrypted = blockCrypto.encrypt(key, block);
        int encryptedSize = encrypted.length;
        totalBytesSpilled.addAndGet(encryptedSize);

        log.info("write: Started spilling block of size {} bytes", encryptedSize);
        ObjectMetadata metadata = new ObjectMetadata();
        metadata.setContentLength(encryptedSize);
        amazonS3.putObject(target.getBucket(), target.getKey(), new ByteArrayInputStream(encrypted), metadata);
        log.info("write: Completed spilling block of size {} bytes", encryptedSize);

        return target;
    } catch (RuntimeException failure) {
        // Only the first failure is retained.
        asyncException.compareAndSet(null, failure);
        log.warn("write: Encountered error while writing block.", failure);
        throw failure;
    }
}
Aggregations