Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
From class ExampleMetadataHandlerTest, method getPartitions.
@Test
public void getPartitions() throws Exception {
    if (!enableTests) {
        // We do this because until you complete the tutorial these tests will fail. When you attempt to publish
        // using ../tools/publish.sh ... it will set the publishing flag and force these tests. This is how we
        // avoid breaking the build but still have a useful tutorial. We are also duplicating this block
        // on purpose since this is a somewhat odd pattern.
        logger.info("getPartitions: Tests are disabled, to enable them set the 'publishing' environment variable "
                + "using maven clean install -Dpublishing=true");
        return;
    }
    logger.info("doGetTableLayout - enter");
    Schema tableSchema = SchemaBuilder.newBuilder()
            .addIntField("day")
            .addIntField("month")
            .addIntField("year")
            .build();
    Set<String> partitionCols = new HashSet<>();
    partitionCols.add("day");
    partitionCols.add("month");
    partitionCols.add("year");
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("day", SortedRangeSet.copyOf(Types.MinorType.INT.getType(),
            ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 0)), false));
    constraintsMap.put("month", SortedRangeSet.copyOf(Types.MinorType.INT.getType(),
            ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 0)), false));
    constraintsMap.put("year", SortedRangeSet.copyOf(Types.MinorType.INT.getType(),
            ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 2000)), false));
    GetTableLayoutRequest req = null;
    GetTableLayoutResponse res = null;
    try {
        req = new GetTableLayoutRequest(fakeIdentity(), "queryId", "default",
                new TableName("schema1", "table1"),
                new Constraints(constraintsMap), tableSchema, partitionCols);
        res = handler.doGetTableLayout(allocator, req);
        logger.info("doGetTableLayout - {}", res);
        Block partitions = res.getPartitions();
        for (int row = 0; row < partitions.getRowCount() && row < 10; row++) {
            logger.info("doGetTableLayout:{} {}", row, BlockUtils.rowToString(partitions, row));
        }
        assertTrue(partitions.getRowCount() > 0);
        logger.info("doGetTableLayout: partitions[{}]", partitions.getRowCount());
    } finally {
        try {
            // Null checks guard against an NPE if construction failed before req/res were assigned.
            if (req != null) {
                req.close();
            }
            if (res != null) {
                res.close();
            }
        } catch (Exception ex) {
            logger.error("doGetTableLayout: ", ex);
        }
    }
    logger.info("doGetTableLayout - exit");
}
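The test above drives doGetTableLayout, which in the SDK delegates to the connector's getPartitions(BlockWriter, ...) hook. For orientation, here is a minimal sketch of such an implementation in the style of the tutorial connector; the year/month/day ranges are illustrative assumptions, not the connector's actual values.

@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker) throws Exception {
    // Illustrative partition ranges only; a real connector would enumerate real partitions.
    for (int year = 2017; year <= 2018; year++) {
        for (int month = 1; month <= 12; month++) {
            for (int day = 1; day <= 28; day++) {
                final int yearVal = year;
                final int monthVal = month;
                final int dayVal = day;
                // writeRows applies the request's constraints; setValue returns false
                // for values the constraints filter out, so the row is skipped.
                blockWriter.writeRows((Block block, int row) -> {
                    boolean matched = block.setValue("year", row, yearVal);
                    matched &= block.setValue("month", row, monthVal);
                    matched &= block.setValue("day", row, dayVal);
                    return matched ? 1 : 0;
                });
            }
        }
    }
}

With the constraints built in the test (day > 0, month > 0, year > 2000), only rows satisfying all three predicates would land in the partitions Block the test asserts on.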
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
From class ExampleRecordHandler, method readWithConstraint.
/**
 * Here we generate our simulated row data. A real connector would instead connect to the actual source and read
 * the data corresponding to the requested split.
 *
 * @param spiller A BlockSpiller that should be used to write the row data associated with this Split.
 * The BlockSpiller automatically handles applying constraints, chunking the response, encrypting, and spilling to S3.
 * @param request The ReadRecordsRequest containing the split and other details about what to read.
 * @param queryStatusChecker A QueryStatusChecker that you can use to stop doing work for a query that has already terminated.
 */
@Override
protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest request, QueryStatusChecker queryStatusChecker) {
    long startTime = System.currentTimeMillis();
    /*
     * It is important to try to throw any throttling events before writing data, since Athena may not be able to
     * continue the query, due to consistency errors, if you throttle after writing data.
     */
    if (simulateThrottle > 0 && count++ % simulateThrottle == 0) {
        logger.info("readWithConstraint: throwing throttle Exception!");
        throw new FederationThrottleException("Please slow down for this simulated throttling event");
    }
    logCaller(request);
    Set<String> partitionCols = new HashSet<>();
    String partitionColsMetadata = request.getSchema().getCustomMetadata().get("partitionCols");
    if (partitionColsMetadata != null) {
        partitionCols.addAll(Arrays.asList(partitionColsMetadata.split(",")));
    }
    int year = Integer.parseInt(request.getSplit().getProperty("year"));
    int month = Integer.parseInt(request.getSplit().getProperty("month"));
    int day = Integer.parseInt(request.getSplit().getProperty("day"));
    final RowContext rowContext = new RowContext(year, month, day);
    GeneratedRowWriter.RowWriterBuilder builder = GeneratedRowWriter.newBuilder(request.getConstraints());
    for (Field next : request.getSchema().getFields()) {
        Extractor extractor = makeExtractor(next, rowContext);
        if (extractor != null) {
            builder.withExtractor(next.getName(), extractor);
        } else {
            // Fields without a simple extractor (e.g. complex types) fall back to a field writer factory.
            builder.withFieldWriterFactory(next.getName(), makeFactory(next, rowContext));
        }
    }
    GeneratedRowWriter rowWriter = builder.build();
    for (int i = 0; i < numRowsPerSplit; i++) {
        rowContext.seed = i;
        rowContext.negative = i % 2 == 0;
        // Stop doing work early if Athena has already terminated the query.
        if (!queryStatusChecker.isQueryRunning()) {
            return;
        }
        spiller.writeRows((Block block, int rowNum) -> rowWriter.writeRow(block, rowNum, rowContext) ? 1 : 0);
    }
    logger.info("readWithConstraint: Completed generating rows in {} ms", System.currentTimeMillis() - startTime);
}
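The loop above relies on makeExtractor and makeFactory, which are defined elsewhere in ExampleRecordHandler. As a hedged sketch of the extractor half of that pattern for the three int partition columns, assuming RowContext exposes getYear/getMonth/getDay accessors (the accessor names are hypothetical):

private Extractor makeExtractor(Field field, RowContext rowContext) {
    switch (field.getName()) {
        case "year":
            // IntExtractor is a functional interface: extract(Object context, NullableIntHolder dst).
            return (IntExtractor) (Object context, NullableIntHolder dst) -> {
                dst.isSet = 1;
                dst.value = ((RowContext) context).getYear(); // hypothetical accessor
            };
        case "month":
            return (IntExtractor) (Object context, NullableIntHolder dst) -> {
                dst.isSet = 1;
                dst.value = ((RowContext) context).getMonth(); // hypothetical accessor
            };
        case "day":
            return (IntExtractor) (Object context, NullableIntHolder dst) -> {
                dst.isSet = 1;
                dst.value = ((RowContext) context).getDay(); // hypothetical accessor
            };
        default:
            // Returning null makes readWithConstraint fall back to makeFactory above.
            return null;
    }
}

The context object passed to each extractor is the same rowContext handed to GeneratedRowWriter.writeRow, which is how per-row state (seed, negative) reaches the extractors without extra allocation.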
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
From class UserDefinedFunctionHandler, method processRows.
/**
 * Processes a group of rows. This method takes in a block of data (containing multiple rows), processes them,
 * and returns multiple rows of the output column in a block.
 * <p>
 * UDF methods are invoked row-by-row in a for loop. Arrow values are converted to Java Objects and then passed into
 * the UDF Java method. This is not very efficient because we might potentially be doing a lot of data copying.
 * Advanced users could choose to override this method and deal with Arrow data directly to achieve better
 * performance.
 *
 * @param allocator arrow memory allocator
 * @param udfMethod the extracted Java method matching the user-defined function registered in Athena.
 * @param inputRecords input data in Arrow format
 * @param outputSchema output data schema in Arrow format
 * @return output data in Arrow format
 */
protected Block processRows(BlockAllocator allocator, Method udfMethod, Block inputRecords, Schema outputSchema) throws Exception {
    int rowCount = inputRecords.getRowCount();
    // Build one projector per input column so each Arrow value can be converted to a Java Object.
    List<ArrowValueProjector> valueProjectors = Lists.newArrayList();
    for (Field field : inputRecords.getFields()) {
        FieldReader fieldReader = inputRecords.getFieldReader(field.getName());
        ArrowValueProjector arrowValueProjector = ProjectorUtils.createArrowValueProjector(fieldReader);
        valueProjectors.add(arrowValueProjector);
    }
    Field outputField = outputSchema.getFields().get(0);
    GeneratedRowWriter outputRowWriter = createOutputRowWriter(outputField, valueProjectors, udfMethod);
    Block outputRecords = allocator.createBlock(outputSchema);
    outputRecords.setRowCount(rowCount);
    try {
        // Invoke the UDF once per row, writing the result into the output block.
        for (int rowNum = 0; rowNum < rowCount; ++rowNum) {
            outputRowWriter.writeRow(outputRecords, rowNum, rowNum);
        }
    } catch (Throwable t) {
        // Release the output block before rethrowing so we do not leak Arrow memory.
        try {
            outputRecords.close();
        } catch (Exception e) {
            logger.error("Error closing output block", e);
        }
        throw t;
    }
    return outputRecords;
}
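processRows is what ultimately dispatches, row by row, to a user's UDF method. For context, a handler that would be invoked this way can be as small as the sketch below; the class name, source-type tag, and multiply method are illustrative examples, not SDK APIs.

public class ExampleUserDefinedFunctionHandler extends UserDefinedFunctionHandler {
    // The base class requires a source-type tag; the value here is an arbitrary example.
    public ExampleUserDefinedFunctionHandler() {
        super("custom_udf_example");
    }

    // Invoked once per input row by processRows via reflection. Arguments arrive as boxed Java
    // objects (already converted from Arrow), so null handling is the method's responsibility.
    public Integer multiply(Integer factor1, Integer factor2) {
        if (factor1 == null || factor2 == null) {
            return null;
        }
        return factor1 * factor2;
    }
}

Assuming the Lambda is deployed as my-udf-lambda, Athena would then reach this method with a query along the lines of: USING EXTERNAL FUNCTION multiply(x INT, y INT) RETURNS INT LAMBDA 'my-udf-lambda' SELECT multiply(2, 3).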
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
From class EquatableValueSet, method subtract.
private static Block subtract(BlockAllocator allocator, EquatableValueSet left, EquatableValueSet right) {
    Block resultBlock = BlockUtils.newEmptyBlock(allocator, DEFAULT_COLUMN, left.getType());
    FieldVector result = resultBlock.getFieldVector(DEFAULT_COLUMN);
    Block lhsBlock = left.getValues();
    FieldReader lhs = lhsBlock.getFieldReader(DEFAULT_COLUMN);
    int count = 0;
    // Keep every left-hand value that does not appear on the right-hand side.
    for (int i = 0; i < lhsBlock.getRowCount(); i++) {
        lhs.setPosition(i);
        if (!isPresent(lhs.readObject(), right.valueBlock)) {
            BlockUtils.setValue(result, count++, lhs.readObject());
        }
    }
    resultBlock.setRowCount(count);
    return resultBlock;
}
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
From class EquatableValueSet, method intersect.
private static Block intersect(BlockAllocator allocator, EquatableValueSet left, EquatableValueSet right) {
    Block resultBlock = BlockUtils.newEmptyBlock(allocator, DEFAULT_COLUMN, left.getType());
    FieldVector result = resultBlock.getFieldVector(DEFAULT_COLUMN);
    Block lhsBlock = left.getValues();
    FieldReader lhs = lhsBlock.getFieldReader(DEFAULT_COLUMN);
    int count = 0;
    // Keep every left-hand value that also appears on the right-hand side.
    for (int i = 0; i < lhsBlock.getRowCount(); i++) {
        lhs.setPosition(i);
        if (isPresent(lhs.readObject(), right.valueBlock)) {
            BlockUtils.setValue(result, count++, lhs.readObject());
        }
    }
    resultBlock.setRowCount(count);
    return resultBlock;
}
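Both subtract and intersect walk the left-hand Block and probe the right-hand one with isPresent; they back EquatableValueSet's public set operations. A hedged usage sketch follows, assuming the newBuilder(allocator, type, isWhiteList, nullAllowed) builder signature and an intersect(allocator, other) entry point as I understand the SDK; treat both as assumptions.

// Two white-list value sets over INT, with nulls disallowed.
EquatableValueSet left = EquatableValueSet.newBuilder(allocator, Types.MinorType.INT.getType(), true, false)
        .add(1).add(2).add(3)
        .build();
EquatableValueSet right = EquatableValueSet.newBuilder(allocator, Types.MinorType.INT.getType(), true, false)
        .add(2).add(3).add(4)
        .build();

// Internally this walks the left-hand Block and keeps the values present in the
// right-hand one, yielding {2, 3} here.
ValueSet intersection = left.intersect(allocator, right);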