Use of com.amazonaws.athena.connector.lambda.data.projectors.ArrowValueProjector in the aws-athena-query-federation project by awslabs.
From class ExampleUserDefinedFunctionHandlerTest, method testGetDefaultValueIfNullMethod.
/**
 * Runs the "get_default_value_if_null" UDF over a two-row BIGINT column (one populated row, one null row)
 * and checks each output row against a direct call to the handler method.
 */
@Test
public void testGetDefaultValueIfNullMethod() throws Exception {
    final long presentValue = 123L;

    Schema outputSchema = SchemaBuilder.newBuilder()
            .addField("output", Types.MinorType.BIGINT.getType())
            .build();
    Schema inputSchema = SchemaBuilder.newBuilder()
            .addField("input", Types.MinorType.BIGINT.getType())
            .build();

    // Row 0 carries a value, row 1 is explicitly null.
    Block inputBlock = allocator.createBlock(inputSchema);
    inputBlock.setRowCount(2);
    BigIntVector inputColumn = (BigIntVector) inputBlock.getFieldVector("input");
    inputColumn.setSafe(0, presentValue);
    inputColumn.setNull(1);

    Block outputBlock = runAndAssertSerialization(inputBlock, outputSchema, "get_default_value_if_null").getRecords();
    assertEquals(2, outputBlock.getRowCount());

    // Each projected output row must equal what the handler returns when invoked directly.
    ArrowValueProjector outputProjector =
            ProjectorUtils.createArrowValueProjector(outputBlock.getFieldReader("output"));
    assertEquals(exampleUserDefinedFunctionHandler.get_default_value_if_null(presentValue), outputProjector.project(0));
    assertEquals(exampleUserDefinedFunctionHandler.get_default_value_if_null(null), outputProjector.project(1));
}
Use of com.amazonaws.athena.connector.lambda.data.projectors.ArrowValueProjector in the aws-athena-query-federation project by awslabs.
From class ExampleUserDefinedFunctionHandlerTest, method testMultiplyMethod.
/**
 * Runs the "multiply" UDF over a single row with two INT columns and checks the output row against a
 * direct call to the handler method with the same operands.
 */
@Test
public void testMultiplyMethod() throws Exception {
    final int leftOperand = 2;
    final int rightOperand = 3;

    Schema outputSchema = SchemaBuilder.newBuilder()
            .addField("product", Types.MinorType.INT.getType())
            .build();
    Schema inputSchema = SchemaBuilder.newBuilder()
            .addField("factor1", Types.MinorType.INT.getType())
            .addField("factor2", Types.MinorType.INT.getType())
            .build();

    // A single input row: factor1 = 2, factor2 = 3.
    Block inputBlock = allocator.createBlock(inputSchema);
    inputBlock.setRowCount(1);
    IntVector leftColumn = (IntVector) inputBlock.getFieldVector("factor1");
    IntVector rightColumn = (IntVector) inputBlock.getFieldVector("factor2");
    leftColumn.setSafe(0, leftOperand);
    rightColumn.setSafe(0, rightOperand);

    Block outputBlock = runAndAssertSerialization(inputBlock, outputSchema, "multiply").getRecords();
    assertEquals(1, outputBlock.getRowCount());

    ArrowValueProjector outputProjector =
            ProjectorUtils.createArrowValueProjector(outputBlock.getFieldReader("product"));
    assertEquals(exampleUserDefinedFunctionHandler.multiply(leftOperand, rightOperand), outputProjector.project(0));
}
Use of com.amazonaws.athena.connector.lambda.data.projectors.ArrowValueProjector in the aws-athena-query-federation project by awslabs.
From class ExampleUserDefinedFunctionHandlerTest, method testToJsonMethod.
/**
 * Runs the "to_json" UDF over a single struct-valued row and checks the output row against a direct call
 * to the handler method with the same struct.
 */
@Test
public void testToJsonMethod() throws Exception {
    // The struct value written into row 0 and later passed straight to the handler.
    Map<String, Object> structValue = new HashMap<>();
    structValue.put("int", 10);
    structValue.put("double", 2.3);
    structValue.put("string", "test_string");

    Schema inputSchema = SchemaBuilder.newBuilder()
            .addStructField("struct")
            .addChildField("struct", "int", Types.MinorType.INT.getType())
            .addChildField("struct", "double", Types.MinorType.FLOAT8.getType())
            .addChildField("struct", "string", Types.MinorType.VARCHAR.getType())
            .build();
    Schema outputSchema = SchemaBuilder.newBuilder()
            .addField("json", Types.MinorType.VARCHAR.getType())
            .build();

    Block inputBlock = allocator.createBlock(inputSchema);
    inputBlock.setRowCount(1);
    FieldVector structColumn = inputBlock.getFieldVector("struct");
    BlockUtils.setComplexValue(structColumn, 0, FieldResolver.DEFAULT, structValue);

    Block outputBlock = runAndAssertSerialization(inputBlock, outputSchema, "to_json").getRecords();
    assertEquals(1, outputBlock.getRowCount());

    ArrowValueProjector outputProjector =
            ProjectorUtils.createArrowValueProjector(outputBlock.getFieldReader("json"));
    assertEquals(exampleUserDefinedFunctionHandler.to_json(structValue), outputProjector.project(0));
}
Use of com.amazonaws.athena.connector.lambda.data.projectors.ArrowValueProjector in the aws-athena-query-federation project by awslabs.
From class UserDefinedFunctionHandler, method processRows.
/**
 * Processes a group of rows: takes a block of input data (containing multiple rows), applies the UDF to
 * each row, and returns a block holding the corresponding rows of the single output column.
 * <p>
 * The UDF is applied row-by-row in a loop. Arrow values are converted to Java objects and then handed to
 * the UDF Java method, which is not very efficient because it may involve a lot of data copying. Advanced
 * users can override this method and work with the Arrow data directly for better performance.
 * <p>
 * If writing any row fails, the partially filled output block is closed before the failure is rethrown.
 *
 * @param allocator arrow memory allocator used to create the output block
 * @param udfMethod the extracted java method matching the User-Defined-Function defined in Athena
 * @param inputRecords input data in Arrow format
 * @param outputSchema output data schema in Arrow format; only its first field is written
 * @return output data in Arrow format, with the same row count as {@code inputRecords}
 * @throws Exception whatever failure was raised while preparing or writing the output rows
 */
protected Block processRows(BlockAllocator allocator, Method udfMethod, Block inputRecords, Schema outputSchema) throws Exception {
    final int totalRows = inputRecords.getRowCount();

    // Build one value projector per input column, in the block's field order.
    List<ArrowValueProjector> projectors = Lists.newArrayList();
    for (Field inputField : inputRecords.getFields()) {
        projectors.add(ProjectorUtils.createArrowValueProjector(inputRecords.getFieldReader(inputField.getName())));
    }

    // Only the first field of the output schema is populated.
    GeneratedRowWriter rowWriter = createOutputRowWriter(outputSchema.getFields().get(0), projectors, udfMethod);

    Block result = allocator.createBlock(outputSchema);
    result.setRowCount(totalRows);

    try {
        int row = 0;
        while (row < totalRows) {
            // Input row index and output row index are the same.
            rowWriter.writeRow(result, row, row);
            row++;
        }
    } catch (Throwable failure) {
        // Release the output block before propagating; a failure to close is logged, not thrown,
        // so the original failure is the one the caller sees.
        try {
            result.close();
        } catch (Exception closeFailure) {
            logger.error("Error closing output block", closeFailure);
        }
        throw failure;
    }

    return result;
}
Use of com.amazonaws.athena.connector.lambda.data.projectors.ArrowValueProjector in the aws-athena-query-federation project by awslabs.
From class ExampleUserDefinedFunctionHandlerTest, method testConcatenateMethod.
/**
 * Runs the "concatenate" UDF over a single row holding a VARCHAR list and checks the output row against
 * a direct call to the handler method with an equal list.
 */
@Test
public void testConcatenateMethod() throws Exception {
    Schema outputSchema = SchemaBuilder.newBuilder()
            .addField("string", Types.MinorType.VARCHAR.getType())
            .build();
    Schema inputSchema = SchemaBuilder.newBuilder()
            .addListField("list", Types.MinorType.VARCHAR.getType())
            .build();

    // Row 0 of the list column holds ["a", "b"].
    Block inputBlock = allocator.createBlock(inputSchema);
    inputBlock.setRowCount(1);
    FieldVector listColumn = inputBlock.getFieldVector("list");
    List<String> listValue = Lists.newArrayList("a", "b");
    BlockUtils.setComplexValue(listColumn, 0, FieldResolver.DEFAULT, listValue);

    Block outputBlock = runAndAssertSerialization(inputBlock, outputSchema, "concatenate").getRecords();
    assertEquals(1, outputBlock.getRowCount());

    // The handler is called with a fresh, equal list rather than the instance written into the block.
    ArrowValueProjector outputProjector =
            ProjectorUtils.createArrowValueProjector(outputBlock.getFieldReader("string"));
    assertEquals(exampleUserDefinedFunctionHandler.concatenate(Lists.newArrayList("a", "b")), outputProjector.project(0));
}
Aggregations