Example use of com.amazonaws.athena.connector.lambda.data.writers.extractors.Extractor in the project aws-athena-query-federation by awslabs:
the JdbcRecordHandler class, method makeFactory.
/**
 * Builds a custom FieldWriterFactory for complex List-typed columns, since the generated
 * writers only cover scalar types.
 *
 * @param field Metadata for the List column being written.
 * @return A FieldWriterFactory whose FieldWriters read a SQL ARRAY from the ResultSet
 *         (passed as the write context) and project it into the target vector.
 */
protected FieldWriterFactory makeFactory(Field field) {
    return (FieldVector vector, Extractor extractor, ConstraintProjector constraint) ->
            (FieldWriter) (Object context, int rowNum) -> {
                ResultSet resultSet = (ResultSet) context;
                Array sqlArray = resultSet.getArray(field.getName());
                // Per JDBC, wasNull() reports on the column just read; skip writing for SQL NULL.
                if (!resultSet.wasNull()) {
                    List<Object> values = new ArrayList<>(Arrays.asList((Object[]) sqlArray.getArray()));
                    BlockUtils.setComplexValue(vector, rowNum, FieldResolver.DEFAULT, values);
                }
                return true;
            };
}
Example use of com.amazonaws.athena.connector.lambda.data.writers.extractors.Extractor in the project aws-athena-query-federation by awslabs:
the ElasticsearchTypeUtilsTest class, method testField.
/**
 * Uses the correct field extractor to extract values from a document.
 *
 * For each field in the schema, the matching typed extractor is obtained from typeUtils and
 * invoked against the document; every branch asserts the value was actually set (isSet == 1)
 * before recording it. The Arrow Nullable*Holder types share no common supertype exposing
 * isSet/value, hence the explicit per-type branches.
 *
 * @param mapping is the metadata definitions of the document being processed.
 * @param document contains the values to be extracted.
 * @return a map of the field names and their associated values extracted from the document.
 * @throws Exception
 */
private Map<String, Object> testField(Schema mapping, Map<String, Object> document) throws Exception {
    Map<String, Object> results = new HashMap<>();
    for (Field field : mapping.getFields()) {
        // Resolve the extractor for this field's Arrow type; the instanceof chain below
        // dispatches to the matching typed holder.
        Extractor extractor = typeUtils.makeExtractor(field);
        if (extractor instanceof VarCharExtractor) {
            NullableVarCharHolder holder = new NullableVarCharHolder();
            ((VarCharExtractor) extractor).extract(document, holder);
            // isSet == 1 means the extractor found a non-null value for this field.
            assertEquals("Could not extract value for: " + field.getName(), 1, holder.isSet);
            results.put(field.getName(), holder.value);
        } else if (extractor instanceof BigIntExtractor) {
            NullableBigIntHolder holder = new NullableBigIntHolder();
            ((BigIntExtractor) extractor).extract(document, holder);
            assertEquals("Could not extract value for: " + field.getName(), 1, holder.isSet);
            results.put(field.getName(), holder.value);
        } else if (extractor instanceof IntExtractor) {
            NullableIntHolder holder = new NullableIntHolder();
            ((IntExtractor) extractor).extract(document, holder);
            assertEquals("Could not extract value for: " + field.getName(), 1, holder.isSet);
            results.put(field.getName(), holder.value);
        } else if (extractor instanceof SmallIntExtractor) {
            NullableSmallIntHolder holder = new NullableSmallIntHolder();
            ((SmallIntExtractor) extractor).extract(document, holder);
            assertEquals("Could not extract value for: " + field.getName(), 1, holder.isSet);
            results.put(field.getName(), holder.value);
        } else if (extractor instanceof TinyIntExtractor) {
            NullableTinyIntHolder holder = new NullableTinyIntHolder();
            ((TinyIntExtractor) extractor).extract(document, holder);
            assertEquals("Could not extract value for: " + field.getName(), 1, holder.isSet);
            results.put(field.getName(), holder.value);
        } else if (extractor instanceof Float8Extractor) {
            NullableFloat8Holder holder = new NullableFloat8Holder();
            ((Float8Extractor) extractor).extract(document, holder);
            assertEquals("Could not extract value for: " + field.getName(), 1, holder.isSet);
            results.put(field.getName(), holder.value);
        } else if (extractor instanceof Float4Extractor) {
            NullableFloat4Holder holder = new NullableFloat4Holder();
            ((Float4Extractor) extractor).extract(document, holder);
            assertEquals("Could not extract value for: " + field.getName(), 1, holder.isSet);
            results.put(field.getName(), holder.value);
        } else if (extractor instanceof DateMilliExtractor) {
            NullableDateMilliHolder holder = new NullableDateMilliHolder();
            ((DateMilliExtractor) extractor).extract(document, holder);
            assertEquals("Could not extract value for: " + field.getName(), 1, holder.isSet);
            results.put(field.getName(), holder.value);
        } else if (extractor instanceof BitExtractor) {
            NullableBitHolder holder = new NullableBitHolder();
            ((BitExtractor) extractor).extract(document, holder);
            assertEquals("Could not extract value for: " + field.getName(), 1, holder.isSet);
            results.put(field.getName(), holder.value);
        }
        // NOTE(review): fields whose extractor matches none of the above are silently
        // omitted from the results — presumably intentional for this test; confirm.
    }
    return results;
}
Example use of com.amazonaws.athena.connector.lambda.data.writers.extractors.Extractor in the project aws-athena-query-federation by awslabs:
the GeneratedRowWriter class, method makeFieldWriter.
/**
 * Resolves the FieldWriter to use for the given vector. A custom FieldWriterFactory
 * registered for the field always takes precedence; otherwise the stock writer matching
 * the field's Arrow minor type is constructed from the registered extractor.
 */
private FieldWriter makeFieldWriter(FieldVector vector) {
    final Field field = vector.getField();
    final String name = field.getName();
    final Types.MinorType minorType = Types.getMinorTypeForArrowType(field.getType());
    final Extractor fieldExtractor = extractors.get(name);
    final ConstraintProjector projector = constraints.get(name);

    // Custom factories win outright; they may legitimately ignore a null extractor.
    final FieldWriterFactory customFactory = fieldWriterFactories.get(name);
    if (customFactory != null) {
        return customFactory.create(vector, fieldExtractor, projector);
    }

    // Without a factory, the stock writers below all require an extractor.
    if (fieldExtractor == null) {
        throw new IllegalStateException("Missing extractor for field[" + name + "]");
    }

    switch (minorType) {
        case INT:
            return new IntFieldWriter((IntExtractor) fieldExtractor, (IntVector) vector, projector);
        case BIGINT:
            return new BigIntFieldWriter((BigIntExtractor) fieldExtractor, (BigIntVector) vector, projector);
        case DATEMILLI:
            return new DateMilliFieldWriter((DateMilliExtractor) fieldExtractor, (DateMilliVector) vector, projector);
        case DATEDAY:
            return new DateDayFieldWriter((DateDayExtractor) fieldExtractor, (DateDayVector) vector, projector);
        case TINYINT:
            return new TinyIntFieldWriter((TinyIntExtractor) fieldExtractor, (TinyIntVector) vector, projector);
        case SMALLINT:
            return new SmallIntFieldWriter((SmallIntExtractor) fieldExtractor, (SmallIntVector) vector, projector);
        case FLOAT4:
            return new Float4FieldWriter((Float4Extractor) fieldExtractor, (Float4Vector) vector, projector);
        case FLOAT8:
            return new Float8FieldWriter((Float8Extractor) fieldExtractor, (Float8Vector) vector, projector);
        case DECIMAL:
            return new DecimalFieldWriter((DecimalExtractor) fieldExtractor, (DecimalVector) vector, projector);
        case BIT:
            return new BitFieldWriter((BitExtractor) fieldExtractor, (BitVector) vector, projector);
        case VARCHAR:
            return new VarCharFieldWriter((VarCharExtractor) fieldExtractor, (VarCharVector) vector, projector);
        case VARBINARY:
            return new VarBinaryFieldWriter((VarBinaryExtractor) fieldExtractor, (VarBinaryVector) vector, projector);
        default:
            throw new RuntimeException(minorType + " is not supported");
    }
}
Example use of com.amazonaws.athena.connector.lambda.data.writers.extractors.Extractor in the project aws-athena-query-federation by awslabs:
the ExampleRecordHandler class, method readWithConstraint.
/**
 * Here we generate our simulated row data. A real connector would instead connect to the actual source and read
 * the data corresponding to the requested split.
 *
 * @param spiller A BlockSpiller that should be used to write the row data associated with this Split.
 * The BlockSpiller automatically handles applying constraints, chunking the response, encrypting, and spilling to S3.
 * @param request The ReadRecordsRequest containing the split and other details about what to read.
 * @param queryStatusChecker A QueryStatusChecker that you can use to stop doing work for a query that has already terminated
 */
@Override
protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest request, QueryStatusChecker queryStatusChecker) {
    long startTime = System.currentTimeMillis();
    /*
     * It is important to try and throw any throttling events before writing data since Athena may not be able to
     * continue the query, due to consistency errors, if you throttle after writing data.
     */
    if (simulateThrottle > 0 && count++ % simulateThrottle == 0) {
        logger.info("readWithConstraint: throwing throttle Exception!");
        throw new FederationThrottleException("Please slow down for this simulated throttling event");
    }
    logCaller(request);
    // Partition columns are conveyed via the schema's custom metadata as a comma-separated list.
    Set<String> partitionCols = new HashSet<>();
    String partitionColsMetadata = request.getSchema().getCustomMetadata().get("partitionCols");
    if (partitionColsMetadata != null) {
        partitionCols.addAll(Arrays.asList(partitionColsMetadata.split(",")));
    }
    // parseInt (not valueOf) — only the primitive is needed; avoids a needless Integer box.
    int year = Integer.parseInt(request.getSplit().getProperty("year"));
    int month = Integer.parseInt(request.getSplit().getProperty("month"));
    int day = Integer.parseInt(request.getSplit().getProperty("day"));
    final RowContext rowContext = new RowContext(year, month, day);
    // Scalar fields get a typed Extractor; complex fields (where makeExtractor returns null)
    // fall back to a custom FieldWriterFactory.
    GeneratedRowWriter.RowWriterBuilder builder = GeneratedRowWriter.newBuilder(request.getConstraints());
    for (Field next : request.getSchema().getFields()) {
        Extractor extractor = makeExtractor(next, rowContext);
        if (extractor != null) {
            builder.withExtractor(next.getName(), extractor);
        } else {
            builder.withFieldWriterFactory(next.getName(), makeFactory(next, rowContext));
        }
    }
    GeneratedRowWriter rowWriter = builder.build();
    for (int i = 0; i < numRowsPerSplit; i++) {
        rowContext.seed = i;
        rowContext.negative = i % 2 == 0;
        // Bail out early if Athena has already abandoned the query.
        if (!queryStatusChecker.isQueryRunning()) {
            return;
        }
        spiller.writeRows((Block block, int rowNum) -> rowWriter.writeRow(block, rowNum, rowContext) ? 1 : 0);
    }
    logger.info("readWithConstraint: Completed generating rows in {} ms", System.currentTimeMillis() - startTime);
}
Example use of com.amazonaws.athena.connector.lambda.data.writers.extractors.Extractor in the project aws-athena-query-federation by awslabs:
the ExampleRecordHandler class, method makeFactory.
/**
 * Since GeneratedRowWriter doesn't yet support complex types (STRUCT, LIST) we use this to
 * create our own FieldWriters via customer FieldWriterFactory. In this case we are producing
 * FieldWriters that only work for our exact example schema. This will be enhanced with a more
 * generic solution in a future release.
 *
 * Note: the writer call ordering below (setPosition → startList/startMap → child writes →
 * end* → setNotNull) is significant to Arrow's union writers; do not reorder.
 * The rowContext parameter is currently unused by these hard-coded writers but is kept for
 * signature parity with makeExtractor.
 */
private FieldWriterFactory makeFactory(Field field, RowContext rowContext) {
    Types.MinorType fieldType = Types.getMinorTypeForArrowType(field.getType());
    switch(fieldType) {
        case LIST:
            // Dispatch on the list's element type; only LIST-of-LIST and LIST-of-STRUCT
            // from the example schema are handled.
            Field child = field.getChildren().get(0);
            Types.MinorType childType = Types.getMinorTypeForArrowType(child.getType());
            switch(childType) {
                case LIST:
                    return (FieldVector vector, Extractor extractor, ConstraintProjector constraint) -> (FieldWriter) (Object context, int rowNum) -> {
                        UnionListWriter writer = ((ListVector) vector).getWriter();
                        writer.setPosition(rowNum);
                        writer.startList();
                        BaseWriter.ListWriter innerWriter = writer.list();
                        innerWriter.startList();
                        // Emit three fixed varchar values ("1000", "1001", "1002") into the inner list.
                        for (int i = 0; i < 3; i++) {
                            byte[] bytes = String.valueOf(1000 + i).getBytes(Charsets.UTF_8);
                            // try-with-resources releases the ArrowBuf once its bytes are copied in.
                            try (ArrowBuf buf = vector.getAllocator().buffer(bytes.length)) {
                                buf.writeBytes(bytes);
                                innerWriter.varChar().writeVarChar(0, (int) (buf.readableBytes()), buf);
                            }
                        }
                        innerWriter.endList();
                        writer.endList();
                        // Mark the row valid in the vector's validity bitmap.
                        ((ListVector) vector).setNotNull(rowNum);
                        return true;
                    };
                case STRUCT:
                    return (FieldVector vector, Extractor extractor, ConstraintProjector constraint) -> (FieldWriter) (Object context, int rowNum) -> {
                        UnionListWriter writer = ((ListVector) vector).getWriter();
                        writer.setPosition(rowNum);
                        writer.startList();
                        // Write one struct entry with fixed fields: varchar "chars" and bigint 100.
                        BaseWriter.StructWriter structWriter = writer.struct();
                        structWriter.start();
                        byte[] bytes = "chars".getBytes(Charsets.UTF_8);
                        try (ArrowBuf buf = vector.getAllocator().buffer(bytes.length)) {
                            buf.writeBytes(bytes);
                            structWriter.varChar("varchar").writeVarChar(0, (int) (buf.readableBytes()), buf);
                        }
                        structWriter.bigInt("bigint").writeBigInt(100L);
                        structWriter.end();
                        writer.endList();
                        ((ListVector) vector).setNotNull(rowNum);
                        return true;
                    };
                default:
                    throw new IllegalArgumentException("Unsupported type " + childType);
            }
        case MAP:
            return (FieldVector vector, Extractor extractor, ConstraintProjector constraint) -> (FieldWriter) (Object context, int rowNum) -> {
                UnionMapWriter writer = ((MapVector) vector).getWriter();
                writer.setPosition(rowNum);
                writer.startMap();
                // Single fixed entry: key "chars" -> value 1001.
                writer.startEntry();
                byte[] bytes = "chars".getBytes(Charsets.UTF_8);
                try (ArrowBuf buf = vector.getAllocator().buffer(bytes.length)) {
                    buf.writeBytes(bytes);
                    writer.key().varChar("key").writeVarChar(0, (int) (buf.readableBytes()), buf);
                }
                writer.value().integer("value").writeInt(1001);
                writer.endEntry();
                writer.endMap();
                ((MapVector) vector).setNotNull(rowNum);
                return true;
            };
        default:
            throw new IllegalArgumentException("Unsupported type " + fieldType);
    }
}
Aggregations