Use of com.amazonaws.athena.connector.lambda.data.FieldBuilder in the aws-athena-query-federation project by awslabs.
Class UserDefinedFunctionHandlerTest, method getArrowField.
/**
 * Builds the Arrow {@link Field} used by the tests to describe a column of the given Java type.
 *
 * <p>Supported mappings:
 * <ul>
 *   <li>{@code Integer} - nullable 32-bit signed int</li>
 *   <li>{@code Float} - nullable single-precision floating point</li>
 *   <li>{@code Double} - nullable double-precision floating point</li>
 *   <li>{@code String} - nullable Utf8</li>
 *   <li>{@code Boolean} - nullable Bool</li>
 *   <li>{@code List} - nullable LIST whose single child is a nullable 32-bit signed int that
 *       reuses {@code columnName} as its name</li>
 *   <li>{@code Map} - STRUCT built through {@link FieldBuilder} with the children
 *       {@code intVal} (32-bit signed int) and {@code doubleVal} (double precision)</li>
 * </ul>
 *
 * @param type       the Java class to map; compared by identity against the supported classes
 * @param columnName the name given to the resulting field
 * @return the Arrow field describing the column
 * @throws IllegalArgumentException if {@code type} is not one of the supported classes
 */
private Field getArrowField(Class<?> type, String columnName) {
    if (type == Integer.class) {
        return new Field(columnName, FieldType.nullable(new ArrowType.Int(32, true)), null);
    }
    if (type == Float.class) {
        return new Field(columnName, FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), null);
    }
    if (type == Double.class) {
        return new Field(columnName, FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), null);
    }
    if (type == String.class) {
        return new Field(columnName, FieldType.nullable(new ArrowType.Utf8()), null);
    }
    if (type == Boolean.class) {
        return new Field(columnName, FieldType.nullable(new ArrowType.Bool()), null);
    }
    if (type == List.class) {
        // The list's element field deliberately carries the same name as the list itself.
        Field childField = new Field(columnName, FieldType.nullable(new ArrowType.Int(32, true)), null);
        return new Field(columnName, FieldType.nullable(Types.MinorType.LIST.getType()), Collections.singletonList(childField));
    }
    if (type == Map.class) {
        FieldBuilder fieldBuilder = FieldBuilder.newBuilder(columnName, Types.MinorType.STRUCT.getType());
        Field childField1 = new Field("intVal", FieldType.nullable(new ArrowType.Int(32, true)), null);
        Field childField2 = new Field("doubleVal", FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), null);
        fieldBuilder.addField(childField1);
        fieldBuilder.addField(childField2);
        return fieldBuilder.build();
    }
    throw new IllegalArgumentException("Unsupported type " + type);
}
Use of com.amazonaws.athena.connector.lambda.data.FieldBuilder in the aws-athena-query-federation project by awslabs.
Class GlueFieldLexer, method lexComplex.
/**
 * Lexes a complex Glue type (the STRUCT or LIST keyword) beginning at {@code startToken} into an
 * Arrow {@link Field}.
 *
 * <p>For a LIST, the next token supplies the element type: a nested STRUCT/LIST is lexed
 * recursively, anything else is resolved through {@code mapper}. The resulting LIST field is
 * returned immediately with that single child. For a STRUCT, child fields are lexed until the
 * parser's current token carries the {@code FIELD_END} marker, and that end token is then consumed.
 *
 * <p>Type keywords are lower-cased with {@link java.util.Locale#ROOT} so that matching does not
 * depend on the JVM's default locale.
 *
 * @param name       the name to give the resulting field (also used for a LIST's element field)
 * @param startToken the token that opens the complex type; its marker must be {@code FIELD_START}
 * @param parser     the parser supplying the remaining tokens; it is advanced by this call
 * @param mapper     resolves non-complex element types to Arrow fields
 * @return the Arrow field for the complex type
 * @throws RuntimeException if {@code startToken} does not carry the {@code FIELD_START} marker or
 *                          its value is neither the STRUCT nor the LIST keyword
 */
private static Field lexComplex(String name, GlueTypeParser.Token startToken, GlueTypeParser parser, BaseTypeMapper mapper) {
    FieldBuilder fieldBuilder;
    logger.debug("lexComplex: enter - {}", name);
    if (startToken.getMarker() != GlueTypeParser.FIELD_START) {
        throw new RuntimeException("Parse error, expected " + GlueTypeParser.FIELD_START + " but found " + startToken.getMarker());
    }

    // Locale.ROOT avoids locale-specific case mappings (e.g. Turkish dotless i) breaking keyword matching.
    String startType = startToken.getValue().toLowerCase(java.util.Locale.ROOT);
    if (startType.equals(STRUCT)) {
        fieldBuilder = FieldBuilder.newBuilder(name, Types.MinorType.STRUCT.getType());
    } else if (startType.equals(LIST)) {
        GlueTypeParser.Token arrayType = parser.next();
        Field child;
        String type = arrayType.getValue().toLowerCase(java.util.Locale.ROOT);
        if (type.equals(STRUCT) || type.equals(LIST)) {
            child = lexComplex(name, arrayType, parser, mapper);
        } else {
            child = mapper.getField(name, arrayType.getValue());
        }
        return FieldBuilder.newBuilder(name, Types.MinorType.LIST.getType()).addField(child).build();
    } else {
        throw new RuntimeException("Unexpected start type " + startToken.getValue());
    }

    while (parser.hasNext() && parser.currentToken().getMarker() != GlueTypeParser.FIELD_END) {
        Field child = lex(parser.next(), parser, mapper);
        fieldBuilder.addField(child);
        if (Types.getMinorTypeForArrowType(child.getType()) == Types.MinorType.LIST) {
            // An ARRAY Glue type (LIST in Arrow) within a STRUCT has the same ending token as a STRUCT (">" or
            // GlueTypeParser.FIELD_END). If allowed to proceed, the Glue parser will misinterpret the end of the
            // ARRAY to be the end of the STRUCT (which is currently being processed), ending the loop prematurely
            // and causing all subsequent fields in the STRUCT to be dropped.
            // Example: movies: STRUCT<actors:ARRAY<STRING>,genre:ARRAY<STRING>>
            // would result in Field definition: movies: Struct<actors: List<actors: Utf8>>.
            // To prevent that from happening, we must consume an additional token to get past the LIST's
            // ending token ">".
            parser.next();
        }
    }
    // Consume the STRUCT's own end token.
    parser.next();
    logger.debug("lexComplex: exit - {}", name);
    return fieldBuilder.build();
}
Use of com.amazonaws.athena.connector.lambda.data.FieldBuilder in the aws-athena-query-federation project by awslabs.
Class SchemaUtils, method mergeStructField.
/**
 * Combines two STRUCT fields into one STRUCT field. The children of the current field form the
 * base; each child of the new field is then either added (when the base has no child of that name)
 * or reconciled with the existing child of the same name. Merging two identical STRUCTs yields a
 * result that is essentially the same as either input.
 *
 * @param fieldName The name of the merged Field.
 * @param curParentField The current field to use as the base for the merge.
 * @param newParentField The new field to merge into the base.
 * @return The merged field.
 */
private static Field mergeStructField(String fieldName, Field curParentField, Field newParentField) {
    FieldBuilder merged = FieldBuilder.newBuilder(fieldName, Types.MinorType.STRUCT.getType());

    // Seed the result with every child of the current (base) field.
    for (Field baseChild : curParentField.getChildren()) {
        merged.addField(baseChild);
    }

    for (Field incoming : newParentField.getChildren()) {
        Field existing = merged.getChild(incoming.getName());

        // No child of this name yet: take the incoming child as-is.
        if (existing == null) {
            merged.addField(incoming);
            continue;
        }

        Types.MinorType incomingType = Types.getMinorTypeForArrowType(incoming.getType());
        Types.MinorType existingType = Types.getMinorTypeForArrowType(existing.getType());

        if (existingType != incomingType) {
            // TODO: mixed-type fields are currently resolved by falling back to VARCHAR. This is _not_ ideal
            // for various reasons, including that it will cause predicate oddities if used in a filter.
            logger.warn("mergeStructField: Encountered a mixed-type field[{}] {} vs {}, defaulting to String.", incoming.getName(), incomingType, existingType);
            merged.addStringField(incoming.getName());
            continue;
        }

        // Same type on both sides: only nested LIST and STRUCT children need a recursive merge;
        // any other matching type keeps the child already present in the result.
        if (existingType == Types.MinorType.LIST) {
            merged.addField(mergeListField(incoming.getName(), existing, incoming));
        } else if (existingType == Types.MinorType.STRUCT) {
            merged.addField(mergeStructField(incoming.getName(), existing, incoming));
        }
    }

    return merged.build();
}
Aggregations