Search in sources :

Example 1 with FieldBuilder

use of com.amazonaws.athena.connector.lambda.data.FieldBuilder in project aws-athena-query-federation by awslabs.

the class UserDefinedFunctionHandlerTest method getArrowField.

/**
 * Builds the Arrow {@link Field} that represents a column of the given Java type.
 *
 * <p>Supported classes (matched by identity, so e.g. {@code ArrayList.class} is NOT treated as
 * {@code List.class}):
 * <ul>
 *   <li>{@code Integer} - nullable signed 32-bit int</li>
 *   <li>{@code Float} - nullable single-precision floating point</li>
 *   <li>{@code Double} - nullable double-precision floating point</li>
 *   <li>{@code String} - nullable UTF-8 string</li>
 *   <li>{@code Boolean} - nullable boolean</li>
 *   <li>{@code List} - nullable LIST whose single child is a nullable signed 32-bit int that
 *       carries the same name as the column</li>
 *   <li>{@code Map} - STRUCT built through {@link FieldBuilder} with two children:
 *       {@code intVal} (signed 32-bit int) and {@code doubleVal} (double precision)</li>
 * </ul>
 *
 * @param type the Java class to translate
 * @param columnName the name given to the resulting field
 * @return the Arrow field describing the column
 * @throws IllegalArgumentException if {@code type} is not one of the supported classes
 */
private Field getArrowField(Class<?> type, String columnName) {
    if (type == Integer.class) {
        return nullableField(columnName, new ArrowType.Int(32, true));
    }
    if (type == Float.class) {
        return nullableField(columnName, new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE));
    }
    if (type == Double.class) {
        return nullableField(columnName, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
    }
    if (type == String.class) {
        return nullableField(columnName, new ArrowType.Utf8());
    }
    if (type == Boolean.class) {
        return nullableField(columnName, new ArrowType.Bool());
    }
    if (type == List.class) {
        // The element field deliberately reuses the column name.
        Field elementField = nullableField(columnName, new ArrowType.Int(32, true));
        return new Field(columnName, FieldType.nullable(Types.MinorType.LIST.getType()), Collections.singletonList(elementField));
    }
    if (type == Map.class) {
        return FieldBuilder.newBuilder(columnName, Types.MinorType.STRUCT.getType())
                .addField(nullableField("intVal", new ArrowType.Int(32, true)))
                .addField(nullableField("doubleVal", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)))
                .build();
    }
    throw new IllegalArgumentException("Unsupported type " + type);
}

/** Creates a nullable Arrow field of the given type with no children. */
private static Field nullableField(String name, ArrowType arrowType) {
    return new Field(name, FieldType.nullable(arrowType), null);
}
Also used : Field(org.apache.arrow.vector.types.pojo.Field) FieldBuilder(com.amazonaws.athena.connector.lambda.data.FieldBuilder)

Example 2 with FieldBuilder

use of com.amazonaws.athena.connector.lambda.data.FieldBuilder in project aws-athena-query-federation by awslabs.

the class GlueFieldLexer method lexComplex.

/**
 * Lexes a complex Glue type (a STRUCT or a LIST) starting at {@code startToken} into an Arrow Field.
 *
 * <p>A LIST consumes one further token for its element type, recursing when that element is itself
 * a STRUCT or LIST and otherwise delegating to {@code mapper}. A STRUCT keeps lexing child fields
 * until the parser is exhausted or the current token carries the field-end marker.
 *
 * @param name the name of the field being produced
 * @param startToken the token that opens the complex type; must carry the field-start marker
 * @param parser the token source; advanced as tokens are consumed
 * @param mapper resolves non-complex list element types into fields
 * @return the STRUCT or LIST field
 * @throws RuntimeException if the start token has the wrong marker or is neither a STRUCT nor a LIST
 */
private static Field lexComplex(String name, GlueTypeParser.Token startToken, GlueTypeParser parser, BaseTypeMapper mapper) {
    logger.debug("lexComplex: enter - {}", name);

    if (startToken.getMarker() != GlueTypeParser.FIELD_START) {
        throw new RuntimeException("Parse error, expected " + GlueTypeParser.FIELD_START + " but found " + startToken.getMarker());
    }

    String startType = startToken.getValue().toLowerCase();
    if (!startType.equals(STRUCT)) {
        if (!startType.equals(LIST)) {
            throw new RuntimeException("Unexpected start type " + startToken.getValue());
        }

        // LIST: the very next token describes the element type.
        GlueTypeParser.Token elementToken = parser.next();
        String elementType = elementToken.getValue().toLowerCase();
        Field element;
        if (elementType.equals(STRUCT) || elementType.equals(LIST)) {
            element = lexComplex(name, elementToken, parser, mapper);
        } else {
            element = mapper.getField(name, elementToken.getValue());
        }
        return FieldBuilder.newBuilder(name, Types.MinorType.LIST.getType()).addField(element).build();
    }

    // STRUCT: gather children until the closing marker (or the end of input).
    FieldBuilder structBuilder = FieldBuilder.newBuilder(name, Types.MinorType.STRUCT.getType());
    while (parser.hasNext() && parser.currentToken().getMarker() != GlueTypeParser.FIELD_END) {
        Field member = lex(parser.next(), parser, mapper);
        structBuilder.addField(member);

        boolean memberIsList = Types.getMinorTypeForArrowType(member.getType()) == Types.MinorType.LIST;
        if (memberIsList) {
            // An ARRAY Glue type (LIST in Arrow) nested in a STRUCT closes with the same token as the
            // STRUCT itself (">" / GlueTypeParser.FIELD_END). Left alone, the loop condition would take
            // the ARRAY's closing token for the end of this STRUCT, stop early and drop every remaining
            // member. Example: movies: STRUCT<actors:ARRAY<STRING>,genre:ARRAY<STRING>> would come out
            // as movies: Struct<actors: List<actors: Utf8>>. Consuming one extra token steps past the
            // LIST's closing ">".
            parser.next();
        }
    }

    // Step past the STRUCT's own closing token.
    parser.next();
    logger.debug("lexComplex: exit - {}", name);
    return structBuilder.build();
}
Also used : Field(org.apache.arrow.vector.types.pojo.Field) FieldBuilder(com.amazonaws.athena.connector.lambda.data.FieldBuilder)

Example 3 with FieldBuilder

use of com.amazonaws.athena.connector.lambda.data.FieldBuilder in project aws-athena-query-federation by awslabs.

the class SchemaUtils method mergeStructField.

/**
 * Merges two STRUCT Fields into a single STRUCT Field. Merging two identical STRUCTs yields a result
 * that is essentially the same as either input.
 *
 * <p>Children of the current field seed the result. Each child of the new field is then added if no
 * child with that name exists yet; merged recursively if both sides are LISTs or both are STRUCTs;
 * replaced by a String field if the two sides disagree on type; and otherwise left as it is.
 *
 * @param fieldName The name of the merged Field.
 * @param curParentField The current field to use as the base for the merge.
 * @param newParentField The new field to merge into the base.
 * @return The merged field.
 */
private static Field mergeStructField(String fieldName, Field curParentField, Field newParentField) {
    FieldBuilder merged = FieldBuilder.newBuilder(fieldName, Types.MinorType.STRUCT.getType());

    // Start from everything the base STRUCT already has.
    for (Field baseChild : curParentField.getChildren()) {
        merged.addField(baseChild);
    }

    for (Field incoming : newParentField.getChildren()) {
        String childName = incoming.getName();
        Field existing = merged.getChild(childName);

        if (existing == null) {
            // Only present on the new side: take it as is.
            merged.addField(incoming);
        } else {
            Types.MinorType newType = Types.getMinorTypeForArrowType(incoming.getType());
            Types.MinorType curType = Types.getMinorTypeForArrowType(existing.getType());

            if (curType == newType) {
                // Same type on both sides: only nested types need a deeper merge.
                if (curType == Types.MinorType.LIST) {
                    merged.addField(mergeListField(childName, existing, incoming));
                } else if (curType == Types.MinorType.STRUCT) {
                    merged.addField(mergeStructField(childName, existing, incoming));
                }
            } else {
                // TODO: currently we resolve fields with mixed types by defaulting to VARCHAR. This is
                // _not_ ideal for various reasons but also because it will cause predicate oddities if
                // used in a filter.
                logger.warn("mergeStructField: Encountered a mixed-type field[{}] {} vs {}, defaulting to String.", childName, newType, curType);
                merged.addStringField(childName);
            }
        }
    }

    return merged.build();
}
Also used : Field(org.apache.arrow.vector.types.pojo.Field) Types(org.apache.arrow.vector.types.Types) FieldBuilder(com.amazonaws.athena.connector.lambda.data.FieldBuilder)

Aggregations

FieldBuilder (com.amazonaws.athena.connector.lambda.data.FieldBuilder): 3, Field (org.apache.arrow.vector.types.pojo.Field): 3, Types (org.apache.arrow.vector.types.Types): 1