Search in sources :

Example 1 with AvroRuntimeException

use of org.apache.avro.AvroRuntimeException in project incubator-gobblin by apache.

the class HiveAvroORCQueryGenerator method generateAvroToHiveColumnMapping.

/**
 * Renders an Avro schema as Hive column / type definition text.
 * <p>
 * For the top-level call the schema must be a RECORD; each of its fields becomes one
 * backtick-quoted column definition carrying a {@code COMMENT} that names the field's
 * {@code flatten_source} property (or the field name when that property is blank).
 * For nested calls the Hive type expression of the schema is rendered instead. A NULL
 * schema renders as an empty string.
 *
 * @param schema Avro schema to translate
 * @param hiveColumns optional map; when present it receives one entry (field name to Hive type)
 *                    per field of the top-level record
 * @param topLevel whether {@code schema} is the table-level schema
 * @param datasetName dataset name, used only in error messages
 * @return the generated Hive column definitions (top level) or Hive type expression (nested)
 * @throws IllegalArgumentException if {@code topLevel} is set and the schema is not a RECORD
 * @throws AvroRuntimeException if the schema type is not handled by the switch below
 */
private static String generateAvroToHiveColumnMapping(Schema schema, Optional<Map<String, String>> hiveColumns, boolean topLevel, String datasetName) {
    final Schema.Type avroType = schema.getType();
    if (topLevel && avroType != Schema.Type.RECORD) {
        throw new IllegalArgumentException(String.format("Schema for table must be of type RECORD. Received type: %s for dataset %s", avroType, datasetName));
    }
    final StringBuilder ddl = new StringBuilder();
    // Empty before the first element, then switched to the delimiter of the enclosing construct.
    String separator = "";
    switch (avroType) {
        case RECORD: {
            if (!topLevel) {
                // Nested record: <mapped type><`name`:type,...>
                ddl.append(AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(avroType)).append("<");
                for (Schema.Field member : schema.getFields()) {
                    ddl.append(separator);
                    separator = ",";
                    String memberType = generateAvroToHiveColumnMapping(member.schema(), hiveColumns, false, datasetName);
                    ddl.append("`").append(member.name()).append("`").append(":").append(memberType);
                }
                ddl.append(">");
                break;
            }
            // Table-level record: one column definition per field.
            for (Schema.Field column : schema.getFields()) {
                ddl.append(separator);
                separator = ", \n";
                String hiveType = generateAvroToHiveColumnMapping(column.schema(), hiveColumns, false, datasetName);
                if (hiveColumns.isPresent()) {
                    hiveColumns.get().put(column.name(), hiveType);
                }
                String source = column.getProp("flatten_source");
                if (StringUtils.isBlank(source)) {
                    source = column.name();
                }
                ddl.append(String.format("  `%s` %s COMMENT 'from flatten_source %s'", column.name(), hiveType, source));
            }
            break;
        }
        case UNION: {
            Optional<Schema> wrappedType = isOfOptionType(schema);
            if (wrappedType.isPresent()) {
                // Option-style union: render the schema returned by isOfOptionType on its own.
                ddl.append(generateAvroToHiveColumnMapping(wrappedType.get(), hiveColumns, false, datasetName));
                break;
            }
            ddl.append(AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(avroType)).append("<");
            for (Schema branch : schema.getTypes()) {
                // NULL branches are left out of the rendered member list.
                if (branch.getType() == Schema.Type.NULL) {
                    continue;
                }
                ddl.append(separator);
                separator = ",";
                ddl.append(generateAvroToHiveColumnMapping(branch, hiveColumns, false, datasetName));
            }
            ddl.append(">");
            break;
        }
        case MAP:
            // The key type is always rendered as string.
            ddl.append(AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(avroType))
                .append("<")
                .append("string,")
                .append(generateAvroToHiveColumnMapping(schema.getValueType(), hiveColumns, false, datasetName))
                .append(">");
            break;
        case ARRAY:
            ddl.append(AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(avroType))
                .append("<")
                .append(generateAvroToHiveColumnMapping(schema.getElementType(), hiveColumns, false, datasetName))
                .append(">");
            break;
        case NULL:
            // Nothing is rendered for NULL.
            break;
        case BOOLEAN:
        case BYTES:
        case DOUBLE:
        case ENUM:
        case FIXED:
        case FLOAT:
        case INT:
        case LONG:
        case STRING:
            // Types rendered directly from the lookup table.
            ddl.append(AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(avroType));
            break;
        default: {
            String failure = String.format("DDL query generation failed for \"%s\" of dataset %s", schema, datasetName);
            log.error(failure);
            throw new AvroRuntimeException(failure);
        }
    }
    return ddl.toString();
}
Also used : Schema(org.apache.avro.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) AvroRuntimeException(org.apache.avro.AvroRuntimeException) ToString(lombok.ToString)

Example 2 with AvroRuntimeException

use of org.apache.avro.AvroRuntimeException in project incubator-gobblin by apache.

the class AvroKeyMapper method map.

/**
 * Writes the incoming record to the context and counts it.
 * <p>
 * When the job has no reduce tasks the key is written as-is with a null value. Otherwise the
 * comparable key record is populated from the datum and written together with the full datum
 * as the value. An {@link AvroRuntimeException} raised by that write is rethrown as an
 * {@link IOException} listing the paths of the current input split.
 */
@Override
protected void map(AvroKey<GenericRecord> key, NullWritable value, Context context) throws IOException, InterruptedException {
    if (context.getNumReduceTasks() != 0) {
        final GenericRecord datum = key.datum();
        populateComparableKeyRecord(datum, this.outKey.datum());
        this.outValue.datum(datum);
        try {
            context.write(this.outKey, this.outValue);
        } catch (AvroRuntimeException writeFailure) {
            // Surface which input files were being processed when the write failed.
            final CombineFileSplit split = (CombineFileSplit) context.getInputSplit();
            final Path[] splitPaths = split.getPaths();
            throw new IOException("Unable to process paths " + StringUtils.join(splitPaths, ','), writeFailure);
        }
    } else {
        context.write(key, NullWritable.get());
    }
    context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1);
}
Also used : AvroRuntimeException(org.apache.avro.AvroRuntimeException) IOException(java.io.IOException)

Example 3 with AvroRuntimeException

use of org.apache.avro.AvroRuntimeException in project incubator-gobblin by apache.

the class AvroGenericRecordAccessor method set.

/**
 * Sets {@code value} at the location named by a dot-separated path, starting from
 * {@code record} and descending through nested records, lists (path segment parsed as an
 * index) and maps (path segment used as the key).
 * <p>
 * After the value is stored, the changed field is validated against its schema; if validation
 * fails the previous value is restored before the exception is thrown.
 *
 * @param fieldName dot-separated path of the field to set
 * @param value value to store; may be {@code null}
 * @throws FieldDoesNotExistException if an intermediate path segment resolves to {@code null},
 *         or if Avro raises an {@link AvroRuntimeException} while the field is accessed
 * @throws IllegalArgumentException if the parent of the last path segment is not a record
 * @throws IncorrectTypeException if {@code value} does not conform to the field's schema
 */
private void set(String fieldName, Object value) {
    try {
        Iterator<String> levels = Splitter.on(".").split(fieldName).iterator();
        String subField = levels.next();
        Object subRecord = record;
        // Walk every segment except the last; the last one names the field to write.
        while (levels.hasNext()) {
            if (subRecord instanceof GenericRecord) {
                subRecord = ((GenericRecord) subRecord).get(subField);
            } else if (subRecord instanceof List) {
                subRecord = ((List<?>) subRecord).get(Integer.parseInt(subField));
            } else if (subRecord instanceof Map) {
                subRecord = ((Map<?, ?>) subRecord).get(subField);
            }
            // Any other container type leaves subRecord unchanged; the record check below
            // then rejects the path.
            if (subRecord == null) {
                throw new FieldDoesNotExistException("Field " + subField + " not found when trying to set " + fieldName);
            }
            subField = levels.next();
        }
        if (!(subRecord instanceof GenericRecord)) {
            throw new IllegalArgumentException("Field " + fieldName + " does not refer to a record type.");
        }
        GenericRecord toInsert = (GenericRecord) subRecord;
        Object oldValue = toInsert.get(subField);
        toInsert.put(subField, value);
        Schema.Field changedField = toInsert.getSchema().getField(subField);
        GenericData genericData = GenericData.get();
        boolean valid = genericData.validate(changedField.schema(), genericData.getField(toInsert, changedField.name(), changedField.pos()));
        if (!valid) {
            // Roll back so a rejected write does not leave the record in an invalid state.
            toInsert.put(subField, oldValue);
            // value may be null (e.g. null written to a non-nullable field); calling getClass()
            // on it would replace the intended IncorrectTypeException with a NullPointerException.
            String valueType = (value == null) ? "null" : value.getClass().getCanonicalName();
            throw new IncorrectTypeException("Incorrect type - can't insert a " + valueType + " into an Avro record of type " + changedField.schema().getType().toString());
        }
    } catch (AvroRuntimeException e) {
        throw new FieldDoesNotExistException("Field not found setting name " + fieldName, e);
    }
}
Also used : Schema(org.apache.avro.Schema) AvroRuntimeException(org.apache.avro.AvroRuntimeException) GenericData(org.apache.avro.generic.GenericData) List(java.util.List) GenericRecord(org.apache.avro.generic.GenericRecord) Map(java.util.Map) HashMap(java.util.HashMap)

Example 4 with AvroRuntimeException

use of org.apache.avro.AvroRuntimeException in project spf4j by zolyfarkas.

the class SchemaUtils method toJson.

/**
 * Writes {@code datum} to {@code generator} as JSON, dispatching on its runtime type.
 * <p>
 * Maps become JSON objects (keys via {@code toString()}), collections become arrays, byte
 * arrays become ISO-8859-1 strings, character sequences and enums become strings, and boxed
 * numbers / booleans become the matching JSON primitive. Nested values are written recursively.
 *
 * @param datum value to serialize; {@link JsonProperties#NULL_VALUE} is written as JSON null
 * @param generator destination for the JSON output
 * @throws IOException if the generator fails to write
 * @throws AvroRuntimeException if the runtime type of {@code datum} is not one of the above
 */
@SuppressWarnings(value = "unchecked")
@SuppressFBWarnings("ITC_INHERITANCE_TYPE_CHECKING")
static void toJson(final Object datum, final JsonGenerator generator) throws IOException {
    // null
    if (datum == JsonProperties.NULL_VALUE) {
        generator.writeNull();
        return;
    }
    // record, map
    if (datum instanceof Map) {
        final Map<Object, Object> members = (Map<Object, Object>) datum;
        generator.writeStartObject();
        for (Map.Entry<Object, Object> member : members.entrySet()) {
            generator.writeFieldName(member.getKey().toString());
            toJson(member.getValue(), generator);
        }
        generator.writeEndObject();
        return;
    }
    // array
    if (datum instanceof Collection) {
        final Collection<?> items = (Collection<?>) datum;
        generator.writeStartArray();
        for (Object item : items) {
            toJson(item, generator);
        }
        generator.writeEndArray();
        return;
    }
    // bytes, fixed
    if (datum instanceof byte[]) {
        final byte[] raw = (byte[]) datum;
        generator.writeString(new String(raw, StandardCharsets.ISO_8859_1));
        return;
    }
    // string, enum
    if (datum instanceof CharSequence || datum instanceof Enum<?>) {
        generator.writeString(datum.toString());
        return;
    }
    // double
    if (datum instanceof Double) {
        generator.writeNumber(((Double) datum).doubleValue());
        return;
    }
    // float
    if (datum instanceof Float) {
        generator.writeNumber(((Float) datum).floatValue());
        return;
    }
    // long
    if (datum instanceof Long) {
        generator.writeNumber(((Long) datum).longValue());
        return;
    }
    // int
    if (datum instanceof Integer) {
        generator.writeNumber(((Integer) datum).intValue());
        return;
    }
    // boolean
    if (datum instanceof Boolean) {
        generator.writeBoolean(((Boolean) datum).booleanValue());
        return;
    }
    throw new AvroRuntimeException("Unknown datum class: " + datum.getClass());
}
Also used : AvroRuntimeException(org.apache.avro.AvroRuntimeException) Collection(java.util.Collection) HashMap(java.util.HashMap) Map(java.util.Map) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings)

Example 5 with AvroRuntimeException

use of org.apache.avro.AvroRuntimeException in project incubator-gobblin by apache.

the class AvroUtils method switchNamespace.

/**
 * Builds a copy of the given {@link org.apache.avro.Schema} in which namespaces are replaced
 * according to {@code namespaceOverride}.
 * <p>
 * ENUM, FIXED and RECORD schemas get the mapped namespace when their current namespace is a
 * key of the map and keep their own namespace otherwise. MAP, ARRAY, UNION and RECORD schemas
 * are rebuilt recursively from their value / element / member / field schemas. Primitive
 * schemas are recreated from their type. Schema properties are then copied onto the result
 * via {@code copyProperties}.
 *
 * @param schema {@link org.apache.avro.Schema} to copy.
 * @param namespaceOverride mapping from existing namespace to replacement namespace.
 * @return a new {@link org.apache.avro.Schema} equivalent to {@code schema} with namespaces switched.
 * @throws AvroRuntimeException if the schema type is not handled.
 */
public static Schema switchNamespace(Schema schema, Map<String, String> namespaceOverride) {
    final Schema copy;
    switch (schema.getType()) {
        case ENUM: {
            String enumNamespace = schema.getNamespace();
            if (namespaceOverride.containsKey(enumNamespace)) {
                enumNamespace = namespaceOverride.get(enumNamespace);
            }
            copy = Schema.createEnum(schema.getName(), schema.getDoc(), enumNamespace, schema.getEnumSymbols());
            break;
        }
        case FIXED: {
            String fixedNamespace = schema.getNamespace();
            if (namespaceOverride.containsKey(fixedNamespace)) {
                fixedNamespace = namespaceOverride.get(fixedNamespace);
            }
            copy = Schema.createFixed(schema.getName(), schema.getDoc(), fixedNamespace, schema.getFixedSize());
            break;
        }
        case RECORD: {
            String recordNamespace = schema.getNamespace();
            if (namespaceOverride.containsKey(recordNamespace)) {
                recordNamespace = namespaceOverride.get(recordNamespace);
            }
            // Rebuild every field around a namespace-switched copy of its schema.
            List<Schema.Field> rebuiltFields = new ArrayList<>();
            for (Schema.Field original : schema.getFields()) {
                Schema fieldSchema = switchNamespace(original.schema(), namespaceOverride);
                rebuiltFields.add(new Schema.Field(original.name(), fieldSchema, original.doc(), original.defaultValue(), original.order()));
            }
            copy = Schema.createRecord(schema.getName(), schema.getDoc(), recordNamespace, schema.isError());
            copy.setFields(rebuiltFields);
            break;
        }
        case UNION: {
            List<Schema> rebuiltMembers = new ArrayList<>();
            List<Schema> members = schema.getTypes();
            if (members != null) {
                for (Schema member : members) {
                    rebuiltMembers.add(switchNamespace(member, namespaceOverride));
                }
            }
            copy = Schema.createUnion(rebuiltMembers);
            break;
        }
        case MAP:
            copy = Schema.createMap(switchNamespace(schema.getValueType(), namespaceOverride));
            break;
        case ARRAY:
            copy = Schema.createArray(switchNamespace(schema.getElementType(), namespaceOverride));
            break;
        case NULL:
        case BOOLEAN:
        case INT:
        case LONG:
        case FLOAT:
        case DOUBLE:
        case BYTES:
        case STRING:
            // Primitives are simply recreated from their type.
            copy = Schema.create(schema.getType());
            break;
        default: {
            String failure = String.format("Schema namespace replacement failed for \"%s\" ", schema);
            LOG.error(failure);
            throw new AvroRuntimeException(failure);
        }
    }
    // Copy schema metadata
    copyProperties(schema, copy);
    return copy;
}
Also used : Field(org.apache.avro.Schema.Field) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) AvroRuntimeException(org.apache.avro.AvroRuntimeException)

Aggregations

AvroRuntimeException (org.apache.avro.AvroRuntimeException)14 Schema (org.apache.avro.Schema)7 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 Field (org.apache.avro.Schema.Field)3 GenericRecord (org.apache.avro.generic.GenericRecord)3 HashMap (java.util.HashMap)2 Map (java.util.Map)2 SuppressFBWarnings (edu.umd.cs.findbugs.annotations.SuppressFBWarnings)1 ByteArrayInputStream (java.io.ByteArrayInputStream)1 DataInputStream (java.io.DataInputStream)1 InputStream (java.io.InputStream)1 Field (java.lang.reflect.Field)1 Collection (java.util.Collection)1 List (java.util.List)1 ToString (lombok.ToString)1 SupportType (org.apache.apex.malhar.lib.util.FieldInfo.SupportType)1 GenericData (org.apache.avro.generic.GenericData)1 BinaryDecoder (org.apache.avro.io.BinaryDecoder)1 Decoder (org.apache.avro.io.Decoder)1