use of org.apache.avro.AvroRuntimeException in project incubator-gobblin by apache.
the class HiveAvroORCQueryGenerator method generateAvroToHiveColumnMapping.
/**
 * Builds the Hive column / type declaration that corresponds to an Avro schema.
 *
 * <p>For the top-level record every field becomes one backtick-quoted column definition carrying a
 * {@code COMMENT} that names the field's {@code flatten_source} property (or the field name when that
 * property is blank); definitions are separated by {@code ", \n"}. Any nested schema is rendered as a
 * single Hive type expression whose type keyword is looked up in
 * {@code AVRO_TO_HIVE_COLUMN_MAPPING_V_12}.
 *
 * @param schema Avro schema to convert
 * @param hiveColumns optional map; when present it receives one (field name, Hive type) entry for each
 *        field of the top-level record
 * @param topLevel whether {@code schema} is the table-level schema, in which case it must be a RECORD
 * @param datasetName name of the dataset, used only in error messages
 * @return the generated Hive column or type text; an empty string for a NULL schema
 * @throws IllegalArgumentException if {@code topLevel} is set and {@code schema} is not a RECORD
 * @throws AvroRuntimeException if the schema type is not one of the handled Avro types
 */
private static String generateAvroToHiveColumnMapping(Schema schema, Optional<Map<String, String>> hiveColumns, boolean topLevel, String datasetName) {
  Schema.Type avroType = schema.getType();
  if (topLevel && !avroType.equals(Schema.Type.RECORD)) {
    throw new IllegalArgumentException(String.format("Schema for table must be of type RECORD. Received type: %s for dataset %s", avroType, datasetName));
  }

  StringBuilder ddl = new StringBuilder();
  // Empty before the first element of a list, then switched to the list's delimiter.
  String separator = "";
  switch (avroType) {
    case RECORD:
      if (!topLevel) {
        // Nested record: <keyword><`name`:type,`name`:type,...>
        ddl.append(AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(avroType)).append("<");
        for (Schema.Field member : schema.getFields()) {
          ddl.append(separator);
          separator = ",";
          String memberType = generateAvroToHiveColumnMapping(member.schema(), hiveColumns, false, datasetName);
          ddl.append("`").append(member.name()).append("`").append(":").append(memberType);
        }
        ddl.append(">");
        break;
      }
      // Table-level record: one column definition per field.
      for (Schema.Field column : schema.getFields()) {
        ddl.append(separator);
        separator = ", \n";
        String columnType = generateAvroToHiveColumnMapping(column.schema(), hiveColumns, false, datasetName);
        if (hiveColumns.isPresent()) {
          hiveColumns.get().put(column.name(), columnType);
        }
        String declaredSource = column.getProp("flatten_source");
        String commentSource = StringUtils.isBlank(declaredSource) ? column.name() : declaredSource;
        ddl.append(String.format(" `%s` %s COMMENT 'from flatten_source %s'", column.name(), columnType, commentSource));
      }
      break;
    case UNION:
      Optional<Schema> wrapped = isOfOptionType(schema);
      if (wrapped.isPresent()) {
        // isOfOptionType returned a schema: render that schema in place of the union.
        ddl.append(generateAvroToHiveColumnMapping(wrapped.get(), hiveColumns, false, datasetName));
        break;
      }
      ddl.append(AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(avroType)).append("<");
      for (Schema branch : schema.getTypes()) {
        // NULL branches contribute nothing to the Hive union type.
        if (!Schema.Type.NULL.equals(branch.getType())) {
          ddl.append(separator);
          separator = ",";
          ddl.append(generateAvroToHiveColumnMapping(branch, hiveColumns, false, datasetName));
        }
      }
      ddl.append(">");
      break;
    case MAP:
      // The key type is always emitted as string.
      ddl.append(AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(avroType))
          .append("<")
          .append("string,")
          .append(generateAvroToHiveColumnMapping(schema.getValueType(), hiveColumns, false, datasetName))
          .append(">");
      break;
    case ARRAY:
      ddl.append(AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(avroType))
          .append("<")
          .append(generateAvroToHiveColumnMapping(schema.getElementType(), hiveColumns, false, datasetName))
          .append(">");
      break;
    case NULL:
      // Nothing is emitted for NULL.
      break;
    case BYTES:
    case DOUBLE:
    case ENUM:
    case FIXED:
    case FLOAT:
    case INT:
    case LONG:
    case STRING:
    case BOOLEAN:
      ddl.append(AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(avroType));
      break;
    default:
      String failure = String.format("DDL query generation failed for \"%s\" of dataset %s", schema, datasetName);
      log.error(failure);
      throw new AvroRuntimeException(failure);
  }
  return ddl.toString();
}
use of org.apache.avro.AvroRuntimeException in project incubator-gobblin by apache.
the class AvroKeyMapper method map.
/**
 * Emits the incoming Avro record and bumps the record counter.
 *
 * <p>When the job has no reduce tasks the record is written as-is with a {@link NullWritable} value.
 * Otherwise the comparable key record is populated from the datum and written together with the
 * full datum as the value.
 *
 * @param key wrapper around the input record
 * @param value unused by this method
 * @param context task context used for output, the input split and counters
 * @throws IOException if writing fails, including when the write raises an
 *         {@link AvroRuntimeException}; in that case the message describes the input split being
 *         processed and the original exception is attached as the cause
 * @throws InterruptedException if the write is interrupted
 */
@Override
protected void map(AvroKey<GenericRecord> key, NullWritable value, Context context) throws IOException, InterruptedException {
  if (context.getNumReduceTasks() == 0) {
    context.write(key, NullWritable.get());
  } else {
    populateComparableKeyRecord(key.datum(), this.outKey.datum());
    this.outValue.datum(key.datum());
    try {
      context.write(this.outKey, this.outValue);
    } catch (AvroRuntimeException e) {
      // Guard the cast: a ClassCastException thrown from this handler would replace the
      // AvroRuntimeException and hide the real failure.
      Object split = context.getInputSplit();
      String source;
      if (split instanceof CombineFileSplit) {
        source = StringUtils.join(((CombineFileSplit) split).getPaths(), ',');
      } else {
        source = String.valueOf(split);
      }
      throw new IOException("Unable to process paths " + source, e);
    }
  }
  context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1);
}
use of org.apache.avro.AvroRuntimeException in project incubator-gobblin by apache.
the class AvroGenericRecordAccessor method set.
/**
 * Sets {@code value} on the field addressed by the dot-separated path {@code fieldName}.
 *
 * <p>Every path segment except the last is resolved step by step starting from {@code record}:
 * a {@link GenericRecord} is read by field name, a {@link List} by integer index and a
 * {@link Map} by key. The last segment names the field that is written on the record reached
 * that way. After the write the new field value is validated against the field's schema; if
 * validation fails the previous value is restored before the exception is thrown.
 *
 * @param fieldName dot-separated path to the field to set
 * @param value value to store; may be {@code null}
 * @throws FieldDoesNotExistException if an intermediate segment resolves to {@code null}, or if
 *         an {@link AvroRuntimeException} is raised while setting the field
 * @throws IllegalArgumentException if the parent of the last segment is not a record
 * @throws IncorrectTypeException if {@code value} does not validate against the field's schema
 */
private void set(String fieldName, Object value) {
  try {
    Iterator<String> levels = Splitter.on(".").split(fieldName).iterator();
    String subField = levels.next();
    Object subRecord = record;
    while (levels.hasNext()) {
      if (subRecord instanceof GenericRecord) {
        subRecord = ((GenericRecord) subRecord).get(subField);
      } else if (subRecord instanceof List) {
        subRecord = ((List<?>) subRecord).get(Integer.parseInt(subField));
      } else if (subRecord instanceof Map) {
        subRecord = ((Map<?, ?>) subRecord).get(subField);
      }
      if (subRecord == null) {
        throw new FieldDoesNotExistException("Field " + subField + " not found when trying to set " + fieldName);
      }
      subField = levels.next();
    }
    if (!(subRecord instanceof GenericRecord)) {
      throw new IllegalArgumentException("Field " + fieldName + " does not refer to a record type.");
    }
    GenericRecord toInsert = (GenericRecord) subRecord;
    // Remember the previous value so a failed validation can be rolled back.
    Object oldValue = toInsert.get(subField);
    toInsert.put(subField, value);
    Schema.Field changedField = toInsert.getSchema().getField(subField);
    GenericData genericData = GenericData.get();
    boolean valid = genericData.validate(changedField.schema(), genericData.getField(toInsert, changedField.name(), changedField.pos()));
    if (!valid) {
      toInsert.put(subField, oldValue);
      // value may be null here (e.g. null written to a non-nullable field); do not dereference it.
      String valueType = value == null ? "null" : value.getClass().getCanonicalName();
      throw new IncorrectTypeException("Incorrect type - can't insert a " + valueType + " into an Avro record of type " + changedField.schema().getType().toString());
    }
  } catch (AvroRuntimeException e) {
    throw new FieldDoesNotExistException("Field not found setting name " + fieldName, e);
  }
}
use of org.apache.avro.AvroRuntimeException in project spf4j by zolyfarkas.
the class SchemaUtils method toJson.
/**
 * Writes {@code datum} to {@code generator} as JSON, recursing into containers.
 *
 * <p>Dispatch is by runtime class, checked in this order: the {@code JsonProperties.NULL_VALUE}
 * sentinel, {@link Map}, {@link Collection}, {@code byte[]}, {@link CharSequence} or {@link Enum},
 * {@link Double}, {@link Float}, {@link Long}, {@link Integer}, {@link Boolean}.
 *
 * @param datum value to serialize
 * @param generator JSON generator that receives the output
 * @throws IOException if the generator fails to write
 * @throws AvroRuntimeException if {@code datum} is of none of the supported classes
 */
@SuppressWarnings(value = "unchecked")
@SuppressFBWarnings("ITC_INHERITANCE_TYPE_CHECKING")
static void toJson(final Object datum, final JsonGenerator generator) throws IOException {
  // null
  if (datum == JsonProperties.NULL_VALUE) {
    generator.writeNull();
    return;
  }
  // record, map: keys are written via toString(), values recursively
  if (datum instanceof Map) {
    final Map<Object, Object> members = (Map<Object, Object>) datum;
    generator.writeStartObject();
    for (Map.Entry<Object, Object> member : members.entrySet()) {
      generator.writeFieldName(member.getKey().toString());
      toJson(member.getValue(), generator);
    }
    generator.writeEndObject();
    return;
  }
  // array
  if (datum instanceof Collection) {
    generator.writeStartArray();
    for (Object item : (Collection<?>) datum) {
      toJson(item, generator);
    }
    generator.writeEndArray();
    return;
  }
  // bytes, fixed: each byte becomes one character (ISO-8859-1)
  if (datum instanceof byte[]) {
    final String text = new String((byte[]) datum, StandardCharsets.ISO_8859_1);
    generator.writeString(text);
    return;
  }
  // string, enum
  if (datum instanceof CharSequence || datum instanceof Enum<?>) {
    generator.writeString(datum.toString());
    return;
  }
  // double
  if (datum instanceof Double) {
    final double asDouble = (Double) datum;
    generator.writeNumber(asDouble);
    return;
  }
  // float
  if (datum instanceof Float) {
    final float asFloat = (Float) datum;
    generator.writeNumber(asFloat);
    return;
  }
  // long
  if (datum instanceof Long) {
    final long asLong = (Long) datum;
    generator.writeNumber(asLong);
    return;
  }
  // int
  if (datum instanceof Integer) {
    final int asInt = (Integer) datum;
    generator.writeNumber(asInt);
    return;
  }
  // boolean
  if (datum instanceof Boolean) {
    final boolean asBoolean = (Boolean) datum;
    generator.writeBoolean(asBoolean);
    return;
  }
  throw new AvroRuntimeException("Unknown datum class: " + datum.getClass());
}
use of org.apache.avro.AvroRuntimeException in project incubator-gobblin by apache.
the class AvroUtils method switchNamespace.
/**
 * Creates a copy of an Avro {@link org.apache.avro.Schema} in which namespaces are replaced
 * according to {@code namespaceOverride}.
 *
 * <p>For ENUM, FIXED and RECORD schemas the namespace is looked up in {@code namespaceOverride};
 * when it is a key of the map the mapped value is used, otherwise the namespace is kept. MAP,
 * ARRAY, UNION and RECORD schemas are rebuilt recursively from their value, element, member and
 * field schemas. Primitive schemas are re-created from their type. Finally {@code copyProperties}
 * is invoked with the original and the new schema.
 *
 * @param schema {@link org.apache.avro.Schema} to copy.
 * @param namespaceOverride map from existing namespace to the namespace that should replace it.
 * @return the rebuilt {@link org.apache.avro.Schema}.
 * @throws AvroRuntimeException if the schema type is not one of the handled Avro types.
 */
public static Schema switchNamespace(Schema schema, Map<String, String> namespaceOverride) {
  Schema rebuilt;
  // (Primitives are simply cloned)
  switch (schema.getType()) {
    case ENUM: {
      String currentNs = schema.getNamespace();
      String targetNs = namespaceOverride.containsKey(currentNs) ? namespaceOverride.get(currentNs) : currentNs;
      rebuilt = Schema.createEnum(schema.getName(), schema.getDoc(), targetNs, schema.getEnumSymbols());
      break;
    }
    case FIXED: {
      String currentNs = schema.getNamespace();
      String targetNs = namespaceOverride.containsKey(currentNs) ? namespaceOverride.get(currentNs) : currentNs;
      rebuilt = Schema.createFixed(schema.getName(), schema.getDoc(), targetNs, schema.getFixedSize());
      break;
    }
    case MAP: {
      Schema valueCopy = switchNamespace(schema.getValueType(), namespaceOverride);
      rebuilt = Schema.createMap(valueCopy);
      break;
    }
    case RECORD: {
      String currentNs = schema.getNamespace();
      String targetNs = namespaceOverride.containsKey(currentNs) ? namespaceOverride.get(currentNs) : currentNs;
      List<Schema.Field> fieldCopies = new ArrayList<>();
      for (Schema.Field source : schema.getFields()) {
        Schema fieldSchemaCopy = switchNamespace(source.schema(), namespaceOverride);
        fieldCopies.add(new Field(source.name(), fieldSchemaCopy, source.doc(), source.defaultValue(), source.order()));
      }
      rebuilt = Schema.createRecord(schema.getName(), schema.getDoc(), targetNs, schema.isError());
      rebuilt.setFields(fieldCopies);
      break;
    }
    case UNION: {
      List<Schema> memberCopies = new ArrayList<>();
      if (schema.getTypes() != null) {
        for (Schema member : schema.getTypes()) {
          memberCopies.add(switchNamespace(member, namespaceOverride));
        }
      }
      rebuilt = Schema.createUnion(memberCopies);
      break;
    }
    case ARRAY: {
      Schema elementCopy = switchNamespace(schema.getElementType(), namespaceOverride);
      rebuilt = Schema.createArray(elementCopy);
      break;
    }
    case BOOLEAN:
    case BYTES:
    case DOUBLE:
    case FLOAT:
    case INT:
    case LONG:
    case NULL:
    case STRING:
      rebuilt = Schema.create(schema.getType());
      break;
    default: {
      String failure = String.format("Schema namespace replacement failed for \"%s\" ", schema);
      LOG.error(failure);
      throw new AvroRuntimeException(failure);
    }
  }
  // Copy schema metadata
  copyProperties(schema, rebuilt);
  return rebuilt;
}
Aggregations