Use of org.apache.avro.AvroRuntimeException in project spf4j (by zolyfarkas): class MavenSchemaResolver, method resolveSchema.
@Override
@SuppressFBWarnings("PCAIL_POSSIBLE_CONSTANT_ALLOCATION_IN_LOOP")
public Schema resolveSchema(final String id) {
SchemaRef ref = new SchemaRef(id);
try {
File artifact = MavenRepositoryUtils.resolveArtifact(ref.getGroupId(), ref.getArtifactId(), classifier, extension, ref.getVersion(), remotes, repoSystem, repoSystemSession);
URI zipUri = URI.create("jar:" + artifact.toURI().toURL());
FileSystem zipFs;
synchronized (zipUri.toString().intern()) {
// newFileSystem fails if already one there...
try {
zipFs = FileSystems.newFileSystem(zipUri, Collections.emptyMap());
} catch (FileSystemAlreadyExistsException ex) {
zipFs = FileSystems.getFileSystem(zipUri);
} catch (ZipError ze) {
throw new AvroRuntimeException("Cannot resolve " + id, ze);
}
}
for (Path root : zipFs.getRootDirectories()) {
Path index = root.resolve("schema_index.properties");
if (Files.exists(index)) {
Properties prop = new Properties();
try (BufferedReader indexReader = Files.newBufferedReader(index)) {
prop.load(indexReader);
}
String schemaName = prop.getProperty(ref.getRef());
if (schemaName == null) {
throw new AvroRuntimeException("unable to resolve schema: " + id + " missing from index " + index);
}
Path schemaPath = root.resolve(schemaName.replace('.', '/') + ".avsc");
try (BufferedInputStream bis = new BufferedInputStream(Files.newInputStream(schemaPath))) {
return new Schema.Parser().parse(bis);
}
}
}
throw new IOException("unable to resolve schema: " + id);
} catch (ArtifactResolutionException | IOException ex) {
throw new AvroRuntimeException("Cannot resolve " + id, ex);
}
}
Use of org.apache.avro.AvroRuntimeException in project incubator-gobblin (by apache): class AvroKeyMapper, method map.
/**
 * Emits each input avro record, and counts it.
 * Map-only jobs pass the record through unchanged; otherwise a comparable
 * key is derived from the record so the shuffle can sort/dedupe it.
 */
@Override
protected void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
    throws IOException, InterruptedException {
  if (context.getNumReduceTasks() == 0) {
    // No reduce phase: forward the record as-is.
    context.write(key, NullWritable.get());
  } else {
    GenericRecord datum = key.datum();
    // Derive the shuffle key from the record, then ship the record as the value.
    populateComparableKeyRecord(datum, this.outKey.datum());
    this.outValue.datum(datum);
    try {
      context.write(this.outKey, this.outValue);
    } catch (AvroRuntimeException are) {
      // Surface which input files produced the bad record.
      CombineFileSplit split = (CombineFileSplit) context.getInputSplit();
      throw new IOException("Unable to process paths " + StringUtils.join(split.getPaths(), ','), are);
    }
  }
  context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1);
}
Use of org.apache.avro.AvroRuntimeException in project incubator-gobblin (by apache): class AvroGenericRecordAccessor, method set.
/*
* Recurse down record types to set the right value
*/
private void set(String fieldName, Object value) {
try {
String subField;
Iterator<String> levels = Splitter.on(".").split(fieldName).iterator();
GenericRecord toInsert = record;
subField = levels.next();
Object subRecord = toInsert;
while (levels.hasNext()) {
if (subRecord instanceof GenericRecord) {
subRecord = ((GenericRecord) subRecord).get(subField);
} else if (subRecord instanceof List) {
subRecord = ((List) subRecord).get(Integer.parseInt(subField));
} else if (subRecord instanceof Map) {
subRecord = ((Map) subRecord).get(subField);
}
if (subRecord == null) {
throw new FieldDoesNotExistException("Field " + subField + " not found when trying to set " + fieldName);
}
subField = levels.next();
}
if (!(subRecord instanceof GenericRecord)) {
throw new IllegalArgumentException("Field " + fieldName + " does not refer to a record type.");
}
toInsert = (GenericRecord) subRecord;
Object oldValue = toInsert.get(subField);
toInsert.put(subField, value);
Schema.Field changedField = toInsert.getSchema().getField(subField);
GenericData genericData = GenericData.get();
boolean valid = genericData.validate(changedField.schema(), genericData.getField(toInsert, changedField.name(), changedField.pos()));
if (!valid) {
toInsert.put(subField, oldValue);
throw new IncorrectTypeException("Incorrect type - can't insert a " + value.getClass().getCanonicalName() + " into an Avro record of type " + changedField.schema().getType().toString());
}
} catch (AvroRuntimeException e) {
throw new FieldDoesNotExistException("Field not found setting name " + fieldName, e);
}
}
Use of org.apache.avro.AvroRuntimeException in project incubator-gobblin (by apache): class AvroHiveTypeUtils, method generateAvroToHiveColumnMapping.
/**
 * Generates the hive column/type DDL fragment corresponding to the given avro schema.
 * Recurses through records, unions, maps and arrays; leaf types are mapped through
 * {@code HiveAvroTypeConstants.AVRO_TO_HIVE_COLUMN_MAPPING_V_12}, with avro logical
 * types (date, decimal, time-millis) translated to their hive equivalents first.
 *
 * @param schema      the avro schema to translate; must be a RECORD when {@code topLevel} is true
 * @param hiveColumns if present, receives a mapping from each top-level column name to its hive type
 * @param topLevel    true only for the outermost (table-level) record schema
 * @param datasetName dataset name, used only for error messages
 * @return the hive DDL type string for {@code schema}
 * @throws IllegalArgumentException if a top-level schema is not a RECORD
 * @throws AvroRuntimeException for avro types with no hive mapping
 */
public static String generateAvroToHiveColumnMapping(Schema schema, Optional<Map<String, String>> hiveColumns, boolean topLevel, String datasetName) {
  if (topLevel && !schema.getType().equals(Schema.Type.RECORD)) {
    throw new IllegalArgumentException(String.format("Schema for table must be of type RECORD. Received type: %s for dataset %s", schema.getType(), datasetName));
  }
  StringBuilder columns = new StringBuilder();
  boolean isFirst;
  switch (schema.getType()) {
    case RECORD:
      isFirst = true;
      if (topLevel) {
        // Top-level record: emit one "`name` type COMMENT ..." entry per field.
        for (Schema.Field field : schema.getFields()) {
          if (isFirst) {
            isFirst = false;
          } else {
            columns.append(", \n");
          }
          String type = generateAvroToHiveColumnMapping(field.schema(), hiveColumns, false, datasetName);
          if (hiveColumns.isPresent()) {
            hiveColumns.get().put(field.name(), type);
          }
          // Record where a flattened column originally came from, if annotated.
          String flattenSource = field.getProp("flatten_source");
          if (StringUtils.isBlank(flattenSource)) {
            flattenSource = field.name();
          }
          columns.append(String.format("  `%s` %s COMMENT 'from flatten_source %s'", field.name(), type, flattenSource));
        }
      } else {
        // Nested record: emit a hive struct<`name`:type,...>.
        columns.append(HiveAvroTypeConstants.AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(schema.getType())).append("<");
        for (Schema.Field field : schema.getFields()) {
          if (isFirst) {
            isFirst = false;
          } else {
            columns.append(",");
          }
          String type = generateAvroToHiveColumnMapping(field.schema(), hiveColumns, false, datasetName);
          columns.append("`").append(field.name()).append("`").append(":").append(type);
        }
        columns.append(">");
      }
      break;
    case UNION:
      // A [null, T] union is just an optional T; otherwise emit a hive uniontype<...>.
      Optional<Schema> optionalType = isOfOptionType(schema);
      if (optionalType.isPresent()) {
        Schema optionalTypeSchema = optionalType.get();
        columns.append(generateAvroToHiveColumnMapping(optionalTypeSchema, hiveColumns, false, datasetName));
      } else {
        columns.append(HiveAvroTypeConstants.AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(schema.getType())).append("<");
        isFirst = true;
        for (Schema unionMember : schema.getTypes()) {
          if (Schema.Type.NULL.equals(unionMember.getType())) {
            // NULL members only express optionality; they have no hive representation.
            continue;
          }
          if (isFirst) {
            isFirst = false;
          } else {
            columns.append(",");
          }
          columns.append(generateAvroToHiveColumnMapping(unionMember, hiveColumns, false, datasetName));
        }
        columns.append(">");
      }
      break;
    case MAP:
      // Avro map keys are always strings.
      columns.append(HiveAvroTypeConstants.AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(schema.getType())).append("<");
      columns.append("string,").append(generateAvroToHiveColumnMapping(schema.getValueType(), hiveColumns, false, datasetName));
      columns.append(">");
      break;
    case ARRAY:
      columns.append(HiveAvroTypeConstants.AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(schema.getType())).append("<");
      columns.append(generateAvroToHiveColumnMapping(schema.getElementType(), hiveColumns, false, datasetName));
      columns.append(">");
      break;
    case NULL:
      break;
    case BYTES:
    case DOUBLE:
    case ENUM:
    case FIXED:
    case FLOAT:
    case INT:
    case LONG:
    case STRING:
    case BOOLEAN:
      // Handling Avro Logical Types which should always sit in leaf-level.
      boolean isLogicalTypeSet = false;
      try {
        String hiveSpecificLogicalType = generateHiveSpecificLogicalType(schema);
        if (StringUtils.isNoneEmpty(hiveSpecificLogicalType)) {
          isLogicalTypeSet = true;
          columns.append(hiveSpecificLogicalType);
          break;
        }
      } catch (AvroSerdeException ae) {
        log.error("Failed to generate logical type string for field" + schema.getName() + " due to:", ae);
      }
      LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(schema);
      if (logicalType != null) {
        switch (logicalType.getName().toLowerCase()) {
          case HiveAvroTypeConstants.DATE:
            LogicalTypes.Date dateType = (LogicalTypes.Date) logicalType;
            dateType.validate(schema);
            columns.append("date");
            isLogicalTypeSet = true;
            break;
          case HiveAvroTypeConstants.DECIMAL:
            LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) logicalType;
            decimalType.validate(schema);
            columns.append(String.format("decimal(%s, %s)", decimalType.getPrecision(), decimalType.getScale()));
            isLogicalTypeSet = true;
            break;
          case HiveAvroTypeConstants.TIME_MILLIS:
            LogicalTypes.TimeMillis timeMillsType = (LogicalTypes.TimeMillis) logicalType;
            timeMillsType.validate(schema);
            columns.append("timestamp");
            isLogicalTypeSet = true;
            break;
          default:
            // BUGFIX: previously used schema.getLogicalType().getName(), which NPEs when
            // the logical type is only derivable via LogicalTypes.fromSchemaIgnoreInvalid
            // (from schema props) and schema.getLogicalType() itself returns null.
            // Use the already-resolved local instead.
            log.error("Unsupported logical type" + logicalType.getName() + ", fallback to physical type");
        }
      }
      if (!isLogicalTypeSet) {
        // Plain physical leaf type.
        columns.append(HiveAvroTypeConstants.AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(schema.getType()));
      }
      break;
    default:
      String exceptionMessage = String.format("DDL query generation failed for \"%s\" of dataset %s", schema, datasetName);
      log.error(exceptionMessage);
      throw new AvroRuntimeException(exceptionMessage);
  }
  return columns.toString();
}
Use of org.apache.avro.AvroRuntimeException in project incubator-gobblin (by apache): class AvroUtils, method getFieldHelper.
/**
 * Helper method that does the actual work for {@link #getField(Schema, String)}.
 * Walks one path element per call, recursing into records, map values and array elements.
 * @param schema passed from {@link #getFieldSchema(Schema, String)}
 * @param pathList passed from {@link #getFieldSchema(Schema, String)}
 * @param field keeps track of the index used to access the list pathList
 * @return the field at the end of the path, or absent if any path element does not exist
 * @throws AvroRuntimeException if the path traverses a non-optional union or an unsupported type
 */
private static Optional<Field> getFieldHelper(Schema schema, List<String> pathList, int field) {
  Field curField = schema.getField(pathList.get(field));
  if (field + 1 == pathList.size()) {
    // Last path element: report presence/absence of the leaf field.
    return Optional.fromNullable(curField);
  }
  // BUGFIX: a missing intermediate path element previously fell through to
  // curField.schema() below and threw a NullPointerException; report absence instead.
  if (curField == null) {
    return Optional.absent();
  }
  Schema fieldSchema = curField.schema();
  switch (fieldSchema.getType()) {
    case UNION:
      throw new AvroRuntimeException("Union of complex types cannot be handled : " + schema);
    case MAP:
      // Descend into the map's value schema for the next path element.
      return AvroUtils.getFieldHelper(fieldSchema.getValueType(), pathList, ++field);
    case RECORD:
      return AvroUtils.getFieldHelper(fieldSchema, pathList, ++field);
    case ARRAY:
      // Descend into the array's element schema for the next path element.
      return AvroUtils.getFieldHelper(fieldSchema.getElementType(), pathList, ++field);
    default:
      throw new AvroRuntimeException("Invalid type " + fieldSchema.getType() + " in schema : " + schema);
  }
}
Aggregations