Search in sources :

Example 6 with AvroRuntimeException

use of org.apache.avro.AvroRuntimeException in project spf4j by zolyfarkas.

In the class MavenSchemaResolver, the method resolveSchema:

/**
 * Resolves an Avro {@link Schema} identified by a Maven-coordinate style id.
 * The id is parsed by {@link SchemaRef} into groupId/artifactId/version plus a
 * schema reference, the matching artifact jar is fetched from the configured
 * repositories, and the schema is looked up via the jar's
 * {@code schema_index.properties} file.
 *
 * @param id the schema id understood by {@link SchemaRef}
 * @return the parsed schema (never null)
 * @throws AvroRuntimeException if the artifact cannot be resolved, the jar is
 *         corrupt, the index lacks the reference, or any I/O error occurs
 */
@Override
@SuppressFBWarnings("PCAIL_POSSIBLE_CONSTANT_ALLOCATION_IN_LOOP")
public Schema resolveSchema(final String id) {
    // Parse "group:artifact:version:ref" style coordinates out of the id.
    SchemaRef ref = new SchemaRef(id);
    try {
        File artifact = MavenRepositoryUtils.resolveArtifact(ref.getGroupId(), ref.getArtifactId(), classifier, extension, ref.getVersion(), remotes, repoSystem, repoSystemSession);
        // "jar:" scheme lets the zip/jar NIO provider mount the artifact.
        URI zipUri = URI.create("jar:" + artifact.toURI().toURL());
        FileSystem zipFs;
        // NOTE(review): synchronizing on an interned String creates a JVM-global
        // lock keyed by the URI text; it works but is a known smell — any other
        // code interning the same string shares this monitor.
        synchronized (zipUri.toString().intern()) {
            // newFileSystem fails if already one there...
            try {
                zipFs = FileSystems.newFileSystem(zipUri, Collections.emptyMap());
            } catch (FileSystemAlreadyExistsException ex) {
                // Another caller mounted this jar first; reuse the shared instance.
                zipFs = FileSystems.getFileSystem(zipUri);
            } catch (ZipError ze) {
                // Corrupt/unreadable jar — surface as an Avro resolution failure.
                throw new AvroRuntimeException("Cannot resolve " + id, ze);
            }
        }
        // zipFs is intentionally not closed here: the FileSystem may be shared
        // with concurrent resolvers (see getFileSystem above) — presumably
        // closed elsewhere or left open for the JVM lifetime; TODO confirm.
        for (Path root : zipFs.getRootDirectories()) {
            // The index maps schema references to fully-qualified schema names.
            Path index = root.resolve("schema_index.properties");
            if (Files.exists(index)) {
                Properties prop = new Properties();
                try (BufferedReader indexReader = Files.newBufferedReader(index)) {
                    prop.load(indexReader);
                }
                String schemaName = prop.getProperty(ref.getRef());
                if (schemaName == null) {
                    throw new AvroRuntimeException("unable to resolve schema: " + id + " missing from index " + index);
                }
                // Schema files live at the dotted-name path with an .avsc suffix.
                Path schemaPath = root.resolve(schemaName.replace('.', '/') + ".avsc");
                try (BufferedInputStream bis = new BufferedInputStream(Files.newInputStream(schemaPath))) {
                    return new Schema.Parser().parse(bis);
                }
            }
        }
        // No root directory carried an index file.
        throw new IOException("unable to resolve schema: " + id);
    } catch (ArtifactResolutionException | IOException ex) {
        // Wrap checked failures so callers only deal with AvroRuntimeException.
        throw new AvroRuntimeException("Cannot resolve " + id, ex);
    }
}
Also used : Path(java.nio.file.Path) SchemaRef(org.spf4j.avro.SchemaRef) Schema(org.apache.avro.Schema) AvroRuntimeException(org.apache.avro.AvroRuntimeException) IOException(java.io.IOException) Properties(java.util.Properties) URI(java.net.URI) ArtifactResolutionException(org.eclipse.aether.resolution.ArtifactResolutionException) BufferedInputStream(java.io.BufferedInputStream) FileSystem(java.nio.file.FileSystem) BufferedReader(java.io.BufferedReader) FileSystemAlreadyExistsException(java.nio.file.FileSystemAlreadyExistsException) File(java.io.File) ZipError(java.util.zip.ZipError) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings)

Example 7 with AvroRuntimeException

use of org.apache.avro.AvroRuntimeException in project incubator-gobblin by apache.

In the class AvroKeyMapper, the method map:

/**
 * Emits each input record, counting every record seen.
 * In a map-only job the key passes straight through; otherwise a comparable
 * sort key is built and the record is emitted as the value, with write
 * failures rewrapped to name the offending input paths.
 */
@Override
protected void map(AvroKey<GenericRecord> key, NullWritable value, Context context) throws IOException, InterruptedException {
    final boolean mapOnly = context.getNumReduceTasks() == 0;
    if (mapOnly) {
        // No reducers: pass the record through unchanged.
        context.write(key, NullWritable.get());
    } else {
        // Build the comparable sort key, then forward the original datum as value.
        populateComparableKeyRecord(key.datum(), this.outKey.datum());
        this.outValue.datum(key.datum());
        try {
            context.write(this.outKey, this.outValue);
        } catch (AvroRuntimeException e) {
            // Name the input files so the bad record can be located.
            final Path[] inputPaths = ((CombineFileSplit) context.getInputSplit()).getPaths();
            throw new IOException("Unable to process paths " + StringUtils.join(inputPaths, ','), e);
        }
    }
    // Counted on both paths: one increment per mapped record.
    context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1);
}
Also used : AvroRuntimeException(org.apache.avro.AvroRuntimeException) IOException(java.io.IOException)

Example 8 with AvroRuntimeException

use of org.apache.avro.AvroRuntimeException in project incubator-gobblin by apache.

In the class AvroGenericRecordAccessor, the method set:

/*
   * Recurse down record types to set the right value
   */
private void set(String fieldName, Object value) {
    try {
        String subField;
        Iterator<String> levels = Splitter.on(".").split(fieldName).iterator();
        GenericRecord toInsert = record;
        subField = levels.next();
        Object subRecord = toInsert;
        while (levels.hasNext()) {
            if (subRecord instanceof GenericRecord) {
                subRecord = ((GenericRecord) subRecord).get(subField);
            } else if (subRecord instanceof List) {
                subRecord = ((List) subRecord).get(Integer.parseInt(subField));
            } else if (subRecord instanceof Map) {
                subRecord = ((Map) subRecord).get(subField);
            }
            if (subRecord == null) {
                throw new FieldDoesNotExistException("Field " + subField + " not found when trying to set " + fieldName);
            }
            subField = levels.next();
        }
        if (!(subRecord instanceof GenericRecord)) {
            throw new IllegalArgumentException("Field " + fieldName + " does not refer to a record type.");
        }
        toInsert = (GenericRecord) subRecord;
        Object oldValue = toInsert.get(subField);
        toInsert.put(subField, value);
        Schema.Field changedField = toInsert.getSchema().getField(subField);
        GenericData genericData = GenericData.get();
        boolean valid = genericData.validate(changedField.schema(), genericData.getField(toInsert, changedField.name(), changedField.pos()));
        if (!valid) {
            toInsert.put(subField, oldValue);
            throw new IncorrectTypeException("Incorrect type - can't insert a " + value.getClass().getCanonicalName() + " into an Avro record of type " + changedField.schema().getType().toString());
        }
    } catch (AvroRuntimeException e) {
        throw new FieldDoesNotExistException("Field not found setting name " + fieldName, e);
    }
}
Also used : Schema(org.apache.avro.Schema) AvroRuntimeException(org.apache.avro.AvroRuntimeException) GenericData(org.apache.avro.generic.GenericData) ArrayList(java.util.ArrayList) List(java.util.List) GenericRecord(org.apache.avro.generic.GenericRecord) HashMap(java.util.HashMap) Map(java.util.Map)

Example 9 with AvroRuntimeException

use of org.apache.avro.AvroRuntimeException in project incubator-gobblin by apache.

In the class AvroHiveTypeUtils, the method generateAvroToHiveColumnMapping:

/**
 * Generates the Hive column-type DDL fragment for an Avro schema.
 * <p>
 * Top-level schemas must be RECORDs and render as a comma/newline separated
 * column list (optionally recording each column's Hive type in
 * {@code hiveColumns}); nested schemas render as Hive complex-type syntax
 * ({@code struct<...>}, {@code uniontype<...>}, {@code map<...>},
 * {@code array<...>}). Leaf types honor Avro logical types (date, decimal,
 * time-millis) before falling back to the physical-type mapping.
 *
 * @param schema      the Avro schema to translate
 * @param hiveColumns if present, populated with top-level column name → Hive type
 * @param topLevel    true when rendering the outermost (table) schema
 * @param datasetName dataset name used in error messages only
 * @return the Hive DDL type string for this schema
 * @throws IllegalArgumentException if a top-level schema is not a RECORD
 * @throws AvroRuntimeException for unsupported schema types
 */
public static String generateAvroToHiveColumnMapping(Schema schema, Optional<Map<String, String>> hiveColumns, boolean topLevel, String datasetName) {
    if (topLevel && !schema.getType().equals(Schema.Type.RECORD)) {
        throw new IllegalArgumentException(String.format("Schema for table must be of type RECORD. Received type: %s for dataset %s", schema.getType(), datasetName));
    }
    StringBuilder columns = new StringBuilder();
    boolean isFirst;
    switch(schema.getType()) {
        case RECORD:
            isFirst = true;
            if (topLevel) {
                // Top level: one "`name` type COMMENT ..." entry per field.
                for (Schema.Field field : schema.getFields()) {
                    if (isFirst) {
                        isFirst = false;
                    } else {
                        columns.append(", \n");
                    }
                    String type = generateAvroToHiveColumnMapping(field.schema(), hiveColumns, false, datasetName);
                    if (hiveColumns.isPresent()) {
                        hiveColumns.get().put(field.name(), type);
                    }
                    // Record where a flattened column originated; default to its own name.
                    String flattenSource = field.getProp("flatten_source");
                    if (StringUtils.isBlank(flattenSource)) {
                        flattenSource = field.name();
                    }
                    columns.append(String.format("  `%s` %s COMMENT 'from flatten_source %s'", field.name(), type, flattenSource));
                }
            } else {
                // Nested record: render as struct<`a`:t1,`b`:t2,...>.
                columns.append(HiveAvroTypeConstants.AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(schema.getType())).append("<");
                for (Schema.Field field : schema.getFields()) {
                    if (isFirst) {
                        isFirst = false;
                    } else {
                        columns.append(",");
                    }
                    String type = generateAvroToHiveColumnMapping(field.schema(), hiveColumns, false, datasetName);
                    columns.append("`").append(field.name()).append("`").append(":").append(type);
                }
                columns.append(">");
            }
            break;
        case UNION:
            // A ["null", T] union is just an optional T — unwrap it.
            Optional<Schema> optionalType = isOfOptionType(schema);
            if (optionalType.isPresent()) {
                Schema optionalTypeSchema = optionalType.get();
                columns.append(generateAvroToHiveColumnMapping(optionalTypeSchema, hiveColumns, false, datasetName));
            } else {
                // True union: render non-null members inside uniontype<...>.
                columns.append(HiveAvroTypeConstants.AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(schema.getType())).append("<");
                isFirst = true;
                for (Schema unionMember : schema.getTypes()) {
                    if (Schema.Type.NULL.equals(unionMember.getType())) {
                        continue;
                    }
                    if (isFirst) {
                        isFirst = false;
                    } else {
                        columns.append(",");
                    }
                    columns.append(generateAvroToHiveColumnMapping(unionMember, hiveColumns, false, datasetName));
                }
                columns.append(">");
            }
            break;
        case MAP:
            // Avro map keys are always strings.
            columns.append(HiveAvroTypeConstants.AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(schema.getType())).append("<");
            columns.append("string,").append(generateAvroToHiveColumnMapping(schema.getValueType(), hiveColumns, false, datasetName));
            columns.append(">");
            break;
        case ARRAY:
            columns.append(HiveAvroTypeConstants.AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(schema.getType())).append("<");
            columns.append(generateAvroToHiveColumnMapping(schema.getElementType(), hiveColumns, false, datasetName));
            columns.append(">");
            break;
        case NULL:
            // Standalone null contributes nothing to the DDL.
            break;
        case BYTES:
        case DOUBLE:
        case ENUM:
        case FIXED:
        case FLOAT:
        case INT:
        case LONG:
        case STRING:
        case BOOLEAN:
            // Handling Avro Logical Types which should always sit in leaf-level.
            boolean isLogicalTypeSet = false;
            try {
                // Hive-specific overrides (e.g. serde-driven types) win over generic logical types.
                String hiveSpecificLogicalType = generateHiveSpecificLogicalType(schema);
                if (StringUtils.isNoneEmpty(hiveSpecificLogicalType)) {
                    isLogicalTypeSet = true;
                    columns.append(hiveSpecificLogicalType);
                    break;
                }
            } catch (AvroSerdeException ae) {
                // Best-effort: fall through to the generic logical-type handling below.
                log.error("Failed to generate logical type string for field" + schema.getName() + " due to:", ae);
            }
            LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(schema);
            if (logicalType != null) {
                switch(logicalType.getName().toLowerCase()) {
                    case HiveAvroTypeConstants.DATE:
                        LogicalTypes.Date dateType = (LogicalTypes.Date) logicalType;
                        dateType.validate(schema);
                        columns.append("date");
                        isLogicalTypeSet = true;
                        break;
                    case HiveAvroTypeConstants.DECIMAL:
                        LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) logicalType;
                        decimalType.validate(schema);
                        columns.append(String.format("decimal(%s, %s)", decimalType.getPrecision(), decimalType.getScale()));
                        isLogicalTypeSet = true;
                        break;
                    case HiveAvroTypeConstants.TIME_MILLIS:
                        LogicalTypes.TimeMillis timeMillsType = (LogicalTypes.TimeMillis) logicalType;
                        timeMillsType.validate(schema);
                        columns.append("timestamp");
                        isLogicalTypeSet = true;
                        break;
                    default:
                        // FIX: use the already-resolved logicalType here. The previous code called
                        // schema.getLogicalType().getName(), which can NPE: fromSchemaIgnoreInvalid
                        // derives the logical type from schema props even when getLogicalType()
                        // returns null (schema not parsed with the logical type attached).
                        log.error("Unsupported logical type" + logicalType.getName() + ", fallback to physical type");
                }
            }
            if (!isLogicalTypeSet) {
                // No logical type applied: fall back to the physical-type mapping.
                columns.append(HiveAvroTypeConstants.AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(schema.getType()));
            }
            break;
        default:
            String exceptionMessage = String.format("DDL query generation failed for \"%s\" of dataset %s", schema, datasetName);
            log.error(exceptionMessage);
            throw new AvroRuntimeException(exceptionMessage);
    }
    return columns.toString();
}
Also used : Schema(org.apache.avro.Schema) LogicalType(org.apache.avro.LogicalType) LogicalTypes(org.apache.avro.LogicalTypes) AvroRuntimeException(org.apache.avro.AvroRuntimeException) AvroSerdeException(org.apache.hadoop.hive.serde2.avro.AvroSerdeException)

Example 10 with AvroRuntimeException

use of org.apache.avro.AvroRuntimeException in project incubator-gobblin by apache.

In the class AvroUtils, the method getFieldHelper:

/**
 * Helper method that does the actual work for {@link #getField(Schema, String)}.
 * Walks {@code pathList} from index {@code field} downward, descending through
 * record fields, map values, and array elements until the final path segment.
 *
 * @param schema   the schema to look the current path segment up in
 * @param pathList the full dotted path, pre-split into segments
 * @param field    index of the segment to resolve at this level
 * @return the field named by the last segment, or absent if it does not exist
 * @throws AvroRuntimeException on unions of complex types or non-traversable types
 */
private static Optional<Field> getFieldHelper(Schema schema, List<String> pathList, int field) {
    Field curField = schema.getField(pathList.get(field));
    // Last segment reached: this is the field being asked for (possibly null).
    if (field + 1 == pathList.size()) {
        return Optional.fromNullable(curField);
    }
    // More segments remain — descend according to the container type.
    Schema fieldSchema = curField.schema();
    Schema.Type fieldType = fieldSchema.getType();
    if (fieldType == Schema.Type.UNION) {
        throw new AvroRuntimeException("Union of complex types cannot be handled : " + schema);
    }
    if (fieldType == Schema.Type.MAP) {
        return AvroUtils.getFieldHelper(fieldSchema.getValueType(), pathList, field + 1);
    }
    if (fieldType == Schema.Type.RECORD) {
        return AvroUtils.getFieldHelper(fieldSchema, pathList, field + 1);
    }
    if (fieldType == Schema.Type.ARRAY) {
        return AvroUtils.getFieldHelper(fieldSchema.getElementType(), pathList, field + 1);
    }
    // Primitive (or otherwise non-traversable) type with path segments left over.
    throw new AvroRuntimeException("Invalid type " + fieldType + " in schema : " + schema);
}
Also used : Field(org.apache.avro.Schema.Field) Schema(org.apache.avro.Schema) AvroRuntimeException(org.apache.avro.AvroRuntimeException)

Aggregations

AvroRuntimeException (org.apache.avro.AvroRuntimeException)17 Schema (org.apache.avro.Schema)8 IOException (java.io.IOException)4 ArrayList (java.util.ArrayList)4 Field (org.apache.avro.Schema.Field)3 GenericRecord (org.apache.avro.generic.GenericRecord)3 SuppressFBWarnings (edu.umd.cs.findbugs.annotations.SuppressFBWarnings)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2 ToString (lombok.ToString)2 CsvParseException (org.spf4j.io.csv.CsvParseException)2 BufferedInputStream (java.io.BufferedInputStream)1 BufferedReader (java.io.BufferedReader)1 ByteArrayInputStream (java.io.ByteArrayInputStream)1 DataInputStream (java.io.DataInputStream)1 File (java.io.File)1 InputStream (java.io.InputStream)1 Field (java.lang.reflect.Field)1 URI (java.net.URI)1 FileSystem (java.nio.file.FileSystem)1