Search in sources :

Example 11 with DataConversionException

use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.

the class JsonRecordAvroSchemaToAvroConverter method convertNestedRecord.

/**
 * Builds an Avro record for {@code outputSchema} from the matching fields of {@code inputRecord}.
 *
 * <p>Fields listed in {@code ignoreFields} are skipped. A union field must have exactly two branches,
 * one of which is {@code null}; the non-null branch decides how the value is converted. Record-typed
 * fields are converted recursively, every other type goes through a converter obtained from
 * {@link JsonElementConversionWithAvroSchemaFactory}.
 *
 * <p>Note: when a nullable field is absent from {@code inputRecord}, an explicit JSON null is added to
 * {@code inputRecord} for it, so the input object is modified by this call.
 *
 * @param outputSchema Avro record schema to produce
 * @param inputRecord JSON object supplying the field values
 * @param workUnit work unit state handed to the per-field converters
 * @return the populated Avro record
 * @throws DataConversionException if a union is not a two-branch nullable union, a non-nullable field is
 *         missing from the input, or a field value cannot be converted
 */
private GenericRecord convertNestedRecord(Schema outputSchema, JsonObject inputRecord, WorkUnitState workUnit) throws DataConversionException {
    GenericRecord avroRecord = new GenericData.Record(outputSchema);
    for (Schema.Field field : outputSchema.getFields()) {
        String fieldName = field.name();
        if (this.ignoreFields.contains(fieldName)) {
            continue;
        }
        Schema schema = field.schema();
        Schema.Type type = schema.getType();
        boolean nullable = false;
        if (type.equals(Schema.Type.UNION)) {
            nullable = true;
            List<Schema> types = schema.getTypes();
            if (types.size() != 2) {
                throw new DataConversionException("Unions must be size 2, and contain one null");
            }
            // Unwrap the union: keep whichever branch is not the null type.
            if (types.get(0).getType().equals(Schema.Type.NULL)) {
                schema = types.get(1);
            } else if (types.get(1).getType().equals(Schema.Type.NULL)) {
                schema = types.get(0);
            } else {
                throw new DataConversionException("Unions must be size 2, and contain one null");
            }
            type = schema.getType();
            // A nullable field that is absent from the input is treated as an explicit JSON null.
            if (inputRecord.get(fieldName) == null) {
                inputRecord.add(fieldName, JsonNull.INSTANCE);
            }
        }
        if (inputRecord.get(fieldName) == null) {
            throw new DataConversionException("Field missing from record: " + fieldName);
        }
        if (type.equals(Schema.Type.RECORD)) {
            if (nullable && inputRecord.get(fieldName).isJsonNull()) {
                avroRecord.put(fieldName, null);
            } else {
                avroRecord.put(fieldName, convertNestedRecord(schema, inputRecord.get(fieldName).getAsJsonObject(), workUnit));
            }
        } else {
            try {
                JsonElementConversionWithAvroSchemaFactory.JsonElementConverter converter =
                    JsonElementConversionWithAvroSchemaFactory.getConvertor(fieldName, type.getName(), schema, workUnit, nullable);
                avroRecord.put(fieldName, converter.convert(inputRecord.get(fieldName)));
            } catch (Exception e) {
                // Keep the original failure as the cause so the root error is not lost.
                throw new DataConversionException("Could not convert field " + fieldName, e);
            }
        }
    }
    return avroRecord;
}
Also used : Schema(org.apache.avro.Schema) GenericRecord(org.apache.avro.generic.GenericRecord) DataConversionException(org.apache.gobblin.converter.DataConversionException) SchemaConversionException(org.apache.gobblin.converter.SchemaConversionException)

Example 12 with DataConversionException

use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.

the class HiveSerDeConverter method convertRecordImpl.

/**
 * Deserializes {@code inputRecord} with this converter's deserializer and serializes the result again
 * with its serializer, using the deserializer's object inspector.
 *
 * @param outputSchema not used by this method
 * @param inputRecord record to convert
 * @param workUnit not used by this method
 * @return an iterable holding the single re-serialized record
 * @throws DataConversionException wrapping any {@link SerDeException} raised while converting
 */
@Override
public Iterable<Writable> convertRecordImpl(Object outputSchema, Writable inputRecord, WorkUnitState workUnit) throws DataConversionException {
    final Writable reserialized;
    try {
        final Object rowObject = this.deserializer.deserialize(inputRecord);
        reserialized = this.serializer.serialize(rowObject, this.deserializer.getObjectInspector());
    } catch (SerDeException serDeFailure) {
        throw new DataConversionException(serDeFailure);
    }
    return new SingleRecordIterable<>(reserialized);
}
Also used : SingleRecordIterable(org.apache.gobblin.converter.SingleRecordIterable) Writable(org.apache.hadoop.io.Writable) DataConversionException(org.apache.gobblin.converter.DataConversionException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 13 with DataConversionException

use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.

the class HiveConverterUtils method getDestinationPartitionLocation.

/**
 * Returns the data location of a partition of the given table, looked up through a metastore client
 * obtained from {@link HiveMetastoreClientPool}.
 *
 * @param table Hive table; when absent, no lookup is performed
 * @param state workunit state whose job state supplies the metastore properties and URI
 * @param partitionName partition name
 * @return the partition data location, or absent if {@code table} is absent or the metastore reports
 *         that the partition does not exist
 * @throws DataConversionException if fetching the partition metadata fails with an
 *         {@link IOException}, {@link TException} or {@link HiveException}
 */
public static Optional<Path> getDestinationPartitionLocation(Optional<Table> table, WorkUnitState state, String partitionName) throws DataConversionException {
    if (!table.isPresent()) {
        return Optional.absent();
    }
    try {
        HiveMetastoreClientPool pool = HiveMetastoreClientPool.get(state.getJobState().getProperties(), Optional.fromNullable(state.getJobState().getProp(HiveDatasetFinder.HIVE_METASTORE_URI_KEY)));
        org.apache.hadoop.hive.metastore.api.Partition partition;
        try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
            partition = client.get().getPartition(table.get().getDbName(), table.get().getTableName(), partitionName);
        } catch (NoSuchObjectException e) {
            // The partition does not exist yet, so there is no location to report.
            return Optional.absent();
        }
        org.apache.hadoop.hive.ql.metadata.Table qlTable = new org.apache.hadoop.hive.ql.metadata.Table(table.get());
        Partition qlPartition = new Partition(qlTable, partition);
        return Optional.of(qlPartition.getDataLocation());
    } catch (IOException | TException | HiveException e) {
        throw new DataConversionException("Could not fetch destination table metadata", e);
    }
}
Also used : TException(org.apache.thrift.TException) Partition(org.apache.hadoop.hive.ql.metadata.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) DataConversionException(org.apache.gobblin.converter.DataConversionException) HiveMetastoreClientPool(org.apache.gobblin.hive.HiveMetastoreClientPool)

Example 14 with DataConversionException

use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.

the class AvroToJsonRecordWithMetadataConverter method convertRecord.

/**
 * Converts {@code inputRecord} to a JSON string with the inner converter, parses the first string it
 * yields into a {@link JsonNode}, and wraps that node together with the default metadata.
 *
 * @param outputSchema schema passed through to the inner converter
 * @param inputRecord Avro record to convert
 * @param workUnit work unit state passed through to the inner converter
 * @return a singleton collection holding the parsed record with its metadata
 * @throws DataConversionException if the inner conversion fails or the JSON cannot be parsed
 */
@Override
public Iterable<RecordWithMetadata<JsonNode>> convertRecord(String outputSchema, GenericRecord inputRecord, WorkUnitState workUnit) throws DataConversionException {
    try {
        String serialized = innerConverter.convertRecord(outputSchema, inputRecord, workUnit).iterator().next();
        JsonNode parsed = objectMapper.readValue(serialized, JsonNode.class);
        RecordWithMetadata<JsonNode> wrapped = new RecordWithMetadata<JsonNode>(parsed, defaultMetadata);
        return Collections.singleton(wrapped);
    } catch (IOException parseFailure) {
        throw new DataConversionException("Error converting to JSON", parseFailure);
    }
}
Also used : JsonNode(org.codehaus.jackson.JsonNode) IOException(java.io.IOException) DataConversionException(org.apache.gobblin.converter.DataConversionException)

Example 15 with DataConversionException

use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.

the class FlattenNestedKeyConverterTest method testConversion.

/**
 * Test schema and record conversion
 *  1. A successful schema and record conversion
 *  2. Another successful conversion by reusing the converter
 *  3. An expected failed conversion by reusing the converter
 *
 * @throws IOException if the schema or record fixtures cannot be read
 */
public void testConversion() throws IOException {
    String key = FlattenNestedKeyConverter.class.getSimpleName() + "." + FlattenNestedKeyConverter.FIELDS_TO_FLATTEN;
    Properties props = new Properties();
    props.put(key, "name,address.street_number");
    WorkUnitState workUnitState = new WorkUnitState();
    workUnitState.addAll(props);
    Schema inputSchema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/nested.avsc"));
    GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(inputSchema);
    File tmp = File.createTempFile(this.getClass().getSimpleName(), null);
    // Do not leave the fixture copy behind once the JVM exits.
    tmp.deleteOnExit();
    FileUtils.copyInputStreamToFile(getClass().getResourceAsStream("/converter/nested.avro"), tmp);
    GenericRecord inputRecord;
    // Close the reader (and its file handle) as soon as the single record has been read.
    try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(tmp, datumReader)) {
        inputRecord = dataFileReader.next();
    }
    FlattenNestedKeyConverter converter = new FlattenNestedKeyConverter();

    // 1. A successful schema and record conversion
    Schema outputSchema = null;
    try {
        outputSchema = converter.convertSchema(inputSchema, workUnitState);
    } catch (SchemaConversionException e) {
        Assert.fail(e.getMessage());
    }
    Assert.assertEquals(outputSchema.getFields().size(), inputSchema.getFields().size() + 1);
    Assert.assertNotNull(outputSchema.getField("addressStreet_number"));
    GenericRecord outputRecord = null;
    try {
        outputRecord = converter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
    } catch (DataConversionException e) {
        Assert.fail(e.getMessage());
    }
    Object expected = AvroUtils.getFieldValue(outputRecord, "address.street_number").get();
    Assert.assertEquals(outputRecord.get("addressStreet_number"), expected);

    // 2. Reuse the converter to do another successful conversion
    props.put(key, "name,address.city");
    workUnitState.addAll(props);
    try {
        outputSchema = converter.convertSchema(inputSchema, workUnitState);
    } catch (SchemaConversionException e) {
        Assert.fail(e.getMessage());
    }
    Assert.assertEquals(outputSchema.getFields().size(), inputSchema.getFields().size() + 1);
    Assert.assertNotNull(outputSchema.getField("addressCity"));
    try {
        outputRecord = converter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
    } catch (DataConversionException e) {
        Assert.fail(e.getMessage());
    }
    expected = AvroUtils.getFieldValue(outputRecord, "address.city").get();
    Assert.assertEquals(outputRecord.get("addressCity"), expected);

    // 3. Reuse the converter to do a failed conversion
    props.put(key, "name,address.anInvalidField");
    workUnitState.addAll(props);
    boolean hasAnException = false;
    try {
        converter.convertSchema(inputSchema, workUnitState);
    } catch (SchemaConversionException e) {
        hasAnException = true;
    }
    Assert.assertTrue(hasAnException);
}
Also used : SchemaConversionException(org.apache.gobblin.converter.SchemaConversionException) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) Properties(java.util.Properties) DataFileReader(org.apache.avro.file.DataFileReader) GenericRecord(org.apache.avro.generic.GenericRecord) DataConversionException(org.apache.gobblin.converter.DataConversionException) File(java.io.File)

Aggregations

DataConversionException (org.apache.gobblin.converter.DataConversionException)17 IOException (java.io.IOException)7 SingleRecordIterable (org.apache.gobblin.converter.SingleRecordIterable)7 JsonObject (com.google.gson.JsonObject)6 GenericRecord (org.apache.avro.generic.GenericRecord)5 SchemaConversionException (org.apache.gobblin.converter.SchemaConversionException)5 JsonElement (com.google.gson.JsonElement)4 Map (java.util.Map)3 Schema (org.apache.avro.Schema)3 CopyNotSupportedException (org.apache.gobblin.fork.CopyNotSupportedException)2 HiveMetastoreClientPool (org.apache.gobblin.hive.HiveMetastoreClientPool)2 IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient)2 NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException)2 Table (org.apache.hadoop.hive.metastore.api.Table)2 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2 Partition (org.apache.hadoop.hive.ql.metadata.Partition)2 TException (org.apache.thrift.TException)2 RawJsonDocument (com.couchbase.client.java.document.RawJsonDocument)1 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 JsonArray (com.google.gson.JsonArray)1