Use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.
The class JsonRecordAvroSchemaToAvroConverter, method convertNestedRecord.
private GenericRecord convertNestedRecord(Schema outputSchema, JsonObject inputRecord, WorkUnitState workUnit)
    throws DataConversionException {
  GenericRecord avroRecord = new GenericData.Record(outputSchema);
  JsonElementConversionWithAvroSchemaFactory.JsonElementConverter converter;

  for (Schema.Field field : outputSchema.getFields()) {
    if (this.ignoreFields.contains(field.name())) {
      continue;
    }

    Schema.Type type = field.schema().getType();
    boolean nullable = false;
    Schema schema = field.schema();

    if (type.equals(Schema.Type.UNION)) {
      // Only unions of exactly [null, T] (in either order) are supported;
      // unwrap the non-null branch and remember the field is nullable.
      nullable = true;
      List<Schema> types = field.schema().getTypes();
      if (types.size() != 2) {
        throw new DataConversionException("Unions must be size 2, and contain one null");
      }
      if (field.schema().getTypes().get(0).getType().equals(Schema.Type.NULL)) {
        schema = field.schema().getTypes().get(1);
        type = schema.getType();
      } else if (field.schema().getTypes().get(1).getType().equals(Schema.Type.NULL)) {
        schema = field.schema().getTypes().get(0);
        type = schema.getType();
      } else {
        throw new DataConversionException("Unions must be size 2, and contain one null");
      }
      // A nullable field missing from the input is treated as an explicit null.
      if (inputRecord.get(field.name()) == null) {
        inputRecord.add(field.name(), JsonNull.INSTANCE);
      }
    }

    if (inputRecord.get(field.name()) == null) {
      throw new DataConversionException("Field missing from record: " + field.name());
    }

    if (type.equals(Schema.Type.RECORD)) {
      // Recurse into nested records; all other types go through a per-type
      // converter obtained from the factory.
      if (nullable && inputRecord.get(field.name()).isJsonNull()) {
        avroRecord.put(field.name(), null);
      } else {
        avroRecord.put(field.name(), convertNestedRecord(schema, inputRecord.get(field.name()).getAsJsonObject(), workUnit));
      }
    } else {
      try {
        converter = JsonElementConversionWithAvroSchemaFactory.getConvertor(field.name(), type.getName(), schema,
            workUnit, nullable);
        avroRecord.put(field.name(), converter.convert(inputRecord.get(field.name())));
      } catch (Exception e) {
        throw new DataConversionException("Could not convert field " + field.name(), e);
      }
    }
  }
  return avroRecord;
}
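A minimal sketch of the nullable-union handling above. The Person schema and nickname field are illustrative assumptions, not part of the Gobblin codebase:

import org.apache.avro.Schema;
import com.google.gson.JsonObject;

public class NullableUnionSketch {
  public static void main(String[] args) {
    // Hypothetical schema: a union of exactly [null, string], the only
    // union shape convertNestedRecord accepts.
    Schema schema = new Schema.Parser().parse(
        "{\"type\": \"record\", \"name\": \"Person\", \"fields\": ["
            + "{\"name\": \"nickname\", \"type\": [\"null\", \"string\"]}]}");
    // "nickname" is deliberately absent from the input; convertNestedRecord
    // would normalize it to JsonNull.INSTANCE and store null, rather than
    // throwing "Field missing from record: nickname".
    JsonObject input = new JsonObject();
    System.out.println(schema.getField("nickname").schema().getTypes() + " / input: " + input);
  }
}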
Use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.
The class HiveSerDeConverter, method convertRecordImpl.
@Override
public Iterable<Writable> convertRecordImpl(Object outputSchema, Writable inputRecord, WorkUnitState workUnit)
    throws DataConversionException {
  try {
    // Deserialize with the source SerDe, then re-serialize with the target
    // SerDe; any SerDeException is wrapped in a DataConversionException.
    Object deserialized = this.deserializer.deserialize(inputRecord);
    Writable convertedRecord = this.serializer.serialize(deserialized, this.deserializer.getObjectInspector());
    return new SingleRecordIterable<>(convertedRecord);
  } catch (SerDeException e) {
    throw new DataConversionException(e);
  }
}
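The deserialize-then-serialize round trip is easiest to see with a concrete SerDe. A minimal sketch assuming Hive's LazySimpleSerDe; the column names, types, and Ctrl-A-delimited row are made up, and HiveSerDeConverter itself configures its SerDes from job state rather than inline like this:

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class SerDeRoundTripSketch {
  public static void main(String[] args) throws SerDeException {
    // Illustrative only: a text SerDe with two hypothetical columns.
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");
    LazySimpleSerDe serDe = new LazySimpleSerDe();
    serDe.initialize(new Configuration(), props);
    // Deserialize a row, then serialize it back -- the same shape of work
    // convertRecordImpl wraps in a try/catch for SerDeException.
    Object row = serDe.deserialize(new Text("1\u0001alice"));
    Writable out = serDe.serialize(row, serDe.getObjectInspector());
    System.out.println(out);
  }
}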
Use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.
The class HiveConverterUtils, method getDestinationPartitionLocation.
/**
 * Returns the partition data location of a given table and partition.
 * @param table Hive table
 * @param state workunit state
 * @param partitionName partition name
 * @return partition data location, or absent if the table or partition does not exist
 * @throws DataConversionException if the partition metadata cannot be fetched from the metastore
 */
public static Optional<Path> getDestinationPartitionLocation(Optional<Table> table, WorkUnitState state,
    String partitionName) throws DataConversionException {
  Optional<org.apache.hadoop.hive.metastore.api.Partition> partitionOptional;
  if (!table.isPresent()) {
    return Optional.absent();
  }
  try {
    HiveMetastoreClientPool pool = HiveMetastoreClientPool.get(state.getJobState().getProperties(),
        Optional.fromNullable(state.getJobState().getProp(HiveDatasetFinder.HIVE_METASTORE_URI_KEY)));
    try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
      partitionOptional =
          Optional.of(client.get().getPartition(table.get().getDbName(), table.get().getTableName(), partitionName));
    } catch (NoSuchObjectException e) {
      // A missing partition is not an error; report absence instead.
      return Optional.absent();
    }
    if (partitionOptional.isPresent()) {
      org.apache.hadoop.hive.ql.metadata.Table qlTable = new org.apache.hadoop.hive.ql.metadata.Table(table.get());
      Partition qlPartition = new Partition(qlTable, partitionOptional.get());
      return Optional.of(qlPartition.getDataLocation());
    }
  } catch (IOException | TException | HiveException e) {
    throw new DataConversionException("Could not fetch destination table metadata", e);
  }
  return Optional.absent();
}
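A hedged call-site sketch: the wrapper class, method, and partition spec string below are invented for illustration, and the import for HiveConverterUtils is omitted because its package depends on the Gobblin module:

import com.google.common.base.Optional;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.gobblin.configuration.WorkUnitState;
import org.apache.gobblin.converter.DataConversionException;

public class PartitionLocationSketch {
  // Hypothetical caller: resolves the data location for one partition,
  // returning null when the table or partition is absent.
  static Path locationOrNull(Optional<Table> table, WorkUnitState state) throws DataConversionException {
    Optional<Path> location =
        HiveConverterUtils.getDestinationPartitionLocation(table, state, "datepartition=2020-01-01-00");
    return location.orNull();
  }
}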
Use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.
The class AvroToJsonRecordWithMetadataConverter, method convertRecord.
@Override
public Iterable<RecordWithMetadata<JsonNode>> convertRecord(String outputSchema, GenericRecord inputRecord,
    WorkUnitState workUnit) throws DataConversionException {
  try {
    // Delegate the Avro-to-JSON-string conversion to the inner converter,
    // then parse the result into a Jackson tree and attach default metadata.
    Iterable<String> innerRecordIterable = innerConverter.convertRecord(outputSchema, inputRecord, workUnit);
    String record = innerRecordIterable.iterator().next();
    JsonNode jsonRoot = objectMapper.readValue(record, JsonNode.class);
    return Collections.singleton(new RecordWithMetadata<JsonNode>(jsonRoot, defaultMetadata));
  } catch (IOException e) {
    throw new DataConversionException("Error converting to JSON", e);
  }
}
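The only step here that can fail is the Jackson parse. A standalone sketch of that step with a made-up record string, assuming the converter's objectMapper is a Jackson 2 (com.fasterxml) ObjectMapper:

import java.io.IOException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

public class JsonParseSketch {
  public static void main(String[] args) throws IOException {
    ObjectMapper objectMapper = new ObjectMapper();
    // Illustrative record string standing in for the inner converter's output.
    JsonNode jsonRoot = objectMapper.readValue("{\"id\": 1, \"name\": \"alice\"}", JsonNode.class);
    // A malformed string here would throw the IOException that convertRecord
    // wraps in DataConversionException("Error converting to JSON", e).
    System.out.println(jsonRoot.get("name").asText());
  }
}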
Use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.
The class FlattenNestedKeyConverterTest, method testConversion.
/**
 * Test schema and record conversion:
 * 1. A successful schema and record conversion
 * 2. Another successful conversion reusing the converter
 * 3. An expected failed conversion reusing the converter
 */
public void testConversion() throws IOException {
  String key = FlattenNestedKeyConverter.class.getSimpleName() + "." + FlattenNestedKeyConverter.FIELDS_TO_FLATTEN;
  Properties props = new Properties();
  props.put(key, "name,address.street_number");
  WorkUnitState workUnitState = new WorkUnitState();
  workUnitState.addAll(props);

  // Load the input schema and a sample record from test resources.
  Schema inputSchema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/nested.avsc"));
  GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(inputSchema);
  File tmp = File.createTempFile(this.getClass().getSimpleName(), null);
  FileUtils.copyInputStreamToFile(getClass().getResourceAsStream("/converter/nested.avro"), tmp);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(tmp, datumReader);
  GenericRecord inputRecord = dataFileReader.next();

  FlattenNestedKeyConverter converter = new FlattenNestedKeyConverter();
  Schema outputSchema = null;
  try {
    outputSchema = converter.convertSchema(inputSchema, workUnitState);
  } catch (SchemaConversionException e) {
    Assert.fail(e.getMessage());
  }
  // Flattening adds exactly one new top-level field.
  Assert.assertTrue(outputSchema.getFields().size() == inputSchema.getFields().size() + 1);
  Assert.assertTrue(outputSchema.getField("addressStreet_number") != null);

  GenericRecord outputRecord = null;
  try {
    outputRecord = converter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
  } catch (DataConversionException e) {
    Assert.fail(e.getMessage());
  }
  Object expected = AvroUtils.getFieldValue(outputRecord, "address.street_number").get();
  Assert.assertTrue(outputRecord.get("addressStreet_number") == expected);

  // Reuse the converter to do another successful conversion
  props.put(key, "name,address.city");
  workUnitState.addAll(props);
  try {
    outputSchema = converter.convertSchema(inputSchema, workUnitState);
  } catch (SchemaConversionException e) {
    Assert.fail(e.getMessage());
  }
  Assert.assertTrue(outputSchema.getFields().size() == inputSchema.getFields().size() + 1);
  Assert.assertTrue(outputSchema.getField("addressCity") != null);
  try {
    outputRecord = converter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
  } catch (DataConversionException e) {
    Assert.fail(e.getMessage());
  }
  expected = AvroUtils.getFieldValue(outputRecord, "address.city").get();
  Assert.assertTrue(outputRecord.get("addressCity") == expected);

  // Reuse the converter to do a failed conversion
  props.put(key, "name,address.anInvalidField");
  workUnitState.addAll(props);
  boolean hasAnException = false;
  try {
    converter.convertSchema(inputSchema, workUnitState);
  } catch (SchemaConversionException e) {
    hasAnException = true;
  }
  Assert.assertTrue(hasAnException);
}
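For reference, a hypothetical reconstruction of the shape /converter/nested.avsc must have for the assertions above to pass; the actual test resource is not shown on this page, and the record names are guesses:

import org.apache.avro.Schema;

public class NestedSchemaSketch {
  public static void main(String[] args) {
    // Guessed stand-in for /converter/nested.avsc: a top-level "name" field
    // plus a nested "address" record with "street_number" and "city".
    Schema schema = new Schema.Parser().parse(
        "{\"type\": \"record\", \"name\": \"User\", \"fields\": ["
            + "{\"name\": \"name\", \"type\": \"string\"},"
            + "{\"name\": \"address\", \"type\": {\"type\": \"record\", \"name\": \"Address\", \"fields\": ["
            + "{\"name\": \"street_number\", \"type\": \"int\"},"
            + "{\"name\": \"city\", \"type\": \"string\"}]}}]}");
    // Flattening "address.street_number" adds one top-level field named
    // "addressStreet_number", which is what the size + 1 and
    // getField("addressStreet_number") assertions verify.
    System.out.println(schema.getFields().size()); // 2 before flattening
  }
}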