Use of org.apache.gobblin.converter.SchemaConversionException in the Apache incubator-gobblin project.
Class AvroFieldsPickConverter, method convertSchema.
/**
 * Narrows the input schema down to the fields selected through
 * {@link ConfigurationKeys#CONVERTER_AVRO_FIELD_PICK_FIELDS}.
 *
 * <p>The selection is implemented by reusing AvroSchemaFieldRemover: every field that was not
 * requested is listed and then removed from the schema.
 * <ol>
 *   <li>Read the list of requested fields from the property.</li>
 *   <li>Traverse the schema and collect the fields that have to be removed.</li>
 *   <li>While traversing, confirm that every requested field actually exists.</li>
 *   <li>Build the output schema with AvroSchemaFieldRemover.</li>
 * </ol>
 *
 * <p>Nesting is expressed with '.' in the property value: each Avro record type adds one level of
 * depth. The schema is always expected to start with a record type; that outermost record is
 * depth 0 and is never written in the field path, so no disambiguation is needed for it. For any
 * record reached after the first one, the field name is prefixed with "[Record name].".
 *
 * <p>Example:
 * <pre>{@code
 * {
 *   "namespace": "example.avro",
 *   "type": "record",
 *   "name": "user",
 *   "fields": [
 *     {
 *       "name": "name",
 *       "type": "string"
 *     },
 *     {
 *       "name": "favorite_number",
 *       "type": [
 *         "int",
 *         "null"
 *       ]
 *     },
 *     {
 *       "type": "record",
 *       "name": "address",
 *       "fields": [
 *         {
 *           "name": "city",
 *           "type": "string"
 *         }
 *       ]
 *     }
 *   ]
 * }
 * }</pre>
 * To keep only name and city, the property value should be "name,address.city". Note that it is
 * not "user.name", because the first record is depth zero.
 *
 * {@inheritDoc}
 *
 * @param inputSchema schema to pick fields from
 * @param workUnit state holding the mandatory field-pick property
 * @return the schema produced by {@code createSchema} for the selected fields
 * @throws SchemaConversionException wrapping any exception raised while building the output schema
 * @throws NullPointerException if the field-pick property is not set
 * @see org.apache.gobblin.converter.AvroToAvroConverterBase#convertSchema(org.apache.avro.Schema, org.apache.gobblin.configuration.WorkUnitState)
 */
@Override
public Schema convertSchema(Schema inputSchema, WorkUnitState workUnit) throws SchemaConversionException {
  LOG.info("Converting schema " + inputSchema);

  final String pickFieldsKey = ConfigurationKeys.CONVERTER_AVRO_FIELD_PICK_FIELDS;
  final String selectedFields = workUnit.getProp(pickFieldsKey);
  // The property is mandatory; a missing value is rejected before any conversion is attempted.
  Preconditions.checkNotNull(selectedFields,
      pickFieldsKey + " is required for converter " + this.getClass().getSimpleName());
  LOG.info("Converting schema to selected fields: " + selectedFields);

  final Schema pickedSchema;
  try {
    pickedSchema = createSchema(inputSchema, selectedFields);
  } catch (Exception e) {
    // Surface every failure of the schema construction as a conversion error, keeping the cause.
    throw new SchemaConversionException(e);
  }
  return pickedSchema;
}
Use of org.apache.gobblin.converter.SchemaConversionException in the Apache incubator-gobblin project.
Class FlattenNestedKeyConverter, method convertSchema.
/**
 * Builds an output schema that contains every field of the input schema plus one additional
 * top-level field for each configured nested key.
 *
 * <p>The keys are read from the {@code FIELDS_TO_FLATTEN} entry of the configuration section named
 * after this class. A key that does not contain {@code FIELD_LOCATION_DELIMITER} is ignored. For a
 * nested key, the new field name is derived by replacing the delimiter with '-' and converting the
 * result from lower-hyphen to lower-camel case; the mapping from that name to the nested location
 * is recorded in {@code fieldNameMap}. A key whose derived name is already recorded is skipped.
 *
 * @param inputSchema schema whose nested fields should be promoted
 * @param workUnit state carrying the converter configuration
 * @return {@code inputSchema} itself when no keys are configured, otherwise a new record schema
 * @throws SchemaConversionException if a configured nested key cannot be located in the schema
 */
@Override
public Schema convertSchema(Schema inputSchema, WorkUnitState workUnit) throws SchemaConversionException {
  // Mappings from an earlier invocation must not leak into this one.
  fieldNameMap.clear();

  Config converterConfig =
      ConfigUtils.propertiesToConfig(workUnit.getProperties()).getConfig(getClass().getSimpleName());
  List<String> keysToFlatten = ConfigUtils.getStringList(converterConfig, FIELDS_TO_FLATTEN);
  if (keysToFlatten == null || keysToFlatten.isEmpty()) {
    // Nothing to flatten: hand the schema back untouched.
    return inputSchema;
  }

  // Start with copies of all existing fields.
  List<Field> outputFields = new ArrayList<>();
  for (Field existing : inputSchema.getFields()) {
    outputFields.add(
        new Field(existing.name(), existing.schema(), existing.doc(), existing.defaultValue(), existing.order()));
  }

  // Promote every nested key to a top-level field.
  for (String rawKey : keysToFlatten) {
    boolean isNested = rawKey.contains(FIELD_LOCATION_DELIMITER);
    if (!isNested) {
      continue;
    }

    String location = rawKey.trim();
    // Derive the camel-cased name of the flattened field.
    String flattenedName =
        CaseFormat.LOWER_HYPHEN.to(CaseFormat.LOWER_CAMEL, location.replace(FIELD_LOCATION_DELIMITER, "-"));

    if (!fieldNameMap.containsKey(flattenedName)) {
      fieldNameMap.put(flattenedName, location);

      Optional<Field> located = AvroUtils.getField(inputSchema, location);
      if (!located.isPresent()) {
        throw new SchemaConversionException("Unable to get field with location: " + location);
      }

      // Same definition as the nested field, registered under the flattened name.
      Field nested = located.get();
      outputFields.add(
          new Field(flattenedName, nested.schema(), nested.doc(), nested.defaultValue(), nested.order()));
    }
  }

  Schema flattenedSchema = Schema.createRecord(
      inputSchema.getName(), inputSchema.getDoc(), inputSchema.getNamespace(), inputSchema.isError());
  flattenedSchema.setFields(outputFields);
  return flattenedSchema;
}
Use of org.apache.gobblin.converter.SchemaConversionException in the Apache incubator-gobblin project.
Class JsonIntermediateToAvroConverter, method convertSchema.
/**
 * Creates the record converter for the given JSON schema and returns the Avro schema it exposes.
 *
 * <p>The JSON schema is labelled with the extract's table name, and the record converter is built
 * with the extract's namespace and stored in {@code recordConverter}. When
 * {@code CONVERTER_AVRO_NULLIFY_FIELDS_ENABLED} is set, the schema is passed through
 * {@code generateSchemaWithNullifiedField} before being returned.
 *
 * @param schema JSON description of the source schema
 * @param workUnit state providing the extract and the converter properties
 * @return the Avro record schema, optionally post-processed for nullified fields
 * @throws SchemaConversionException if the schema uses an unsupported date type
 */
@Override
public Schema convertSchema(JsonArray schema, WorkUnitState workUnit) throws SchemaConversionException {
  try {
    JsonSchema tableSchema = new JsonSchema(schema);
    tableSchema.setColumnName(workUnit.getExtract().getTable());
    recordConverter = new RecordConverter(tableSchema, workUnit, workUnit.getExtract().getNamespace());
  } catch (UnsupportedDateTypeException e) {
    throw new SchemaConversionException(e);
  }

  Schema avroSchema = recordConverter.schema();
  boolean nullifyFields =
      workUnit.getPropAsBoolean(CONVERTER_AVRO_NULLIFY_FIELDS_ENABLED, DEFAULT_CONVERTER_AVRO_NULLIFY_FIELDS_ENABLED);
  if (!nullifyFields) {
    return avroSchema;
  }
  return this.generateSchemaWithNullifiedField(workUnit, avroSchema);
}
Use of org.apache.gobblin.converter.SchemaConversionException in the Apache incubator-gobblin project.
Class FlattenNestedKeyConverterTest, method testConversion.
/**
 * Test schema and record conversion
 * 1. A successful schema and record conversion
 * 2. Another successful conversion by reusing the converter
 * 3. An expected failed conversion by reusing the converter
 *
 * @throws IOException if the test resources cannot be read or the temporary file cannot be created
 */
public void testConversion() throws IOException {
  String key = FlattenNestedKeyConverter.class.getSimpleName() + "." + FlattenNestedKeyConverter.FIELDS_TO_FLATTEN;
  Properties props = new Properties();
  props.put(key, "name,address.street_number");
  WorkUnitState workUnitState = new WorkUnitState();
  workUnitState.addAll(props);

  // The Avro schema parser leaves the supplied stream open, so it is closed here explicitly.
  Schema inputSchema;
  try (java.io.InputStream schemaStream = getClass().getResourceAsStream("/converter/nested.avsc")) {
    inputSchema = new Schema.Parser().parse(schemaStream);
  }
  GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(inputSchema);

  // Read the single input record, then release the reader and remove the temporary copy so the
  // test leaks neither a file handle nor a file.
  GenericRecord inputRecord;
  File tmp = File.createTempFile(this.getClass().getSimpleName(), null);
  try {
    FileUtils.copyInputStreamToFile(getClass().getResourceAsStream("/converter/nested.avro"), tmp);
    try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(tmp, datumReader)) {
      inputRecord = dataFileReader.next();
    }
  } finally {
    FileUtils.deleteQuietly(tmp);
  }

  FlattenNestedKeyConverter converter = new FlattenNestedKeyConverter();

  // 1. A successful schema and record conversion
  Schema outputSchema = null;
  try {
    outputSchema = converter.convertSchema(inputSchema, workUnitState);
  } catch (SchemaConversionException e) {
    Assert.fail(e.getMessage());
  }
  Assert.assertEquals(outputSchema.getFields().size(), inputSchema.getFields().size() + 1);
  Assert.assertNotNull(outputSchema.getField("addressStreet_number"));
  GenericRecord outputRecord = null;
  try {
    outputRecord = converter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
  } catch (DataConversionException e) {
    Assert.fail(e.getMessage());
  }
  Object expected = AvroUtils.getFieldValue(outputRecord, "address.street_number").get();
  // Compare by value: reference identity is not part of what the converter promises.
  Assert.assertEquals(outputRecord.get("addressStreet_number"), expected);

  // 2. Reuse the converter to do another successful conversion
  props.put(key, "name,address.city");
  workUnitState.addAll(props);
  try {
    outputSchema = converter.convertSchema(inputSchema, workUnitState);
  } catch (SchemaConversionException e) {
    Assert.fail(e.getMessage());
  }
  Assert.assertEquals(outputSchema.getFields().size(), inputSchema.getFields().size() + 1);
  Assert.assertNotNull(outputSchema.getField("addressCity"));
  try {
    outputRecord = converter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
  } catch (DataConversionException e) {
    Assert.fail(e.getMessage());
  }
  expected = AvroUtils.getFieldValue(outputRecord, "address.city").get();
  Assert.assertEquals(outputRecord.get("addressCity"), expected);

  // 3. Reuse the converter to do a failed conversion
  props.put(key, "name,address.anInvalidField");
  workUnitState.addAll(props);
  boolean hasAnException = false;
  try {
    converter.convertSchema(inputSchema, workUnitState);
  } catch (SchemaConversionException e) {
    hasAnException = true;
  }
  Assert.assertTrue(hasAnException);
}
Use of org.apache.gobblin.converter.SchemaConversionException in the Apache incubator-gobblin project.
Class AvroToJdbcEntryConverter, method produceFlattenedHelper.
/**
 * Adds the flattened column(s) contributed by one Avro field to {@code flattened}.
 *
 * <p>The field's schema is first resolved through {@code determineType}. A non-record field
 * contributes a single entry keyed by its own name. A record field contributes one entry per
 * entry of its flattened form, keyed by the field name, {@code AVRO_NESTED_COLUMN_DELIMITER} and
 * the nested key.
 *
 * @param field the Avro field to flatten
 * @param flattened accumulator mapping flattened column names to their Avro type; modified in place
 * @throws SchemaConversionException if a non-record field's name is already present in {@code flattened}
 * @throws IllegalArgumentException if a key derived from a record field is already present in {@code flattened}
 */
private static void produceFlattenedHelper(Field field, Map<String, Type> flattened) throws SchemaConversionException {
  Schema resolvedSchema = determineType(field.schema());
  Type resolvedType = resolvedSchema.getType();

  if (!Type.RECORD.equals(resolvedType)) {
    Type previous = flattened.put(field.name(), resolvedType);
    if (previous != null) {
      // No duplicate name allowed when flattening (not considering name space we don't have any assumption between namespace and actual database field name)
      throw new SchemaConversionException("Duplicate name detected in Avro schema. " + field.name());
    }
    return;
  }

  // Record: flatten it and register every nested column under a name qualified by this field.
  for (Entry<String, Type> nested : flatten(resolvedSchema).entrySet()) {
    String qualifiedName =
        String.format("%s" + AVRO_NESTED_COLUMN_DELIMITER + "%s", field.name(), nested.getKey());
    Type previous = flattened.put(qualifiedName, nested.getValue());
    Preconditions.checkArgument(previous == null, "Duplicate name detected in Avro schema. Field: " + qualifiedName);
  }
}
Aggregations