Example use of org.apache.gobblin.converter.SingleRecordIterable in the project incubator-gobblin by Apache: the convertRecord method of the class GobblinTrackingEventFlattenFilterConverter.
@Override
public Iterable<GenericRecord> convertRecord(Schema outputSchema, GenericRecord inputRecord, WorkUnitState workUnit) throws DataConversionException {
  // Build the flattened output record one schema field at a time.
  GenericRecord outputRecord = new GenericData.Record(outputSchema);
  // Inverse view maps a renamed output field back to its original input name.
  BiMap<String, String> renamedToOriginal = this.fieldsRenameMap.inverse();
  for (Schema.Field outputField : outputSchema.getFields()) {
    String outputFieldName = outputField.name();
    // Fall back to the output name itself when the field was not renamed.
    String sourceFieldName = renamedToOriginal.getOrDefault(outputFieldName, outputFieldName);
    if (this.nonMapFields.contains(sourceFieldName)) {
      // Top-level field of the input record: copy it straight across.
      outputRecord.put(outputFieldName, inputRecord.get(sourceFieldName));
    } else {
      // Field nested under the map field: resolve it via a dotted path,
      // defaulting to the empty string when absent.
      String nestedPath = Joiner.on('.').join(this.mapFieldName, sourceFieldName);
      outputRecord.put(outputFieldName, AvroUtils.getFieldValue(inputRecord, nestedPath).or(""));
    }
  }
  return new SingleRecordIterable<>(outputRecord);
}
Example use of org.apache.gobblin.converter.SingleRecordIterable in the project incubator-gobblin by Apache: the convertRecord method of the class WikipediaConverter.
@Override
public Iterable<GenericRecord> convertRecord(Schema outputSchema, JsonElement inputRecord, WorkUnitState workUnit) {
  // Re-parse the element and flatten it into a name -> value map.
  JsonElement parsedElement = GSON.fromJson(inputRecord, JsonElement.class);
  Map<String, Object> jsonFields = GSON.fromJson(parsedElement, FIELD_ENTRY_TYPE);
  GenericRecord avroRecord = new GenericData.Record(outputSchema);
  for (Map.Entry<String, Object> jsonField : jsonFields.entrySet()) {
    String key = jsonField.getKey();
    if ("*".equals(key)) {
      // switch '*' to 'content' since '*' is not a valid avro schema field name
      avroRecord.put(JSON_CONTENT_MEMBER, jsonField.getValue());
    } else if (outputSchema.getField(key) != null) {
      // Copy only fields that actually exist in the output schema.
      avroRecord.put(key, jsonField.getValue());
    }
  }
  return new SingleRecordIterable<>(avroRecord);
}
Example use of org.apache.gobblin.converter.SingleRecordIterable in the project incubator-gobblin by Apache: the convertRecord method of the class SimpleJsonConverter.
@Override
public Iterable<GenericRecord> convertRecord(Schema schema, String inputRecord, WorkUnitState workUnit) throws DataConversionException {
  // Parse the JSON string and flatten it into a name -> value map.
  JsonElement parsedJson = GSON.fromJson(inputRecord, JsonElement.class);
  Map<String, Object> fieldMap = GSON.fromJson(parsedJson, FIELD_ENTRY_TYPE);
  // Copy every parsed entry into the Avro record as-is.
  GenericRecord avroRecord = new GenericData.Record(schema);
  fieldMap.forEach(avroRecord::put);
  return new SingleRecordIterable<>(avroRecord);
}
Example use of org.apache.gobblin.converter.SingleRecordIterable in the project incubator-gobblin by Apache: the convertRecord method of the class FlattenNestedKeyConverter.
/**
 * Flattens the configured nested fields of {@code inputRecord} into top-level
 * fields of a new record conforming to {@code outputSchema}.
 *
 * <p>When no fields are configured for flattening the input record is passed
 * through unchanged. Otherwise all non-flattened fields are copied verbatim
 * and each flattened field is resolved from its nested source location.
 *
 * @throws DataConversionException if any configured nested location has no value
 */
@Override
public Iterable<GenericRecord> convertRecord(Schema outputSchema, GenericRecord inputRecord, WorkUnitState workUnit) throws DataConversionException {
  // No fields need flattening: pass the record through untouched.
  if (fieldNameMap.isEmpty()) {
    return new SingleRecordIterable<>(inputRecord);
  }
  GenericRecord outputRecord = new GenericData.Record(outputSchema);
  // Copy all pre-existing fields; the flattened fields are filled in below.
  for (Field field : outputSchema.getFields()) {
    String fieldName = field.name();
    if (!fieldNameMap.containsKey(fieldName)) {
      outputRecord.put(fieldName, inputRecord.get(fieldName));
    }
  }
  // Populate each new (flattened) field from its nested source location.
  for (Map.Entry<String, String> entry : fieldNameMap.entrySet()) {
    Optional<Object> value = AvroUtils.getFieldValue(inputRecord, entry.getValue());
    if (!value.isPresent()) {
      throw new DataConversionException("Unable to get field value with location: " + entry.getValue());
    }
    outputRecord.put(entry.getKey(), value.get());
  }
  return new SingleRecordIterable<>(outputRecord);
}
Example use of org.apache.gobblin.converter.SingleRecordIterable in the project incubator-gobblin by Apache: the convertRecord method of the class CsvToJsonConverter.
/**
 * Takes in a record with format String and splits the data based on the configured
 * delimiter ({@code ConfigurationKeys.CONVERTER_CSV_TO_JSON_DELIMITER}).
 * Uses the inputSchema and the split record to convert the record to a JsonObject.
 * Missing trailing columns, null values, empty strings, and the literal null token
 * are all emitted as JSON null.
 *
 * @return a JsonObject representing the record
 * @throws DataConversionException if the delimiter is missing/empty or parsing fails
 */
@Override
public Iterable<JsonObject> convertRecord(JsonArray outputSchema, String inputRecord, WorkUnitState workUnit) throws DataConversionException {
  try {
    String strDelimiter = workUnit.getProp(ConfigurationKeys.CONVERTER_CSV_TO_JSON_DELIMITER);
    if (Strings.isNullOrEmpty(strDelimiter)) {
      throw new IllegalArgumentException("Delimiter cannot be empty");
    }
    InputStreamCSVReader reader = new InputStreamCSVReader(inputRecord, strDelimiter.charAt(0),
        workUnit.getProp(ConfigurationKeys.CONVERTER_CSV_TO_JSON_ENCLOSEDCHAR,
            ConfigurationKeys.DEFAULT_CONVERTER_CSV_TO_JSON_ENCLOSEDCHAR).charAt(0));
    List<String> recordSplit = Lists.newArrayList(reader.splitRecord());
    JsonObject outputRecord = new JsonObject();
    for (int i = 0; i < outputSchema.size(); i++) {
      // Hoist the column name: it was previously recomputed in every branch.
      String columnName = outputSchema.get(i).getAsJsonObject().get("columnName").getAsString();
      String value = i < recordSplit.size() ? recordSplit.get(i) : null;
      // Collapse the three identical "emit JSON null" branches into one.
      // equalsIgnoreCase is locale-independent, unlike the previous
      // toLowerCase().equals(NULL) which misbehaves under e.g. the Turkish locale.
      if (value == null || value.isEmpty() || value.equalsIgnoreCase(NULL)) {
        outputRecord.add(columnName, JsonNull.INSTANCE);
      } else {
        outputRecord.addProperty(columnName, value);
      }
    }
    return new SingleRecordIterable<>(outputRecord);
  } catch (Exception e) {
    // Preserve the cause so callers can diagnose the underlying parse failure.
    throw new DataConversionException(e);
  }
}
Aggregations: end of collected usage examples.