Use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.
The class AbstractAvroToOrcConverter, method getDestinationPartitionLocation.
private Optional<Path> getDestinationPartitionLocation(Optional<Table> table, WorkUnitState state, String partitionName)
    throws DataConversionException {
  Optional<org.apache.hadoop.hive.metastore.api.Partition> partitionOptional =
      Optional.<org.apache.hadoop.hive.metastore.api.Partition>absent();
  if (!table.isPresent()) {
    return Optional.<Path>absent();
  }
  try {
    // Borrow a metastore client from the shared pool keyed by the metastore URI.
    HiveMetastoreClientPool pool = HiveMetastoreClientPool.get(state.getJobState().getProperties(),
        Optional.fromNullable(state.getJobState().getProp(HiveDatasetFinder.HIVE_METASTORE_URI_KEY)));
    try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
      partitionOptional =
          Optional.of(client.get().getPartition(table.get().getDbName(), table.get().getTableName(), partitionName));
    } catch (NoSuchObjectException e) {
      // The destination partition does not exist yet.
      return Optional.<Path>absent();
    }
    if (partitionOptional.isPresent()) {
      org.apache.hadoop.hive.ql.metadata.Table qlTable = new org.apache.hadoop.hive.ql.metadata.Table(table.get());
      org.apache.hadoop.hive.ql.metadata.Partition qlPartition =
          new org.apache.hadoop.hive.ql.metadata.Partition(qlTable, partitionOptional.get());
      return Optional.of(qlPartition.getDataLocation());
    }
  } catch (IOException | TException | HiveException e) {
    throw new DataConversionException(String.format("Could not fetch destination table %s.%s metadata",
        table.get().getDbName(), table.get().getTableName()), e);
  }
  return Optional.<Path>absent();
}
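Note that the method returns Guava's com.google.common.base.Optional (absent() on a miss, of(value) on a hit) rather than java.util.Optional. Below is a minimal, self-contained sketch of that lookup idiom, with a plain map standing in for the metastore call; the class name and data are illustrative, not from the Gobblin sources.

import com.google.common.base.Optional;

// Minimal sketch of the Guava Optional idiom used above; the map is a
// purely illustrative stand-in for the Hive metastore partition lookup.
public class GuavaOptionalSketch {
  private static final java.util.Map<String, String> LOCATIONS =
      java.util.Collections.singletonMap("ds=2024-01-01", "/data/orc/ds=2024-01-01");

  // Mirrors getDestinationPartitionLocation: absent() on a miss, of(value) on a hit.
  static Optional<String> lookupLocation(String partitionName) {
    return Optional.fromNullable(LOCATIONS.get(partitionName));
  }

  public static void main(String[] args) {
    Optional<String> hit = lookupLocation("ds=2024-01-01");
    Optional<String> miss = lookupLocation("ds=1999-12-31");
    System.out.println(hit.isPresent() ? hit.get() : "absent");   // /data/orc/ds=2024-01-01
    System.out.println(miss.isPresent() ? miss.get() : "absent"); // absent
  }
}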
Use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.
The class FlattenNestedKeyConverter, method convertRecord.
@Override
public Iterable<GenericRecord> convertRecord(Schema outputSchema, GenericRecord inputRecord, WorkUnitState workUnit)
    throws DataConversionException {
  // No fields need flattening
  if (fieldNameMap.size() == 0) {
    return new SingleRecordIterable<>(inputRecord);
  }
  GenericRecord outputRecord = new GenericData.Record(outputSchema);
  // Copy the original fields first; the flattened fields are filled in below
  for (Field field : outputSchema.getFields()) {
    String fieldName = field.name();
    if (fieldNameMap.containsKey(fieldName)) {
      // Skip the new, flattened fields for now
      continue;
    }
    outputRecord.put(fieldName, inputRecord.get(fieldName));
  }
  // Deal with the new fields: resolve each nested location and fail if it is missing
  for (Map.Entry<String, String> entry : fieldNameMap.entrySet()) {
    Optional<Object> optional = AvroUtils.getFieldValue(inputRecord, entry.getValue());
    if (!optional.isPresent()) {
      throw new DataConversionException("Unable to get field value with location: " + entry.getValue());
    }
    outputRecord.put(entry.getKey(), optional.get());
  }
  return new SingleRecordIterable<>(outputRecord);
}
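The fieldNameMap values are dot-separated field locations that AvroUtils.getFieldValue resolves inside the nested record. Below is a minimal sketch of that resolution, where resolve() is a simplified stand-in for AvroUtils.getFieldValue; the schema and names are illustrative.

import com.google.common.base.Optional;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

// Sketch of resolving a dot-separated location like "address.city" in a
// nested Avro record; resolve() is a simplified stand-in for AvroUtils.getFieldValue.
public class FlattenSketch {
  static Optional<Object> resolve(GenericRecord record, String location) {
    Object current = record;
    for (String part : location.split("\\.")) {
      if (!(current instanceof GenericRecord)) {
        return Optional.absent();
      }
      current = ((GenericRecord) current).get(part);
    }
    return Optional.fromNullable(current);
  }

  public static void main(String[] args) {
    Schema address = SchemaBuilder.record("Address").fields()
        .requiredString("city").endRecord();
    Schema user = SchemaBuilder.record("User").fields()
        .requiredString("name")
        .name("address").type(address).noDefault()
        .endRecord();

    GenericRecord addr = new GenericData.Record(address);
    addr.put("city", "Berlin");
    GenericRecord rec = new GenericData.Record(user);
    rec.put("name", "alice");
    rec.put("address", addr);

    // "address.city" is the kind of location a fieldNameMap entry points at.
    System.out.println(resolve(rec, "address.city").get()); // Berlin
  }
}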
Use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.
The class CsvToJsonConverter, method convertRecord.
/**
 * Takes in a record formatted as a delimited String, splits it on the configured delimiter,
 * and uses the inputSchema together with the split record to build a JsonObject.
 * @return a JsonObject representing the record
 * @throws DataConversionException if the record cannot be split or converted
 */
@Override
public Iterable<JsonObject> convertRecord(JsonArray outputSchema, String inputRecord, WorkUnitState workUnit)
    throws DataConversionException {
  try {
    String strDelimiter = workUnit.getProp(ConfigurationKeys.CONVERTER_CSV_TO_JSON_DELIMITER);
    if (Strings.isNullOrEmpty(strDelimiter)) {
      throw new IllegalArgumentException("Delimiter cannot be empty");
    }
    InputStreamCSVReader reader = new InputStreamCSVReader(inputRecord, strDelimiter.charAt(0),
        workUnit.getProp(ConfigurationKeys.CONVERTER_CSV_TO_JSON_ENCLOSEDCHAR,
            ConfigurationKeys.DEFAULT_CONVERTER_CSV_TO_JSON_ENCLOSEDCHAR).charAt(0));
    List<String> recordSplit = Lists.newArrayList(reader.splitRecord());
    JsonObject outputRecord = new JsonObject();
    for (int i = 0; i < outputSchema.size(); i++) {
      String columnName = outputSchema.get(i).getAsJsonObject().get("columnName").getAsString();
      // Missing, null, empty, or literal "null" values map to JSON null
      if (i >= recordSplit.size() || recordSplit.get(i) == null || recordSplit.get(i).isEmpty()
          || recordSplit.get(i).toLowerCase().equals(NULL)) {
        outputRecord.add(columnName, JsonNull.INSTANCE);
      } else {
        outputRecord.addProperty(columnName, recordSplit.get(i));
      }
    }
    return new SingleRecordIterable<>(outputRecord);
  } catch (Exception e) {
    throw new DataConversionException(e);
  }
}
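Below is a minimal, self-contained sketch of the same mapping loop outside of Gobblin, using a plain String.split in place of InputStreamCSVReader (so no enclosing-character support) and an inline schema; all names and data are illustrative.

import com.google.gson.JsonArray;
import com.google.gson.JsonNull;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;

// Simplified stand-in for the conversion loop above: split on the delimiter
// and map each cell onto the schema's columnName, with JSON null for empty cells.
public class CsvToJsonSketch {
  public static void main(String[] args) {
    JsonArray schema = new JsonParser().parse(
        "[{\"columnName\":\"id\"},{\"columnName\":\"name\"},{\"columnName\":\"email\"}]")
        .getAsJsonArray();
    String[] cells = "42,alice,".split(",", -1);

    JsonObject record = new JsonObject();
    for (int i = 0; i < schema.size(); i++) {
      String column = schema.get(i).getAsJsonObject().get("columnName").getAsString();
      if (i >= cells.length || cells[i].isEmpty() || cells[i].equalsIgnoreCase("null")) {
        record.add(column, JsonNull.INSTANCE);
      } else {
        record.addProperty(column, cells[i]);
      }
    }
    System.out.println(record); // {"id":"42","name":"alice","email":null}
  }
}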
Use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.
The class BytesToJsonConverter, method convertRecord.
@Override
public Iterable<JsonObject> convertRecord(String outputSchema, byte[] inputRecord, WorkUnitState workUnit)
    throws DataConversionException {
  if (inputRecord == null) {
    throw new DataConversionException("Input record is null");
  }
  // Decode the raw bytes as UTF-8 and parse them into a JsonObject
  String jsonString = new String(inputRecord, Charsets.UTF_8);
  JsonParser parser = new JsonParser();
  JsonObject outputRecord = parser.parse(jsonString).getAsJsonObject();
  return new SingleRecordIterable<>(outputRecord);
}
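A minimal sketch of the same decode-then-parse step with Gson alone; StandardCharsets.UTF_8 stands in for Guava's Charsets.UTF_8, and the payload is illustrative.

import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.nio.charset.StandardCharsets;

// UTF-8 bytes in, Gson JsonObject out. A non-object payload would make
// getAsJsonObject() throw, which the converter above surfaces unchanged.
public class BytesToJsonSketch {
  public static void main(String[] args) {
    byte[] payload = "{\"user\":\"alice\",\"clicks\":3}".getBytes(StandardCharsets.UTF_8);
    String json = new String(payload, StandardCharsets.UTF_8);
    JsonObject record = new JsonParser().parse(json).getAsJsonObject();
    System.out.println(record.get("clicks").getAsInt()); // 3
  }
}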
Use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.
The class GrokToJsonConverter, method createOutput.
@VisibleForTesting
JsonObject createOutput(JsonArray outputSchema, String inputRecord) throws DataConversionException {
  JsonObject outputRecord = new JsonObject();
  // Run the Grok pattern against the raw record and collect the named captures as JSON
  Match gm = grok.match(inputRecord);
  gm.captures();
  JsonElement capturesJson = JSON_PARSER.parse(gm.toJson());
  for (JsonElement anOutputSchema : outputSchema) {
    JsonObject outputSchemaJsonObject = anOutputSchema.getAsJsonObject();
    String key = outputSchemaJsonObject.get(COLUMN_NAME_KEY).getAsString();
    String type = outputSchemaJsonObject.getAsJsonObject(DATA_TYPE).get(TYPE_KEY).getAsString();
    if (isFieldNull(capturesJson, key)) {
      if (!outputSchemaJsonObject.get(NULLABLE).getAsBoolean()) {
        throw new DataConversionException(
            "Field " + key + " is null or does not exist, but the schema marks it as non-nullable.");
      }
      outputRecord.add(key, JsonNull.INSTANCE);
    } else {
      // Coerce the captured value to the type declared in the output schema
      JsonElement jsonElement = capturesJson.getAsJsonObject().get(key);
      switch (type) {
        case "int":
          outputRecord.addProperty(key, jsonElement.getAsInt());
          break;
        case "long":
          outputRecord.addProperty(key, jsonElement.getAsLong());
          break;
        case "double":
          outputRecord.addProperty(key, jsonElement.getAsDouble());
          break;
        case "float":
          outputRecord.addProperty(key, jsonElement.getAsFloat());
          break;
        case "boolean":
          outputRecord.addProperty(key, jsonElement.getAsBoolean());
          break;
        case "string":
        default:
          outputRecord.addProperty(key, jsonElement.getAsString());
      }
    }
  }
  return outputRecord;
}
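Below is a simplified, self-contained sketch of the type-coercion switch, with a plain string value standing in for a real Grok capture; the parse methods replace the JsonElement getters, and all names are illustrative.

import com.google.gson.JsonObject;

// Simplified stand-in for the switch above: coerce a captured string value
// to the schema-declared type before adding it to the output record.
public class TypeCoercionSketch {
  static void addTyped(JsonObject out, String key, String type, String raw) {
    switch (type) {
      case "int":
        out.addProperty(key, Integer.parseInt(raw));
        break;
      case "long":
        out.addProperty(key, Long.parseLong(raw));
        break;
      case "double":
        out.addProperty(key, Double.parseDouble(raw));
        break;
      case "float":
        out.addProperty(key, Float.parseFloat(raw));
        break;
      case "boolean":
        out.addProperty(key, Boolean.parseBoolean(raw));
        break;
      default: // "string" and anything unrecognized stay as strings
        out.addProperty(key, raw);
    }
  }

  public static void main(String[] args) {
    JsonObject record = new JsonObject();
    addTyped(record, "status", "int", "200");
    addTyped(record, "bytes", "long", "5120");
    addTyped(record, "client", "string", "10.0.0.1");
    System.out.println(record); // {"status":200,"bytes":5120,"client":"10.0.0.1"}
  }
}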