Use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.
The class AnyToCouchbaseJsonConverter, method convertRecord.
@Override
public Iterable<RawJsonDocument> convertRecord(String outputSchema, Object inputRecord, WorkUnitState workUnit)
    throws DataConversionException {
  // Map the arbitrary input record to a JSON tree; only JSON objects can carry a key field.
  JsonElement jsonElement = GSON.toJsonTree(inputRecord);
  if (!jsonElement.isJsonObject()) {
    throw new DataConversionException("Expecting json element " + jsonElement.toString() + " to be of type JsonObject.");
  }
  JsonObject jsonObject = jsonElement.getAsJsonObject();
  if (!jsonObject.has(keyField)) {
    throw new DataConversionException("Could not find key field " + keyField + " in json object " + jsonObject.toString());
  }
  // The configured key field becomes the Couchbase document id; the whole object becomes the document body.
  JsonElement keyValueElement = jsonObject.get(keyField);
  String keyString;
  try {
    keyString = keyValueElement.getAsString();
  } catch (Exception e) {
    throw new DataConversionException("Could not get the key " + keyValueElement.toString() + " as a string", e);
  }
  String valueString = GSON.toJson(jsonElement);
  RawJsonDocument jsonDocument = RawJsonDocument.create(keyString, valueString);
  return new SingleRecordIterable<>(jsonDocument);
}
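The converter maps an arbitrary input record to a JSON tree with Gson, reads the configured key field as the Couchbase document id, and uses the whole JSON object as the document body. A minimal standalone sketch of the same logic follows; the SampleRecord POJO and the "id" key field are illustrative assumptions, not part of the Gobblin converter.

import com.couchbase.client.java.document.RawJsonDocument;
import com.google.gson.Gson;
import com.google.gson.JsonObject;

public class CouchbaseJsonSketch {
  private static final Gson GSON = new Gson();

  // Illustrative POJO standing in for an arbitrary input record.
  static class SampleRecord {
    String id = "user-42";
    String name = "gobblin";
  }

  public static void main(String[] args) {
    String keyField = "id"; // assumed key field; the real converter reads this from configuration
    JsonObject json = GSON.toJsonTree(new SampleRecord()).getAsJsonObject();
    String key = json.get(keyField).getAsString();
    RawJsonDocument doc = RawJsonDocument.create(key, GSON.toJson(json));
    System.out.println(doc.id() + " -> " + doc.content());
  }
}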
Use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.
The class Task, method runSynchronousModel.
@Deprecated
private void runSynchronousModel() throws Exception {
  // Get the fork operator. By default IdentityForkOperator is used with a single branch.
  ForkOperator forkOperator = closer.register(this.taskContext.getForkOperator());
  forkOperator.init(this.taskState);
  int branches = forkOperator.getBranches(this.taskState);
  // Set fork.branches explicitly here so the rest of the task flow can pick it up
  this.taskState.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, branches);

  // Extract, convert, and fork the source schema.
  Object schema = converter.convertSchema(extractor.getSchema(), this.taskState);
  List<Boolean> forkedSchemas = forkOperator.forkSchema(this.taskState, schema);
  if (forkedSchemas.size() != branches) {
    throw new ForkBranchMismatchException(
        String.format("Number of forked schemas [%d] is not equal to number of branches [%d]", forkedSchemas.size(), branches));
  }
  if (inMultipleBranches(forkedSchemas) && !(CopyHelper.isCopyable(schema))) {
    throw new CopyNotSupportedException(schema + " is not copyable");
  }

  RowLevelPolicyCheckResults rowResults = new RowLevelPolicyCheckResults();

  if (!areSingleBranchTasksSynchronous(this.taskContext) || branches > 1) {
    // Create one fork for each forked branch
    for (int i = 0; i < branches; i++) {
      if (forkedSchemas.get(i)) {
        AsynchronousFork fork = closer.register(
            new AsynchronousFork(this.taskContext, schema instanceof Copyable ? ((Copyable) schema).copy() : schema, branches, i, this.taskMode));
        configureStreamingFork(fork, watermarkingStrategy);
        // Run the Fork
        this.forks.put(Optional.<Fork>of(fork), Optional.<Future<?>>of(this.taskExecutor.submit(fork)));
      } else {
        this.forks.put(Optional.<Fork>absent(), Optional.<Future<?>>absent());
      }
    }
  } else {
    SynchronousFork fork = closer.register(
        new SynchronousFork(this.taskContext, schema instanceof Copyable ? ((Copyable) schema).copy() : schema, branches, 0, this.taskMode));
    configureStreamingFork(fork, watermarkingStrategy);
    this.forks.put(Optional.<Fork>of(fork), Optional.<Future<?>>of(this.taskExecutor.submit(fork)));
  }

  if (isStreamingTask()) {
    // Start watermark manager and tracker
    if (this.watermarkTracker.isPresent()) {
      this.watermarkTracker.get().start();
    }
    this.watermarkManager.get().start();
    ((StreamingExtractor) this.taskContext.getRawSourceExtractor()).start(this.watermarkStorage.get());
    RecordEnvelope recordEnvelope;
    // Extract, convert, and fork one source record at a time.
    while (!shutdownRequested() && (recordEnvelope = extractor.readRecordEnvelope()) != null) {
      onRecordExtract();
      AcknowledgableWatermark ackableWatermark = new AcknowledgableWatermark(recordEnvelope.getWatermark());
      if (watermarkTracker.isPresent()) {
        watermarkTracker.get().track(ackableWatermark);
      }
      for (Object convertedRecord : converter.convertRecord(schema, recordEnvelope, this.taskState)) {
        processRecord(convertedRecord, forkOperator, rowChecker, rowResults, branches, ackableWatermark.incrementAck());
      }
      ackableWatermark.ack();
    }
  } else {
    RecordEnvelope record;
    // Extract, convert, and fork one source record at a time.
    long errRecords = 0;
    while ((record = extractor.readRecordEnvelope()) != null) {
      onRecordExtract();
      try {
        for (Object convertedRecord : converter.convertRecord(schema, record.getRecord(), this.taskState)) {
          processRecord(convertedRecord, forkOperator, rowChecker, rowResults, branches, null);
        }
      } catch (Exception e) {
        if (!(e instanceof DataConversionException) && !(e.getCause() instanceof DataConversionException)) {
          LOG.error("Processing record incurs an unexpected exception: ", e);
          throw new RuntimeException(e.getCause());
        }
        errRecords++;
        if (errRecords > this.taskState.getPropAsLong(TaskConfigurationKeys.TASK_SKIP_ERROR_RECORDS, TaskConfigurationKeys.DEFAULT_TASK_SKIP_ERROR_RECORDS)) {
          throw new RuntimeException(e);
        }
      }
    }
  }

  LOG.info("Extracted " + this.recordsPulled + " data records");
  LOG.info("Row quality checker finished with results: " + rowResults.getResults());
  this.taskState.setProp(ConfigurationKeys.EXTRACTOR_ROWS_EXTRACTED, this.recordsPulled);
  this.taskState.setProp(ConfigurationKeys.EXTRACTOR_ROWS_EXPECTED, extractor.getExpectedRecordCount());

  for (Optional<Fork> fork : this.forks.keySet()) {
    if (fork.isPresent()) {
      // Tell the fork that the main branch is completed and no new incoming data records should be expected
      fork.get().markParentTaskDone();
    }
  }

  for (Optional<Future<?>> forkFuture : this.forks.values()) {
    if (forkFuture.isPresent()) {
      try {
        long forkFutureStartTime = System.nanoTime();
        forkFuture.get().get();
        long forkDuration = System.nanoTime() - forkFutureStartTime;
        LOG.info("Task shutdown: Fork future reaped in {} millis", forkDuration / 1000000);
      } catch (InterruptedException ie) {
        Thread.currentThread().interrupt();
      }
    }
  }
}
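In the non-streaming branch above, records that fail with a DataConversionException are counted and skipped; the task only fails once the count exceeds TASK_SKIP_ERROR_RECORDS. A simplified, self-contained sketch of that skip-threshold pattern follows; the ConversionException class, the convert method, and the threshold value are stand-ins, not Gobblin API.

import java.util.List;

public class SkipErrorRecordsSketch {

  // Stand-in for DataConversionException.
  static class ConversionException extends Exception {
    ConversionException(String msg) { super(msg); }
  }

  static String convert(String record) throws ConversionException {
    if (record.isEmpty()) {
      throw new ConversionException("empty record");
    }
    return record.toUpperCase();
  }

  public static void main(String[] args) {
    long maxSkippedErrors = 1; // plays the role of TASK_SKIP_ERROR_RECORDS
    long errRecords = 0;
    for (String record : List.of("a", "", "b")) {
      try {
        System.out.println(convert(record));
      } catch (ConversionException e) {
        // Conversion failures are tolerated up to the configured threshold,
        // then the whole run fails, mirroring the loop above.
        errRecords++;
        if (errRecords > maxSkippedErrors) {
          throw new RuntimeException(e);
        }
      }
    }
  }
}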
Use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.
The class TestConverter2, method convertRecord.
@Override
public Iterable<CopyableGenericRecord> convertRecord(CopyableSchema schema, String inputRecord, WorkUnitState workUnit)
    throws DataConversionException {
  JsonElement element = GSON.fromJson(inputRecord, JsonElement.class);
  Map<String, Object> fields = GSON.fromJson(element, FIELD_ENTRY_TYPE);
  try {
    Schema avroSchema = schema.copy();
    GenericRecord record = new GenericData.Record(avroSchema);
    for (Map.Entry<String, Object> entry : fields.entrySet()) {
      if (entry.getValue() instanceof Double) {
        // Gson reads the integers in the input Json documents as doubles, so we have
        // to convert doubles to integers here as the Avro schema specifies integers.
        record.put(entry.getKey(), ((Double) entry.getValue()).intValue());
      } else {
        record.put(entry.getKey(), entry.getValue());
      }
    }
    return new SingleRecordIterable<CopyableGenericRecord>(new CopyableGenericRecord(record));
  } catch (CopyNotSupportedException cnse) {
    throw new DataConversionException(cnse);
  }
}
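The comment in the loop points at a general Gson behavior: when deserializing into a Map<String, Object>, JSON numbers come back as Double, so integer-typed Avro fields need an explicit narrowing. A small sketch demonstrating just that behavior, with an illustrative field name:

import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import java.lang.reflect.Type;
import java.util.Map;

public class GsonDoubleSketch {
  public static void main(String[] args) {
    Gson gson = new Gson();
    Type type = new TypeToken<Map<String, Object>>() {}.getType();
    Map<String, Object> fields = gson.fromJson("{\"age\": 30}", type);
    Object age = fields.get("age");
    System.out.println(age.getClass()); // java.lang.Double with default Gson settings
    int asInt = ((Double) age).intValue(); // the narrowing applied before populating an int Avro field
    System.out.println(asInt);
  }
}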
Use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.
The class AvroToBytesConverter, method convertRecord.
@Override
public Iterable<byte[]> convertRecord(String outputSchema, GenericRecord inputRecord, WorkUnitState workUnit)
    throws DataConversionException {
  try {
    ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
    // Reuse the cached encoder's buffer if one exists; a null reuse argument creates a fresh encoder.
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(bytesOut, encoderCache.get());
    encoderCache.set(encoder);
    writer.write(inputRecord, encoder);
    encoder.flush();
    return Collections.singleton(bytesOut.toByteArray());
  } catch (IOException e) {
    throw new DataConversionException("Error serializing record", e);
  }
}
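The write path is plain Avro binary encoding, with encoder reuse through the second argument of EncoderFactory.binaryEncoder. A self-contained sketch of the same pattern follows; the schema, record, and GenericDatumWriter setup are assumptions here, since the converter builds its writer in convertSchema.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;

public class AvroToBytesSketch {
  public static void main(String[] args) throws IOException {
    // Illustrative schema and record.
    Schema schema = SchemaBuilder.record("User").fields()
        .requiredString("name").requiredInt("age").endRecord();
    GenericRecord record = new GenericData.Record(schema);
    record.put("name", "gobblin");
    record.put("age", 7);

    GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
    ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
    // Passing a previous encoder lets Avro reuse its buffer; null creates a fresh one.
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(bytesOut, null);
    writer.write(record, encoder);
    encoder.flush();
    System.out.println(bytesOut.toByteArray().length + " bytes");
  }
}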
Use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.
The class BytesToAvroConverter, method convertRecord.
@Override
public Iterable<GenericRecord> convertRecord(Schema outputSchema, byte[] inputRecord, WorkUnitState workUnit)
    throws DataConversionException {
  Preconditions.checkNotNull(recordReader, "Must have called convertSchema!");
  // Reuse the cached decoder if one exists; a null reuse argument creates a fresh decoder.
  BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(inputRecord, decoderCache.get());
  try {
    GenericRecord parsedRecord = recordReader.read(null, decoder);
    decoderCache.set(decoder);
    return Collections.singleton(parsedRecord);
  } catch (IOException e) {
    throw new DataConversionException("Error parsing record", e);
  }
}
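The read path mirrors the write path: DecoderFactory.binaryDecoder with an optional decoder to reuse, plus a GenericDatumReader built from the schema established in convertSchema. A hedged round-trip sketch combining both converters' logic, with an illustrative schema and record:

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

public class BytesToAvroSketch {
  public static void main(String[] args) throws IOException {
    Schema schema = SchemaBuilder.record("Event").fields().requiredString("id").endRecord();
    GenericRecord original = new GenericData.Record(schema);
    original.put("id", "evt-1");

    // Serialize (what AvroToBytesConverter does per record).
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    new GenericDatumWriter<GenericRecord>(schema).write(original, encoder);
    encoder.flush();

    // Deserialize (what BytesToAvroConverter does per record).
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null);
    GenericRecord parsed = new GenericDatumReader<GenericRecord>(schema).read(null, decoder);
    System.out.println(parsed);
  }
}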