
Example 6 with DataConversionException

Use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.

From class AnyToCouchbaseJsonConverter, method convertRecord:

@Override
public Iterable<RawJsonDocument> convertRecord(String outputSchema, Object inputRecord, WorkUnitState workUnit) throws DataConversionException {
    JsonElement jsonElement = GSON.toJsonTree(inputRecord);
    if (!jsonElement.isJsonObject()) {
        throw new DataConversionException("Expecting json element " + jsonElement.toString() + " to be of type JsonObject.");
    }
    JsonObject jsonObject = jsonElement.getAsJsonObject();
    if (!jsonObject.has(keyField)) {
        throw new DataConversionException("Could not find key field " + keyField + " in json object " + jsonObject.toString());
    }
    JsonElement keyValueElement = jsonObject.get(keyField);
    String keyString;
    try {
        keyString = keyValueElement.getAsString();
    } catch (Exception e) {
        throw new DataConversionException("Could not get the key " + keyValueElement.toString() + " as a string", e);
    }
    String valueString = GSON.toJson(jsonElement);
    RawJsonDocument jsonDocument = RawJsonDocument.create(keyString, valueString);
    return new SingleRecordIterable<>(jsonDocument);
}
Also used: SingleRecordIterable (org.apache.gobblin.converter.SingleRecordIterable), JsonElement (com.google.gson.JsonElement), JsonObject (com.google.gson.JsonObject), DataConversionException (org.apache.gobblin.converter.DataConversionException), RawJsonDocument (com.couchbase.client.java.document.RawJsonDocument), SchemaConversionException (org.apache.gobblin.converter.SchemaConversionException)
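
For orientation, the same Gson key-extraction pattern can be exercised outside Gobblin. The record class and the "id" key below are illustrative stand-ins for the converter's inputRecord and keyField, not part of the Gobblin API:

import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;

public class KeyExtractionSketch {
    private static final Gson GSON = new Gson();

    // Hypothetical input record; any POJO Gson can serialize plays this role.
    static class UserRecord {
        String id = "user-42";
        String name = "Ada";
    }

    public static void main(String[] args) {
        JsonElement jsonElement = GSON.toJsonTree(new UserRecord());
        JsonObject jsonObject = jsonElement.getAsJsonObject();
        // "id" plays the role of keyField in the converter above.
        String keyString = jsonObject.get("id").getAsString();
        String valueString = GSON.toJson(jsonElement);
        // Prints something like: user-42 -> {"id":"user-42","name":"Ada"}
        System.out.println(keyString + " -> " + valueString);
    }
}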

Example 7 with DataConversionException

Use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.

From class Task, method runSynchronousModel:

@Deprecated
private void runSynchronousModel() throws Exception {
    // Get the fork operator. By default IdentityForkOperator is used with a single branch.
    ForkOperator forkOperator = closer.register(this.taskContext.getForkOperator());
    forkOperator.init(this.taskState);
    int branches = forkOperator.getBranches(this.taskState);
    // Set fork.branches explicitly here so the rest of the task flow can pick it up
    this.taskState.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, branches);
    // Extract, convert, and fork the source schema.
    Object schema = converter.convertSchema(extractor.getSchema(), this.taskState);
    List<Boolean> forkedSchemas = forkOperator.forkSchema(this.taskState, schema);
    if (forkedSchemas.size() != branches) {
        throw new ForkBranchMismatchException(String.format("Number of forked schemas [%d] is not equal to number of branches [%d]", forkedSchemas.size(), branches));
    }
    if (inMultipleBranches(forkedSchemas) && !(CopyHelper.isCopyable(schema))) {
        throw new CopyNotSupportedException(schema + " is not copyable");
    }
    RowLevelPolicyCheckResults rowResults = new RowLevelPolicyCheckResults();
    if (!areSingleBranchTasksSynchronous(this.taskContext) || branches > 1) {
        // Create one fork for each forked branch
        for (int i = 0; i < branches; i++) {
            if (forkedSchemas.get(i)) {
                AsynchronousFork fork = closer.register(new AsynchronousFork(this.taskContext, schema instanceof Copyable ? ((Copyable) schema).copy() : schema, branches, i, this.taskMode));
                configureStreamingFork(fork, watermarkingStrategy);
                // Run the Fork
                this.forks.put(Optional.<Fork>of(fork), Optional.<Future<?>>of(this.taskExecutor.submit(fork)));
            } else {
                this.forks.put(Optional.<Fork>absent(), Optional.<Future<?>>absent());
            }
        }
    } else {
        SynchronousFork fork = closer.register(new SynchronousFork(this.taskContext, schema instanceof Copyable ? ((Copyable) schema).copy() : schema, branches, 0, this.taskMode));
        configureStreamingFork(fork, watermarkingStrategy);
        this.forks.put(Optional.<Fork>of(fork), Optional.<Future<?>>of(this.taskExecutor.submit(fork)));
    }
    if (isStreamingTask()) {
        // Start watermark manager and tracker
        if (this.watermarkTracker.isPresent()) {
            this.watermarkTracker.get().start();
        }
        this.watermarkManager.get().start();
        ((StreamingExtractor) this.taskContext.getRawSourceExtractor()).start(this.watermarkStorage.get());
        RecordEnvelope recordEnvelope;
        // Extract, convert, and fork one source record at a time.
        while (!shutdownRequested() && (recordEnvelope = extractor.readRecordEnvelope()) != null) {
            onRecordExtract();
            AcknowledgableWatermark ackableWatermark = new AcknowledgableWatermark(recordEnvelope.getWatermark());
            if (watermarkTracker.isPresent()) {
                watermarkTracker.get().track(ackableWatermark);
            }
            for (Object convertedRecord : converter.convertRecord(schema, recordEnvelope, this.taskState)) {
                processRecord(convertedRecord, forkOperator, rowChecker, rowResults, branches, ackableWatermark.incrementAck());
            }
            ackableWatermark.ack();
        }
    } else {
        RecordEnvelope record;
        // Extract, convert, and fork one source record at a time.
        long errRecords = 0;
        while ((record = extractor.readRecordEnvelope()) != null) {
            onRecordExtract();
            try {
                for (Object convertedRecord : converter.convertRecord(schema, record.getRecord(), this.taskState)) {
                    processRecord(convertedRecord, forkOperator, rowChecker, rowResults, branches, null);
                }
            } catch (Exception e) {
                if (!(e instanceof DataConversionException) && !(e.getCause() instanceof DataConversionException)) {
                    LOG.error("Processing record incurs an unexpected exception: ", e);
                    throw new RuntimeException(e.getCause());
                }
                errRecords++;
                if (errRecords > this.taskState.getPropAsLong(TaskConfigurationKeys.TASK_SKIP_ERROR_RECORDS, TaskConfigurationKeys.DEFAULT_TASK_SKIP_ERROR_RECORDS)) {
                    throw new RuntimeException(e);
                }
            }
        }
    }
    LOG.info("Extracted " + this.recordsPulled + " data records");
    LOG.info("Row quality checker finished with results: " + rowResults.getResults());
    this.taskState.setProp(ConfigurationKeys.EXTRACTOR_ROWS_EXTRACTED, this.recordsPulled);
    this.taskState.setProp(ConfigurationKeys.EXTRACTOR_ROWS_EXPECTED, extractor.getExpectedRecordCount());
    for (Optional<Fork> fork : this.forks.keySet()) {
        if (fork.isPresent()) {
            // Tell the fork that the main branch is completed and no new incoming data records should be expected
            fork.get().markParentTaskDone();
        }
    }
    for (Optional<Future<?>> forkFuture : this.forks.values()) {
        if (forkFuture.isPresent()) {
            try {
                long forkFutureStartTime = System.nanoTime();
                forkFuture.get().get();
                long forkDuration = System.nanoTime() - forkFutureStartTime;
                LOG.info("Task shutdown: Fork future reaped in {} millis", forkDuration / 1000000);
            } catch (InterruptedException ie) {
                Thread.currentThread().interrupt();
            }
        }
    }
}
Also used: AsynchronousFork (org.apache.gobblin.runtime.fork.AsynchronousFork), Fork (org.apache.gobblin.runtime.fork.Fork), SynchronousFork (org.apache.gobblin.runtime.fork.SynchronousFork), RecordEnvelope (org.apache.gobblin.stream.RecordEnvelope), StreamingExtractor (org.apache.gobblin.source.extractor.StreamingExtractor), DataConversionException (org.apache.gobblin.converter.DataConversionException), IOException (java.io.IOException), CopyNotSupportedException (org.apache.gobblin.fork.CopyNotSupportedException), ForkOperator (org.apache.gobblin.fork.ForkOperator), Copyable (org.apache.gobblin.fork.Copyable), Future (java.util.concurrent.Future), RowLevelPolicyCheckResults (org.apache.gobblin.qualitychecker.row.RowLevelPolicyCheckResults), AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)
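
The non-streaming branch above tolerates a bounded number of DataConversionExceptions before failing the task. A minimal sketch of that skip-limit pattern in isolation; the stand-in exception, converter, and limit are illustrative assumptions rather than Gobblin code, with maxSkippedRecords playing the role of TaskConfigurationKeys.TASK_SKIP_ERROR_RECORDS:

public class SkipLimitSketch {
    // Hypothetical stand-in for DataConversionException.
    static class BadRecordException extends Exception {
    }

    // Hypothetical stand-in for the converter: rejects empty records.
    static String convert(String record) throws BadRecordException {
        if (record.isEmpty()) {
            throw new BadRecordException();
        }
        return record.toUpperCase();
    }

    public static void main(String[] args) {
        long errRecords = 0;
        // Assumed limit, analogous to TASK_SKIP_ERROR_RECORDS.
        final long maxSkippedRecords = 2;
        for (String record : new String[] { "a", "", "b", "", "c" }) {
            try {
                System.out.println(convert(record));
            } catch (BadRecordException e) {
                // Skip the bad record unless the error budget is exhausted.
                if (++errRecords > maxSkippedRecords) {
                    throw new RuntimeException("Too many conversion failures", e);
                }
            }
        }
    }
}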

Example 8 with DataConversionException

Use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.

From class TestConverter2, method convertRecord:

@Override
public Iterable<CopyableGenericRecord> convertRecord(CopyableSchema schema, String inputRecord, WorkUnitState workUnit) throws DataConversionException {
    JsonElement element = GSON.fromJson(inputRecord, JsonElement.class);
    Map<String, Object> fields = GSON.fromJson(element, FIELD_ENTRY_TYPE);
    try {
        Schema avroSchema = schema.copy();
        GenericRecord record = new GenericData.Record(avroSchema);
        for (Map.Entry<String, Object> entry : fields.entrySet()) {
            if (entry.getValue() instanceof Double) {
                // Gson reads the integers in the input Json documents as doubles, so we have
                // to convert doubles to integers here as the Avro schema specifies integers.
                record.put(entry.getKey(), ((Double) entry.getValue()).intValue());
            } else {
                record.put(entry.getKey(), entry.getValue());
            }
        }
        return new SingleRecordIterable<CopyableGenericRecord>(new CopyableGenericRecord(record));
    } catch (CopyNotSupportedException cnse) {
        throw new DataConversionException(cnse);
    }
}
Also used: CopyableSchema (org.apache.gobblin.fork.CopyableSchema), Schema (org.apache.avro.Schema), SingleRecordIterable (org.apache.gobblin.converter.SingleRecordIterable), CopyableGenericRecord (org.apache.gobblin.fork.CopyableGenericRecord), JsonElement (com.google.gson.JsonElement), GenericRecord (org.apache.avro.generic.GenericRecord), DataConversionException (org.apache.gobblin.converter.DataConversionException), Map (java.util.Map), CopyNotSupportedException (org.apache.gobblin.fork.CopyNotSupportedException)
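
The double-to-integer workaround in this converter follows from Gson's default number handling: when deserializing into a Map<String, Object>, every untyped JSON number comes back as a java.lang.Double. A small demonstration, independent of Gobblin:

import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import java.lang.reflect.Type;
import java.util.Map;

public class GsonNumberSketch {
    public static void main(String[] args) {
        Gson gson = new Gson();
        Type mapType = new TypeToken<Map<String, Object>>() {}.getType();
        Map<String, Object> fields = gson.fromJson("{\"age\": 42}", mapType);
        // Prints "class java.lang.Double", which is why the converter above
        // narrows doubles back to int where the Avro schema expects integers.
        System.out.println(fields.get("age").getClass());
    }
}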

Example 9 with DataConversionException

Use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.

From class AvroToBytesConverter, method convertRecord:

@Override
public Iterable<byte[]> convertRecord(String outputSchema, GenericRecord inputRecord, WorkUnitState workUnit) throws DataConversionException {
    try {
        ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(bytesOut, encoderCache.get());
        encoderCache.set(encoder);
        writer.write(inputRecord, encoder);
        encoder.flush();
        return Collections.singleton(bytesOut.toByteArray());
    } catch (IOException e) {
        throw new DataConversionException("Error serializing record", e);
    }
}
Also used: BinaryEncoder (org.apache.avro.io.BinaryEncoder), ByteArrayOutputStream (java.io.ByteArrayOutputStream), IOException (java.io.IOException), DataConversionException (org.apache.gobblin.converter.DataConversionException)
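
The writer and encoderCache fields are not shown in this snippet. A self-contained sketch of the same pattern, assuming (this is a guess, not the actual class) that writer is a GenericDatumWriter and encoderCache is a per-thread encoder cache:

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;

public class EncoderReuseSketch {
    // Presumed shape of the converter's encoderCache field: one reusable
    // encoder per thread, handed back to binaryEncoder() as the reuse argument
    // so Avro can recycle its internal buffer instead of reallocating.
    private static final ThreadLocal<BinaryEncoder> ENCODER_CACHE = new ThreadLocal<>();

    static byte[] serialize(GenericRecord record, Schema schema) throws IOException {
        ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(bytesOut, ENCODER_CACHE.get());
        ENCODER_CACHE.set(encoder);
        new GenericDatumWriter<GenericRecord>(schema).write(record, encoder);
        encoder.flush();
        return bytesOut.toByteArray();
    }

    public static void main(String[] args) throws IOException {
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"User\",\"fields\":[{\"name\":\"id\",\"type\":\"int\"}]}");
        GenericRecord record = new GenericData.Record(schema);
        record.put("id", 7);
        System.out.println(serialize(record, schema).length + " byte(s)"); // 1 byte(s)
    }
}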

Example 10 with DataConversionException

Use of org.apache.gobblin.converter.DataConversionException in project incubator-gobblin by apache.

From class BytesToAvroConverter, method convertRecord:

@Override
public Iterable<GenericRecord> convertRecord(Schema outputSchema, byte[] inputRecord, WorkUnitState workUnit) throws DataConversionException {
    Preconditions.checkNotNull(recordReader, "Must have called convertSchema!");
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(inputRecord, decoderCache.get());
    try {
        GenericRecord parsedRecord = recordReader.read(null, decoder);
        decoderCache.set(decoder);
        return Collections.singleton(parsedRecord);
    } catch (IOException e) {
        throw new DataConversionException("Error parsing record", e);
    }
}
Also used: IOException (java.io.IOException), GenericRecord (org.apache.avro.generic.GenericRecord), DataConversionException (org.apache.gobblin.converter.DataConversionException), BinaryDecoder (org.apache.avro.io.BinaryDecoder)
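
Examples 9 and 10 are inverses of each other. A self-contained round trip through the same Avro APIs (the schema and record are illustrative, and recordReader in this converter is presumably a GenericDatumReader built in convertSchema) confirms the symmetry:

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

public class AvroRoundTripSketch {
    public static void main(String[] args) throws IOException {
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"User\",\"fields\":[{\"name\":\"id\",\"type\":\"int\"}]}");
        GenericRecord original = new GenericData.Record(schema);
        original.put("id", 7);

        // Serialize, as in AvroToBytesConverter.
        ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(bytesOut, null);
        new GenericDatumWriter<GenericRecord>(schema).write(original, encoder);
        encoder.flush();

        // Deserialize, as in BytesToAvroConverter.
        BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(bytesOut.toByteArray(), null);
        GenericRecord parsed = new GenericDatumReader<GenericRecord>(schema).read(null, decoder);
        System.out.println(original.equals(parsed)); // true
    }
}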

Aggregations

DataConversionException (org.apache.gobblin.converter.DataConversionException): 17 uses
IOException (java.io.IOException): 7 uses
SingleRecordIterable (org.apache.gobblin.converter.SingleRecordIterable): 7 uses
JsonObject (com.google.gson.JsonObject): 6 uses
GenericRecord (org.apache.avro.generic.GenericRecord): 5 uses
SchemaConversionException (org.apache.gobblin.converter.SchemaConversionException): 5 uses
JsonElement (com.google.gson.JsonElement): 4 uses
Map (java.util.Map): 3 uses
Schema (org.apache.avro.Schema): 3 uses
CopyNotSupportedException (org.apache.gobblin.fork.CopyNotSupportedException): 2 uses
HiveMetastoreClientPool (org.apache.gobblin.hive.HiveMetastoreClientPool): 2 uses
IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient): 2 uses
NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException): 2 uses
Table (org.apache.hadoop.hive.metastore.api.Table): 2 uses
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2 uses
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 2 uses
TException (org.apache.thrift.TException): 2 uses
RawJsonDocument (com.couchbase.client.java.document.RawJsonDocument): 1 use
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 1 use
JsonArray (com.google.gson.JsonArray): 1 use