Example 81 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

From class AnyToCouchbaseJsonConverterTest, method testBasicConvert.

private void testBasicConvert(String keyField, boolean setConfig) throws Exception {
    String key = "hello";
    String testContent = "hello world";
    Map<String, String> content = new HashMap<>();
    content.put(keyField, key);
    content.put("value", testContent);
    AnyToCouchbaseJsonConverter recordConverter = new AnyToCouchbaseJsonConverter();
    WorkUnitState workUnitState = mock(WorkUnitState.class);
    if (setConfig) {
        // Stub an explicit key field; without it the converter falls back to its default.
        when(workUnitState.getProp(AnyToCouchbaseJsonConverter.KEY_FIELD_CONFIG)).thenReturn(keyField);
        when(workUnitState.contains(AnyToCouchbaseJsonConverter.KEY_FIELD_CONFIG)).thenReturn(true);
    }
    recordConverter.init(workUnitState);
    RawJsonDocument returnDoc = recordConverter.convertRecord("", content, null).iterator().next();
    System.out.println(returnDoc.toString());
    Assert.assertEquals(key.getBytes(), returnDoc.id().getBytes(), "key should be equal");
    Map<String, String> convertedMap = GSON.fromJson(returnDoc.content(), Map.class);
    Assert.assertEquals(key, convertedMap.get(keyField), "key in content should be equal");
    Assert.assertEquals(testContent, convertedMap.get("value"), "value in content should be equal");
    Assert.assertEquals(2, convertedMap.keySet().size(), "should have 2 fields");
}
Also used : HashMap(java.util.HashMap) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) RawJsonDocument(com.couchbase.client.java.document.RawJsonDocument)
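
For context, a hypothetical pair of TestNG entry points (not part of the quoted source) would drive this helper once with an explicit key-field configuration and once relying on the converter's default:

@Test
public void testConvertWithConfiguredKeyField() throws Exception {
    // Exercises the branch that stubs KEY_FIELD_CONFIG on the mocked WorkUnitState.
    testBasicConvert("customKey", true);
}

@Test
public void testConvertWithDefaultKeyField() throws Exception {
    // "key" is assumed to be the converter's default key field here.
    testBasicConvert("key", false);
}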

Example 82 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

From class AsyncHttpWriterBuilder, method fromState.

AsyncHttpWriterBuilder<D, RQ, RP> fromState(State state) {
    // Only a WorkUnitState carries the task broker needed below.
    if (!(state instanceof WorkUnitState)) {
        throw new IllegalStateException(String.format("AsyncHttpWriterBuilder requires a %s on construction.", WorkUnitState.class.getSimpleName()));
    }
    this.state = (WorkUnitState) state;
    this.metricContext = Instrumented.getMetricContext(this.state, AsyncHttpWriter.class);
    this.broker = this.state.getTaskBroker();
    // Build a Config from the state's properties under CONF_PREFIX, then layer it
    // over FALLBACK so unspecified keys pick up the defaults.
    Config config = ConfigBuilder.create().loadProps(state.getProperties(), CONF_PREFIX).build();
    config = config.withFallback(FALLBACK);
    return fromConfig(config);
}
Also used : WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Config(com.typesafe.config.Config)
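
The loadProps/withFallback layering is standard Typesafe Config usage: state-derived settings win, and FALLBACK only fills in keys the state did not set. A minimal, self-contained sketch of that behavior (the key names below are illustrative, not AsyncHttpWriter's actual configuration):

import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

public class FallbackLayeringSketch {
    public static void main(String[] args) {
        Config fromState = ConfigFactory.parseString("maxAttempts = 5");
        Config fallback = ConfigFactory.parseString("maxAttempts = 3, batchSize = 100");
        // withFallback keeps fromState's values and adds fallback's missing keys.
        Config merged = fromState.withFallback(fallback);
        System.out.println(merged.getInt("maxAttempts")); // 5, from the state layer
        System.out.println(merged.getInt("batchSize"));   // 100, from the fallback
    }
}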

Example 83 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

From class AvroStringFieldEncryptorConverterTest, method testNestedConversion.

@Test
public void testNestedConversion() throws DataConversionException, IOException, SchemaConversionException {
    AvroStringFieldEncryptorConverter converter = new AvroStringFieldEncryptorConverter();
    WorkUnitState wuState = new WorkUnitState();
    wuState.getJobState().setProp("converter.fieldsToEncrypt", "nestedRecords.*.fieldToEncrypt");
    wuState.getJobState().setProp("converter.encrypt.algorithm", "insecure_shift");
    converter.init(wuState);
    GenericRecord inputRecord = getRecordFromFile(getClass().getClassLoader().getResource("record_with_arrays.avro").getPath());
    Schema inputSchema = inputRecord.getSchema();
    Schema outputSchema = converter.convertSchema(inputSchema, wuState);
    List<String> origValues = new ArrayList<>();
    for (Object o : (List) inputRecord.get("nestedRecords")) {
        GenericRecord r = (GenericRecord) o;
        origValues.add(r.get("fieldToEncrypt").toString());
    }
    Iterable<GenericRecord> recordIt = converter.convertRecord(outputSchema, inputRecord, wuState);
    GenericRecord record = recordIt.iterator().next();
    // The string-field encryptor must leave the schema unchanged.
    Assert.assertEquals(outputSchema, inputSchema);
    List<String> decryptedValues = new ArrayList<>();
    for (Object o : (List) record.get("nestedRecords")) {
        GenericRecord r = (GenericRecord) o;
        String encryptedValue = r.get("fieldToEncrypt").toString();
        // Decrypt with the same insecure_shift codec the converter was configured with.
        InsecureShiftCodec codec = new InsecureShiftCodec(Maps.<String, Object>newHashMap());
        InputStream in = codec.decodeInputStream(new ByteArrayInputStream(encryptedValue.getBytes(StandardCharsets.UTF_8)));
        // Read back the decrypted bytes; the whole payload is in memory here.
        byte[] decryptedValue = new byte[in.available()];
        in.read(decryptedValue);
        decryptedValues.add(new String(decryptedValue, StandardCharsets.UTF_8));
    }
    Assert.assertEquals(decryptedValues, origValues);
}
Also used : InsecureShiftCodec(org.apache.gobblin.test.crypto.InsecureShiftCodec) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) ByteArrayInputStream(java.io.ByteArrayInputStream) ArrayList(java.util.ArrayList) List(java.util.List) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.testng.annotations.Test)
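
A minimal round-trip sketch of the codec used above, assuming StreamCodec's encodeOutputStream counterpart to the decodeInputStream call in the test (also assumes java.io.ByteArrayOutputStream and java.io.OutputStream in addition to the imports listed above):

private String roundTripThroughCodec(String plaintext) throws IOException {
    InsecureShiftCodec codec = new InsecureShiftCodec(Maps.<String, Object>newHashMap());
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    // encodeOutputStream is assumed here; the test above only uses the decode side.
    try (OutputStream encoder = codec.encodeOutputStream(sink)) {
        encoder.write(plaintext.getBytes(StandardCharsets.UTF_8));
    }
    InputStream decoder = codec.decodeInputStream(new ByteArrayInputStream(sink.toByteArray()));
    byte[] decoded = new byte[decoder.available()];
    decoder.read(decoded);
    // For a correct codec this returns the original plaintext.
    return new String(decoded, StandardCharsets.UTF_8);
}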

Example 84 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

From class EncryptedSerializedRecordToSerializedRecordConverterTest, method setUp.

@BeforeTest
public void setUp() {
    workUnitState = new WorkUnitState();
    converter = new EncryptedSerializedRecordToSerializedRecordConverter();
    // Payload to decrypt: each byte was shifted up by one by the insecure_shift codec.
    sampleRecord = new RecordWithMetadata<>(new byte[] { 'b', 'c', 'd', 'e' }, new Metadata());
    // Expected plaintext after the converter shifts the bytes back down.
    shiftedValue = new byte[] { 'a', 'b', 'c', 'd' };
    insecureShiftTag = InsecureShiftCodec.TAG;
}
Also used : WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Metadata(org.apache.gobblin.metadata.types.Metadata) RecordWithMetadata(org.apache.gobblin.type.RecordWithMetadata) BeforeTest(org.testng.annotations.BeforeTest)
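
The fixtures encode the insecure-shift contract: the converter is expected to shift each encrypted byte down by one, assuming the codec shifted them up by one on encode. A quick arithmetic check of that relationship (illustrative, not the converter's code):

byte[] encrypted = { 'b', 'c', 'd', 'e' };          // sampleRecord's payload above
byte[] expectedPlain = new byte[encrypted.length];
for (int i = 0; i < encrypted.length; i++) {
    expectedPlain[i] = (byte) (encrypted[i] - 1);   // shift each byte down by one
}
// expectedPlain is { 'a', 'b', 'c', 'd' }, i.e. shiftedValue above.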

Example 85 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

From class KafkaAvgRecordSizeBasedWorkUnitSizeEstimator, method readPreAvgRecordSizes.

private void readPreAvgRecordSizes(SourceState state) {
    this.estAvgSizes.clear();
    // Seed the size estimates from the previous run's per-partition averages.
    for (WorkUnitState workUnitState : state.getPreviousWorkUnitStates()) {
        List<KafkaPartition> partitions = KafkaUtils.getPartitions(workUnitState);
        for (KafkaPartition partition : partitions) {
            // Only partitions that recorded an average size last run contribute.
            if (KafkaUtils.containsPartitionAvgRecordSize(workUnitState, partition)) {
                long previousAvgSize = KafkaUtils.getPartitionAvgRecordSize(workUnitState, partition);
                this.estAvgSizes.put(partition, previousAvgSize);
            }
        }
    }
}
Also used : WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) KafkaPartition(org.apache.gobblin.source.extractor.extract.kafka.KafkaPartition)
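
A hypothetical consumer of the populated map might fall back to a default when a partition recorded no average in the previous run (the default below is illustrative, not taken from the source):

private static final long DEFAULT_AVG_RECORD_SIZE = 1024L; // illustrative default

long estimateAvgRecordSize(KafkaPartition partition) {
    Long previous = this.estAvgSizes.get(partition);
    return previous != null ? previous : DEFAULT_AVG_RECORD_SIZE;
}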

Aggregations

WorkUnitState (org.apache.gobblin.configuration.WorkUnitState): 222
Test (org.testng.annotations.Test): 143
State (org.apache.gobblin.configuration.State): 48
SourceState (org.apache.gobblin.configuration.SourceState): 39
WorkUnit (org.apache.gobblin.source.workunit.WorkUnit): 39
Schema (org.apache.avro.Schema): 29
Path (org.apache.hadoop.fs.Path): 26
GenericRecord (org.apache.avro.generic.GenericRecord): 19
JsonObject (com.google.gson.JsonObject): 17
ArrayList (java.util.ArrayList): 16
File (java.io.File): 14
TaskState (org.apache.hadoop.mapreduce.v2.api.records.TaskState): 12
List (java.util.List): 11
Configuration (org.apache.hadoop.conf.Configuration): 11
IOException (java.io.IOException): 10
LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark): 10
Extract (org.apache.gobblin.source.workunit.Extract): 10
FileSystem (org.apache.hadoop.fs.FileSystem): 10
Closer (com.google.common.io.Closer): 8
JsonParser (com.google.gson.JsonParser): 8