Search in sources :

Example 26 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class MockSource method writeInput.

private static void writeInput(DataSetManager<Table> tableManager, @Nullable String rowKey, Iterable<StructuredRecord> records) throws Exception {
    tableManager.flush();
    Table table = tableManager.get();
    // each rowkey will be a UUID.
    for (StructuredRecord record : records) {
        byte[] row = rowKey == null ? Bytes.toBytes(UUID.randomUUID()) : Bytes.toBytes(rowKey);
        table.put(row, SCHEMA_COL, Bytes.toBytes(record.getSchema().toString()));
        table.put(row, RECORD_COL, Bytes.toBytes(StructuredRecordStringConverter.toJsonString(record)));
    }
    tableManager.flush();
}
Also used : Table(co.cask.cdap.api.dataset.table.Table) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord)

Example 27 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class MockExternalSource method writeInput.

/**
   * Used to write the input records for the pipeline run. Should be called after the pipeline has been created.
   *
   * @param fileName file to write the records into
   * @param records records that should be the input for the pipeline
   */
public static void writeInput(String fileName, Iterable<StructuredRecord> records) throws Exception {
    String output = Joiner.on("\n").join(Iterables.transform(records, new Function<StructuredRecord, String>() {

        @Override
        public String apply(StructuredRecord input) {
            return GSON.toJson(input);
        }
    }));
    Files.write(output, new File(fileName), Charsets.UTF_8);
}
Also used : Function(com.google.common.base.Function) File(java.io.File) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord)

Example 28 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class ETLWorker method constructErrorPut.

private Put constructErrorPut(byte[] rowKey, InvalidEntry entry, long timeInMillis) throws IOException {
    Put errorPut = new Put(rowKey);
    errorPut.add(Constants.ErrorDataset.ERRCODE, entry.getErrorCode());
    errorPut.add(Constants.ErrorDataset.TIMESTAMP, timeInMillis);
    if (entry.getInvalidRecord() instanceof StructuredRecord) {
        StructuredRecord record = (StructuredRecord) entry.getInvalidRecord();
        errorPut.add(Constants.ErrorDataset.INVALIDENTRY, StructuredRecordStringConverter.toJsonString(record));
    } else {
        errorPut.add(Constants.ErrorDataset.INVALIDENTRY, String.format("Error Entry is of type %s, only records of type " + "co.cask.cdap.api.data.format.StructuredRecord " + "is supported currently", entry.getInvalidRecord().getClass().getName()));
    }
    return errorPut;
}
Also used : Put(co.cask.cdap.api.dataset.table.Put) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord)

Example 29 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class ETLWorkerTest method testOneSourceOneSink.

@Test
@Category(SlowTests.class)
public void testOneSourceOneSink() throws Exception {
    Schema schema = Schema.recordOf("test", Schema.Field.of("id", Schema.of(Schema.Type.STRING)), Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    List<StructuredRecord> input = new ArrayList<>();
    input.add(StructuredRecord.builder(schema).set("id", "123").set("name", "samuel").build());
    input.add(StructuredRecord.builder(schema).set("id", "456").set("name", "jackson").build());
    File tmpDir = TMP_FOLDER.newFolder();
    ETLRealtimeConfig etlConfig = ETLRealtimeConfig.builder().addStage(new ETLStage("source", MockSource.getPlugin(input))).addStage(new ETLStage("sink", MockSink.getPlugin(tmpDir))).addConnection("source", "sink").build();
    ApplicationId appId = NamespaceId.DEFAULT.app("simpleApp");
    AppRequest<ETLRealtimeConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationManager appManager = deployApplication(appId, appRequest);
    WorkerManager workerManager = appManager.getWorkerManager(ETLWorker.NAME);
    workerManager.start();
    workerManager.waitForStatus(true, 10, 1);
    try {
        List<StructuredRecord> written = MockSink.getRecords(tmpDir, 0, 10, TimeUnit.SECONDS);
        Assert.assertEquals(input, written);
    } finally {
        stopWorker(workerManager);
    }
    validateMetric(2, appId, "source.records.out");
    validateMetric(2, appId, "sink.records.in");
}
Also used : WorkerManager(co.cask.cdap.test.WorkerManager) ApplicationManager(co.cask.cdap.test.ApplicationManager) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) Schema(co.cask.cdap.api.data.schema.Schema) ArrayList(java.util.ArrayList) ETLRealtimeConfig(co.cask.cdap.etl.proto.v2.ETLRealtimeConfig) ApplicationId(co.cask.cdap.proto.id.ApplicationId) File(java.io.File) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) AppRequest(co.cask.cdap.proto.artifact.AppRequest) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 30 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class ETLWorkerTest method testLookup.

@Test
public void testLookup() throws Exception {
    addDatasetInstance(KeyValueTable.class.getName(), "lookupTable");
    DataSetManager<KeyValueTable> lookupTable = getDataset("lookupTable");
    lookupTable.get().write("Bob".getBytes(Charsets.UTF_8), "123".getBytes(Charsets.UTF_8));
    lookupTable.flush();
    File outDir = TMP_FOLDER.newFolder();
    ETLRealtimeConfig etlConfig = ETLRealtimeConfig.builder().addStage(new ETLStage("source", LookupSource.getPlugin(ImmutableSet.of("Bob", "Bill"), "lookupTable"))).addStage(new ETLStage("sink", MockSink.getPlugin(outDir))).addConnection("source", "sink").build();
    ApplicationId appId = NamespaceId.DEFAULT.app("lookupTestApp");
    AppRequest<ETLRealtimeConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationManager appManager = deployApplication(appId, appRequest);
    WorkerManager workerManager = appManager.getWorkerManager(ETLWorker.NAME);
    workerManager.start();
    workerManager.waitForStatus(true, 10, 1);
    Schema schema = Schema.recordOf("bobbill", Schema.Field.of("Bob", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("Bill", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
    List<StructuredRecord> expected = new ArrayList<>();
    expected.add(StructuredRecord.builder(schema).set("Bob", "123").build());
    try {
        List<StructuredRecord> actual = MockSink.getRecords(outDir, 0, 10, TimeUnit.SECONDS);
        Assert.assertEquals(expected, actual);
    } finally {
        stopWorker(workerManager);
    }
    validateMetric(1, appId, "source.records.out");
    validateMetric(1, appId, "sink.records.in");
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) Schema(co.cask.cdap.api.data.schema.Schema) ArrayList(java.util.ArrayList) ETLRealtimeConfig(co.cask.cdap.etl.proto.v2.ETLRealtimeConfig) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) AppRequest(co.cask.cdap.proto.artifact.AppRequest) WorkerManager(co.cask.cdap.test.WorkerManager) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) ApplicationId(co.cask.cdap.proto.id.ApplicationId) File(java.io.File) Test(org.junit.Test)

Aggregations

StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)97 Schema (co.cask.cdap.api.data.schema.Schema)71 Test (org.junit.Test)51 Table (co.cask.cdap.api.dataset.table.Table)36 ETLStage (co.cask.cdap.etl.proto.v2.ETLStage)36 ApplicationId (co.cask.cdap.proto.id.ApplicationId)36 ApplicationManager (co.cask.cdap.test.ApplicationManager)33 AppRequest (co.cask.cdap.proto.artifact.AppRequest)31 KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable)25 ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig)25 WorkflowManager (co.cask.cdap.test.WorkflowManager)23 ArrayList (java.util.ArrayList)20 StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent)19 FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification)18 HashSet (java.util.HashSet)10 DataStreamsConfig (co.cask.cdap.etl.proto.v2.DataStreamsConfig)8 File (java.io.File)8 TimeoutException (java.util.concurrent.TimeoutException)8 Put (co.cask.cdap.api.dataset.table.Put)7 ETLPlugin (co.cask.cdap.etl.proto.v2.ETLPlugin)7