Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
From the class ConnectorSink, method modifyRecord:
private StructuredRecord modifyRecord(KeyValue<String, StructuredRecord> input) throws IOException {
  String stageName = input.getKey();
  Schema inputSchema = input.getValue().getSchema();
  // Wrap the record in the connector envelope: the stage name, the schema as a JSON string,
  // and the record itself serialized to JSON.
  return StructuredRecord.builder(ConnectorSource.RECORD_WITH_SCHEMA)
    .set("stageName", stageName)
    .set("schema", inputSchema.toString())
    .set("record", StructuredRecordStringConverter.toJsonString(input.getValue()))
    .build();
}
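For context, the receiving side can reverse this envelope by parsing the schema string and deserializing the JSON payload. A minimal sketch, assuming the same field names used above; restoreRecord is a hypothetical helper, while Schema.parseJson and StructuredRecordStringConverter.fromJsonString are the CDAP counterparts of toString and toJsonString:

private StructuredRecord restoreRecord(StructuredRecord wrapped) throws IOException {
  // Rebuild the original schema from its JSON string form.
  Schema schema = Schema.parseJson((String) wrapped.get("schema"));
  // Deserialize the JSON payload back into a StructuredRecord with that schema.
  return StructuredRecordStringConverter.fromJsonString((String) wrapped.get("record"), schema);
}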
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
From the class ETLWorkerTest, method testOneSourceOneSink:
@Test
@Category(SlowTests.class)
public void testOneSourceOneSink() throws Exception {
  Schema schema = Schema.recordOf("test",
                                  Schema.Field.of("id", Schema.of(Schema.Type.STRING)),
                                  Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
  List<StructuredRecord> input = new ArrayList<>();
  input.add(StructuredRecord.builder(schema).set("id", "123").set("name", "samuel").build());
  input.add(StructuredRecord.builder(schema).set("id", "456").set("name", "jackson").build());

  File tmpDir = TMP_FOLDER.newFolder();
  ETLRealtimeConfig etlConfig = ETLRealtimeConfig.builder()
    .addStage(new ETLStage("source", MockSource.getPlugin(input)))
    .addStage(new ETLStage("sink", MockSink.getPlugin(tmpDir)))
    .addConnection("source", "sink")
    .build();

  ApplicationId appId = NamespaceId.DEFAULT.app("simpleApp");
  AppRequest<ETLRealtimeConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
  ApplicationManager appManager = deployApplication(appId, appRequest);

  WorkerManager workerManager = appManager.getWorkerManager(ETLWorker.NAME);
  workerManager.start();
  workerManager.waitForStatus(true, 10, 1);
  try {
    List<StructuredRecord> written = MockSink.getRecords(tmpDir, 0, 10, TimeUnit.SECONDS);
    Assert.assertEquals(input, written);
  } finally {
    stopWorker(workerManager);
  }

  validateMetric(2, appId, "source.records.out");
  validateMetric(2, appId, "sink.records.in");
}
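As a side note on the Schema API exercised above: a record schema built with Schema.recordOf exposes its fields by name, which is what sinks and assertions rely on. A short illustrative sketch using only standard CDAP Schema methods:

Schema schema = Schema.recordOf("test",
                                Schema.Field.of("id", Schema.of(Schema.Type.STRING)));
// Each field of a record schema carries its own schema, retrievable by name.
Schema.Field idField = schema.getField("id");
assert idField.getSchema().getType() == Schema.Type.STRING;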
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
From the class ETLWorkerTest, method testLookup:
@Test
public void testLookup() throws Exception {
  addDatasetInstance(KeyValueTable.class.getName(), "lookupTable");
  DataSetManager<KeyValueTable> lookupTable = getDataset("lookupTable");
  lookupTable.get().write("Bob".getBytes(Charsets.UTF_8), "123".getBytes(Charsets.UTF_8));
  lookupTable.flush();

  File outDir = TMP_FOLDER.newFolder();
  ETLRealtimeConfig etlConfig = ETLRealtimeConfig.builder()
    .addStage(new ETLStage("source", LookupSource.getPlugin(ImmutableSet.of("Bob", "Bill"), "lookupTable")))
    .addStage(new ETLStage("sink", MockSink.getPlugin(outDir)))
    .addConnection("source", "sink")
    .build();

  ApplicationId appId = NamespaceId.DEFAULT.app("lookupTestApp");
  AppRequest<ETLRealtimeConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
  ApplicationManager appManager = deployApplication(appId, appRequest);

  WorkerManager workerManager = appManager.getWorkerManager(ETLWorker.NAME);
  workerManager.start();
  workerManager.waitForStatus(true, 10, 1);

  Schema schema = Schema.recordOf("bobbill",
                                  Schema.Field.of("Bob", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
                                  Schema.Field.of("Bill", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
  // Only "Bob" has an entry in the lookup table, so "Bill" is left unset (null) in the expected record.
  List<StructuredRecord> expected = new ArrayList<>();
  expected.add(StructuredRecord.builder(schema).set("Bob", "123").build());

  try {
    List<StructuredRecord> actual = MockSink.getRecords(outDir, 0, 10, TimeUnit.SECONDS);
    Assert.assertEquals(expected, actual);
  } finally {
    stopWorker(workerManager);
  }

  validateMetric(1, appId, "source.records.out");
  validateMetric(1, appId, "sink.records.in");
}
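The expected schema wraps each field in Schema.nullableOf precisely because a lookup may miss. A nullable schema is a union of the wrapped type and null; a small sketch of the relevant Schema calls:

Schema nullableString = Schema.nullableOf(Schema.of(Schema.Type.STRING));
// A nullable schema reports itself as nullable and can be unwrapped to the underlying type.
assert nullableString.isNullable();
assert nullableString.getNonNullable().getType() == Schema.Type.STRING;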
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
From the class FileStreamAdmin, method updateConfig:
@Override
public void updateConfig(final StreamId streamId, final StreamProperties properties) throws Exception {
  Location streamLocation;
  streamLocation = impersonator.doAs(streamId.getParent(), new Callable<Location>() {

    @Override
    public Location call() throws Exception {
      return getStreamLocation(streamId);
    }
  });
  Preconditions.checkArgument(streamLocation.isDirectory(), "Stream '%s' does not exist.", streamId);
  SecurityUtil.verifyOwnerPrincipal(streamId, properties.getOwnerPrincipal(), ownerAdmin);
  streamCoordinatorClient.updateProperties(streamId, new Callable<CoordinatorStreamProperties>() {

    @Override
    public CoordinatorStreamProperties call() throws Exception {
      StreamProperties oldProperties = updateProperties(streamId, properties);
      FormatSpecification format = properties.getFormat();
      if (format != null) {
        // If the schema has changed, we need to recreate the Hive table. Changes in format
        // and settings don't require a Hive change, as they are just properties used by
        // the stream storage handler.
        Schema currSchema = oldProperties.getFormat().getSchema();
        Schema newSchema = format.getSchema();
        if (!Objects.equals(currSchema, newSchema)) {
          alterExploreStream(streamId, false, null);
          alterExploreStream(streamId, true, format);
        }
      }
      publishAudit(streamId, AuditType.UPDATE);
      return new CoordinatorStreamProperties(properties.getTTL(), properties.getFormat(),
                                             properties.getNotificationThresholdMB(), null,
                                             properties.getDescription(), properties.getOwnerPrincipal());
    }
  });
}
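The Objects.equals check above relies on Schema implementing value equality, and it also tolerates a null schema on either side. A minimal illustration with hypothetical schemas:

Schema oldSchema = Schema.recordOf("body", Schema.Field.of("f", Schema.of(Schema.Type.STRING)));
Schema newSchema = Schema.recordOf("body", Schema.Field.of("f", Schema.of(Schema.Type.STRING)));
// Structurally identical schemas compare equal, so the Hive table is left untouched.
assert Objects.equals(oldSchema, newSchema);
// A null schema (e.g. a format with no schema) simply compares unequal to a non-null one.
assert !Objects.equals(oldSchema, null);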
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
From the class ClassicSparkProgram, method main:
public static void main(String[] args) throws Exception {
  SparkConf sparkConf = new SparkConf();
  sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
  sparkConf.set("spark.kryo.registrator", MyKryoRegistrator.class.getName());

  Schema schema = Schema.recordOf("record",
                                  Schema.Field.of("name", Schema.of(Schema.Type.STRING)),
                                  Schema.Field.of("id", Schema.of(Schema.Type.INT)));
  List<StructuredRecord> records = new ArrayList<>();
  for (int i = 1; i <= 10; i++) {
    records.add(StructuredRecord.builder(schema).set("name", "Name" + i).set("id", i).build());
  }

  // This tests serialization of StructuredRecord as well as the use of a custom Kryo serializer.
  JavaSparkContext jsc = new JavaSparkContext(sparkConf);
  int result = jsc.parallelize(records).mapToPair(new PairFunction<StructuredRecord, MyInt, StructuredRecord>() {

    @Override
    public Tuple2<MyInt, StructuredRecord> call(StructuredRecord record) throws Exception {
      return new Tuple2<>(new MyInt((Integer) record.get("id")), record);
    }
  }).map(new Function<Tuple2<MyInt, StructuredRecord>, MyInt>() {

    @Override
    public MyInt call(Tuple2<MyInt, StructuredRecord> tuple) throws Exception {
      return tuple._1;
    }
  }).reduce(new Function2<MyInt, MyInt, MyInt>() {

    @Override
    public MyInt call(MyInt v1, MyInt v2) throws Exception {
      return new MyInt(v1.toInt() + v2.toInt());
    }
  }).toInt();

  // The sum of the ids 1..10 should be 55.
  if (result != 55) {
    throw new Exception("Expected result to be 55");
  }
}
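MyKryoRegistrator itself is not shown in this snippet. A registrator for Spark's Kryo serializer implements org.apache.spark.serializer.KryoRegistrator; a minimal sketch of what it might look like, where MyIntSerializer is a hypothetical custom serializer for MyInt (the actual class in cdap may differ):

// Hypothetical sketch of a Kryo registrator; only the KryoRegistrator interface is Spark's real API.
public class MyKryoRegistrator implements KryoRegistrator {

  @Override
  public void registerClasses(Kryo kryo) {
    // Register MyInt with a custom serializer so Spark can shuffle it via Kryo.
    kryo.register(MyInt.class, new MyIntSerializer());
  }
}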