Example use of co.cask.cdap.api.data.format.StructuredRecord in the project cdap by caskdata.
From the class ReflectionTableTest, method testStructuredRecordRepresentation.
/**
 * Round-trips {@code SAMUEL} through a table: the user is written with a
 * {@link ReflectionPutWriter}, the row is fetched again, decoded into a
 * {@link StructuredRecord} by a {@link ReflectionRowRecordReader}, and compared against the
 * original user with {@code assertRecordEqualsUser}.
 *
 * <p>The dataset instance is created up front and always deleted afterwards, whether or not
 * the transactional body succeeds.
 *
 * @throws Exception if dataset setup, the transaction, or cleanup fails
 */
@Test
public void testStructuredRecordRepresentation() throws Exception {
  dsFrameworkUtil.createInstance("table", users, DatasetProperties.builder().build());
  try {
    final Table table = dsFrameworkUtil.getInstance(users);
    final byte[] key = Bytes.toBytes(123);
    final Schema userSchema = new ReflectionSchemaGenerator().generate(User.class);

    // The whole write-then-read cycle runs as a single transactional unit.
    TransactionExecutor.Subroutine writeThenReadBack = new TransactionExecutor.Subroutine() {
      @Override
      public void apply() throws Exception {
        // Write the user into the row identified by the key.
        Put userPut = new Put(key);
        new ReflectionPutWriter<User>(userSchema).write(SAMUEL, userPut);
        table.put(userPut);

        // Fetch the same row and decode it as a StructuredRecord using the same schema.
        Row fetched = table.get(key);
        ReflectionRowRecordReader recordReader = new ReflectionRowRecordReader(userSchema, null);
        StructuredRecord decoded = recordReader.read(fetched, userSchema);

        assertRecordEqualsUser(SAMUEL, decoded);
      }
    };

    // TableDataset is not accessible here, but it is known to be the underlying
    // implementation, hence the cast to TransactionAware.
    TransactionExecutor executor = dsFrameworkUtil.newTransactionExecutor((TransactionAware) table);
    executor.execute(writeThenReadBack);
  } finally {
    dsFrameworkUtil.deleteInstance(users);
  }
}
Example use of co.cask.cdap.api.data.format.StructuredRecord in the project cdap by caskdata.
From the class StreamFormatSpecSpark, method run.
/**
 * Reads a CSV-formatted stream, exposes it to Spark SQL as the temp table {@code people},
 * runs a SQL statement against it and saves the result to a dataset.
 *
 * <p>Runtime arguments read from {@code sec}:
 * <ul>
 *   <li>{@code stream.name} - the stream to read from</li>
 *   <li>{@code sql.statement} - the SQL statement to execute</li>
 *   <li>{@code output.dataset} - the dataset the result pairs are saved to</li>
 * </ul>
 *
 * <p>Each result row is read as a string in column 0 and an int in column 1.
 *
 * @param sec the Spark execution context supplying runtime arguments, stream access and
 *            dataset output
 * @throws Exception if reading the stream, executing the SQL, or saving the result fails
 */
@Override
public void run(JavaSparkExecutionContext sec) throws Exception {
  SQLContext sqlContext = new SQLContext(new JavaSparkContext());

  // Describe the stream body: CSV records made of a string "name" and an int "age".
  String streamName = sec.getRuntimeArguments().get("stream.name");
  Schema.Field nameField = Schema.Field.of("name", Schema.of(Schema.Type.STRING));
  Schema.Field ageField = Schema.Field.of("age", Schema.of(Schema.Type.INT));
  Schema schema = Schema.recordOf("record", ImmutableList.of(nameField, ageField));
  FormatSpecification formatSpec = new FormatSpecification("csv", schema);

  // Converts the decoded body of a stream event into a Person bean.
  Function<GenericStreamEventData<StructuredRecord>, Person> toPerson =
    new Function<GenericStreamEventData<StructuredRecord>, Person>() {
      @Override
      public Person call(GenericStreamEventData<StructuredRecord> data) throws Exception {
        StructuredRecord body = data.getBody();
        String name = body.get("name");
        Integer age = body.get("age");
        return new Person(name, age);
      }
    };

  // Read from the CSV stream and turn it into a DataFrame registered as "people".
  JavaPairRDD<Long, GenericStreamEventData<StructuredRecord>> events =
    sec.fromStream(streamName, formatSpec, StructuredRecord.class);
  JavaRDD<Person> people = events.values().map(toPerson);
  sqlContext.createDataFrame(people, Person.class).registerTempTable("people");

  // Converts a result row into a (column 0 as string, column 1 as int) pair.
  PairFunction<Row, String, Integer> toPair = new PairFunction<Row, String, Integer>() {
    @Override
    public Tuple2<String, Integer> call(Row row) throws Exception {
      String first = row.getString(0);
      int second = row.getInt(1);
      return new Tuple2<>(first, second);
    }
  };

  // Execute the SQL statement on the table and save the result.
  String sqlStatement = sec.getRuntimeArguments().get("sql.statement");
  JavaRDD<Row> resultRows = sqlContext.sql(sqlStatement).toJavaRDD();
  JavaPairRDD<String, Integer> resultPairs = resultRows.mapToPair(toPair);

  String outputDataset = sec.getRuntimeArguments().get("output.dataset");
  sec.saveAsDataset(resultPairs, outputDataset);
}
Aggregations