Use of org.apache.avro.generic.GenericRecordBuilder in project cdk-examples by cloudera.
The run method of the CreateUserDatasetGeneric class.
@Override
public int run(String[] args) throws Exception {
  // Construct a filesystem dataset repository rooted at /tmp/data
  DatasetRepository repo = DatasetRepositories.open("repo:hdfs:/tmp/data");
  // Create a dataset of users with the Avro schema in the repository
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .build();
  Dataset<GenericRecord> users = repo.create("users", descriptor);
  // Get a writer for the dataset and write some users to it
  DatasetWriter<GenericRecord> writer = users.newWriter();
  try {
    writer.open();
    String[] colors = { "green", "blue", "pink", "brown", "yellow" };
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      GenericRecord record = builder
          .set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)])
          .build();
      writer.write(record);
    }
  } finally {
    writer.close();
  }
  return 0;
}
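The descriptor above loads its schema from resource:user.avsc, which is not shown on this page. Judging from the fields the GenericRecordBuilder sets, that schema is roughly equivalent to the following programmatic definition; this is only a sketch built with Avro's SchemaBuilder, and the exact record name and field types in the real user.avsc are assumptions.

// Sketch only: an approximation of the schema behind resource:user.avsc,
// inferred from the fields set above. The record name and field types
// (e.g. creationDate as long) are assumptions, not taken from the real file.
org.apache.avro.Schema userSchema = SchemaBuilder.record("user").fields()
    .requiredString("username")
    .requiredLong("creationDate")
    .requiredString("favoriteColor")
    .endRecord();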
Use of org.apache.avro.generic.GenericRecordBuilder in project cdk-examples by cloudera.
The run method of the CreateUserDatasetGenericParquet class.
@Override
public int run(String[] args) throws Exception {
  // Construct a filesystem dataset repository rooted at /tmp/data
  DatasetRepository repo = DatasetRepositories.open("repo:hdfs:/tmp/data");
  // Create a dataset of users with the Avro schema, and Parquet format, in the repository
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .format(Formats.PARQUET)
      .build();
  Dataset<GenericRecord> users = repo.create("users", descriptor);
  // Get a writer for the dataset and write some users to it
  DatasetWriter<GenericRecord> writer = users.newWriter();
  try {
    writer.open();
    String[] colors = { "green", "blue", "pink", "brown", "yellow" };
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      GenericRecord record = builder
          .set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)])
          .build();
      writer.write(record);
    }
  } finally {
    writer.close();
  }
  return 0;
}
Use of org.apache.avro.generic.GenericRecordBuilder in project cdk-examples by cloudera.
The run method of the CreateUserDatasetGenericPartitioned class.
@Override
public int run(String[] args) throws Exception {
  // Construct a filesystem dataset repository rooted at /tmp/data
  DatasetRepository repo = DatasetRepositories.open("repo:hdfs:/tmp/data");
  // Create a partition strategy that hash partitions on username with 10 buckets
  PartitionStrategy partitionStrategy = new PartitionStrategy.Builder()
      .hash("username", 10)
      .build();
  // Create a dataset of users with the Avro schema in the repository
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .partitionStrategy(partitionStrategy)
      .build();
  Dataset<GenericRecord> users = repo.create("users", descriptor);
  // Get a writer for the dataset and write some users to it
  DatasetWriter<GenericRecord> writer = users.newWriter();
  try {
    writer.open();
    String[] colors = { "green", "blue", "pink", "brown", "yellow" };
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      GenericRecord record = builder
          .set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)])
          .build();
      writer.write(record);
    }
  } finally {
    writer.close();
  }
  return 0;
}
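Here hash("username", 10) means each record is routed to one of ten partitions derived from its username, so all records for a given user land in the same bucket. The snippet below only illustrates the idea of bucketing by hash; it is not CDK's internal partitioner.

// Illustration only: mapping a username to one of 10 buckets. CDK's actual
// hash function may differ; this just demonstrates the bucketing concept.
String username = "user-17";
int bucket = (username.hashCode() & Integer.MAX_VALUE) % 10; // value in 0..9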
Use of org.apache.avro.generic.GenericRecordBuilder in project cdap by caskdata.
The testMultipleReads method of the AvroRecordFormatTest class.
@Test
public void testMultipleReads() throws Exception {
  Schema schema = Schema.recordOf("record", Schema.Field.of("x", Schema.of(Schema.Type.INT)));
  FormatSpecification formatSpecification =
      new FormatSpecification(Formats.AVRO, schema, Collections.<String, String>emptyMap());
  org.apache.avro.Schema avroSchema = convertSchema(schema);
  RecordFormat<StreamEvent, StructuredRecord> format =
      RecordFormats.createInitializedFormat(formatSpecification);

  GenericRecord record = new GenericRecordBuilder(avroSchema).set("x", 5).build();
  StructuredRecord actual = format.read(toStreamEvent(record));
  Assert.assertEquals(5, actual.get("x"));

  record = new GenericRecordBuilder(avroSchema).set("x", 10).build();
  actual = format.read(toStreamEvent(record));
  Assert.assertEquals(10, actual.get("x"));
}
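The toStreamEvent(...) helper is not shown in this excerpt. On the Avro side it boils down to serializing the GenericRecord to Avro binary, which is what the AVRO format then decodes. Below is a minimal sketch of that serialization step using plain Avro APIs; the real CDAP helper additionally wraps the bytes in a StreamEvent, and the name and signature used here are hypothetical.

// Hypothetical sketch (not the actual test helper): serializing a GenericRecord
// to Avro binary with standard Avro APIs (GenericDatumWriter, BinaryEncoder,
// EncoderFactory from org.apache.avro.generic and org.apache.avro.io).
private static byte[] toAvroBytes(GenericRecord record) throws IOException {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(record.getSchema());
  BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
  datumWriter.write(record, encoder);
  encoder.flush();
  return out.toByteArray();
}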
Use of org.apache.avro.generic.GenericRecordBuilder in project cdap by caskdata.
The testSchemaProjection method of the AvroRecordFormatTest class.
@Test
public void testSchemaProjection() throws Exception {
  Schema sourceSchema = Schema.recordOf("source",
      Schema.Field.of("id", Schema.of(Schema.Type.INT)),
      Schema.Field.of("name", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
  Schema readSchema = Schema.recordOf("read",
      Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
  GenericRecord record = new GenericRecordBuilder(convertSchema(sourceSchema))
      .set("id", 1)
      .set("name", "value")
      .build();
  FormatSpecification formatSpecification =
      new FormatSpecification(Formats.AVRO, readSchema, ImmutableMap.<String, String>of());
  RecordFormat<StreamEvent, StructuredRecord> format =
      RecordFormats.createInitializedFormat(formatSpecification);

  // Convert an event that has a schema associated with it
  StructuredRecord projectedRecord = format.read(toStreamEvent(record, true));
  Assert.assertEquals(record.get("name").toString(), projectedRecord.get("name").toString());

  // Convert an event that has no schema associated. The record must be written with the read schema.
  record = new GenericRecordBuilder(convertSchema(readSchema)).set("name", "value2").build();
  projectedRecord = format.read(toStreamEvent(record));
  Assert.assertEquals(record.get("name").toString(), projectedRecord.get("name").toString());
}
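What the test calls schema projection can also be expressed with Avro's own schema resolution: bytes written with a full writer schema are decoded with a narrower reader schema, and fields missing from the reader schema are simply dropped. The sketch below shows that mechanism with plain Avro APIs; it is not the CDAP implementation, and the two schemas are stand-ins for the source/read schemas above (Avro resolution additionally requires the record schemas to share a name).

// Sketch only: field projection via Avro schema resolution, not CDAP's code.
org.apache.avro.Schema writerSchema = SchemaBuilder.record("user").fields()
    .requiredInt("id")
    .requiredString("name")
    .endRecord();
org.apache.avro.Schema readerSchema = SchemaBuilder.record("user").fields()
    .requiredString("name")
    .endRecord();
GenericRecord full = new GenericRecordBuilder(writerSchema)
    .set("id", 1)
    .set("name", "value")
    .build();

// Encode with the full writer schema...
ByteArrayOutputStream out = new ByteArrayOutputStream();
BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
new GenericDatumWriter<GenericRecord>(writerSchema).write(full, encoder);
encoder.flush();

// ...and decode with the narrower reader schema: the "id" field is skipped.
BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null);
GenericRecord projected =
    new GenericDatumReader<GenericRecord>(writerSchema, readerSchema).read(null, decoder);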