Use of org.apache.avro.generic.GenericRecordBuilder in project cdk-examples by cloudera.
From the class CreateHCatalogUserDatasetGeneric, method run:
@Override
public int run(String[] args) throws Exception {
  // Construct an HCatalog dataset repository using managed Hive tables
  DatasetRepository repo = DatasetRepositories.open("repo:hive");
  // Create a dataset of users with the Avro schema in the repository
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .build();
  Dataset<GenericRecord> users = repo.create("users", descriptor);
  // Get a writer for the dataset and write some users to it
  DatasetWriter<GenericRecord> writer = users.newWriter();
  try {
    writer.open();
    String[] colors = { "green", "blue", "pink", "brown", "yellow" };
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      GenericRecord record = builder
          .set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)])
          .build();
      writer.write(record);
    }
  } finally {
    writer.close();
  }
  return 0;
}
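The user.avsc schema referenced above is not shown. For reference, a minimal self-contained sketch of the same builder pattern, assuming a hypothetical inline schema with the three fields the loop sets:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;

public class UserRecordSketch {
  // Hypothetical inline equivalent of user.avsc; the real schema is not shown above
  private static final Schema SCHEMA = new Schema.Parser().parse(
      "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
      + "{\"name\":\"username\",\"type\":\"string\"},"
      + "{\"name\":\"creationDate\",\"type\":\"long\"},"
      + "{\"name\":\"favoriteColor\",\"type\":\"string\"}]}");

  public static void main(String[] args) {
    GenericRecord record = new GenericRecordBuilder(SCHEMA)
        .set("username", "user-0")
        .set("creationDate", System.currentTimeMillis())
        .set("favoriteColor", "green")
        .build();
    System.out.println(record);
  }
}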
Use of org.apache.avro.generic.GenericRecordBuilder in project beam by apache.
From the class FakeJobService, method writeRowsHelper:
private void writeRowsHelper(List<TableRow> rows, Schema avroSchema, String destinationPattern, int shard) throws IOException {
  // Expand the shard wildcard into a concrete, zero-padded file name
  String filename = destinationPattern.replace("*", String.format("%012d", shard));
  try (WritableByteChannel channel =
          FileSystems.create(FileSystems.matchNewResource(filename, false), MimeTypes.BINARY);
      DataFileWriter<GenericRecord> tableRowWriter =
          new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(avroSchema))
              .create(avroSchema, Channels.newOutputStream(channel))) {
    for (Map<String, Object> record : rows) {
      // Copy each row's entries into a fresh builder, then append the built record
      GenericRecordBuilder genericRecordBuilder = new GenericRecordBuilder(avroSchema);
      for (Map.Entry<String, Object> field : record.entrySet()) {
        genericRecordBuilder.set(field.getKey(), field.getValue());
      }
      tableRowWriter.append(genericRecordBuilder.build());
    }
  } catch (IOException e) {
    throw new IllegalStateException(String.format("Could not create destination for extract job %s", filename), e);
  }
}
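The snippet above depends on Beam's FileSystems abstraction. The same DataFileWriter-plus-GenericRecordBuilder pattern can be sketched against a plain local file using only the Avro library; the two-field schema here is a hypothetical stand-in for the BigQuery-derived avroSchema:

import java.io.File;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;

public class LocalAvroWriteSketch {
  public static void main(String[] args) throws IOException {
    // Hypothetical schema standing in for the avroSchema parameter above
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Row\",\"fields\":["
        + "{\"name\":\"name\",\"type\":\"string\"},"
        + "{\"name\":\"value\",\"type\":\"long\"}]}");
    try (DataFileWriter<GenericRecord> writer =
        new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))
            .create(schema, new File("rows.avro"))) {
      // One builder per record, mirroring the loop in writeRowsHelper
      writer.append(new GenericRecordBuilder(schema)
          .set("name", "a")
          .set("value", 1L)
          .build());
    }
  }
}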
Use of org.apache.avro.generic.GenericRecordBuilder in project nifi by apache.
From the class TestAvroTypeUtil, method testDefaultArrayValuesInRecordsCase2:
/**
 * The issue concerns an Avro schema with a default value for an array
 * field. See
 * <a href="https://issues.apache.org/jira/browse/NIFI-4893">NIFI-4893</a>.
 * @throws IOException if the schema resource cannot be read.
 */
@Test
public void testDefaultArrayValuesInRecordsCase2() throws IOException {
  Schema avroSchema = new Schema.Parser().parse(getClass().getResourceAsStream("defaultArrayInRecords2.json"));
  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  Record field1Record = new GenericRecordBuilder(avroSchema.getField("field1").schema()).build();
  builder.set("field1", field1Record);
  Record r = builder.build();
  // The nested record's array field should be populated from its schema default
  @SuppressWarnings("unchecked")
  GenericData.Array<Integer> values = (GenericData.Array<Integer>) ((GenericRecord) r.get("field1")).get("listOfInt");
  assertArrayEquals(new Object[] { 1, 2, 3 }, values.toArray());
  // The NiFi RecordSchema derived from the Avro schema should carry the same default
  RecordSchema record = AvroTypeUtil.createSchema(avroSchema);
  RecordField field = record.getField("field1").get();
  assertEquals(RecordFieldType.RECORD, field.getDataType().getFieldType());
  RecordDataType data = (RecordDataType) field.getDataType();
  RecordSchema childSchema = data.getChildSchema();
  RecordField childField = childSchema.getField("listOfInt").get();
  assertEquals(RecordFieldType.ARRAY, childField.getDataType().getFieldType());
  assertTrue(childField.getDefaultValue() instanceof Object[]);
  assertArrayEquals(new Object[] { 1, 2, 3 }, (Object[]) childField.getDefaultValue());
}
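The defaultArrayInRecords2.json schema is not shown. The behavior the test exercises, build() materializing an array default for a field that was never set, can be reproduced with a hypothetical inline schema:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;

public class DefaultArraySketch {
  public static void main(String[] args) {
    // Hypothetical schema standing in for defaultArrayInRecords2.json
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Outer\",\"fields\":["
        + "{\"name\":\"listOfInt\",\"type\":{\"type\":\"array\",\"items\":\"int\"},"
        + "\"default\":[1,2,3]}]}");
    GenericRecord r = new GenericRecordBuilder(schema).build();
    // build() fills unset fields from their schema defaults: prints [1, 2, 3]
    System.out.println(r.get("listOfInt"));
  }
}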
Use of org.apache.avro.generic.GenericRecordBuilder in project nifi by apache.
From the class TestAvroTypeUtil, method testDefaultArrayValue1:
/**
 * The issue concerns an Avro schema with a default value for an array
 * field. See
 * <a href="https://issues.apache.org/jira/browse/NIFI-4893">NIFI-4893</a>.
 * @throws IOException if the schema resource cannot be read.
 */
@Test
public void testDefaultArrayValue1() throws IOException {
  Schema avroSchema = new Schema.Parser().parse(getClass().getResourceAsStream("defaultArrayValue1.json"));
  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  Record r = builder.build();
  // The top-level array field defaults to an empty list
  @SuppressWarnings("unchecked")
  GenericData.Array<Integer> values = (GenericData.Array<Integer>) r.get("listOfInt");
  assertEquals(0, values.size());
  RecordSchema record = AvroTypeUtil.createSchema(avroSchema);
  RecordField field = record.getField("listOfInt").get();
  assertEquals(RecordFieldType.ARRAY, field.getDataType().getFieldType());
  assertTrue(field.getDefaultValue() instanceof Object[]);
  assertEquals(0, ((Object[]) field.getDefaultValue()).length);
}
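The flip side of these default-value tests: when a field has no default and is never set, GenericRecordBuilder.build() fails rather than producing a partial record. A sketch of that behavior, assuming a hypothetical one-field schema:

import org.apache.avro.AvroRuntimeException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecordBuilder;

public class MissingDefaultSketch {
  public static void main(String[] args) {
    // Hypothetical schema: a single field with no default value
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"R\",\"fields\":["
        + "{\"name\":\"required\",\"type\":\"string\"}]}");
    try {
      new GenericRecordBuilder(schema).build();
    } catch (AvroRuntimeException e) {
      // build() rejects unset fields that lack defaults
      System.out.println("rejected: " + e.getMessage());
    }
  }
}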
Use of org.apache.avro.generic.GenericRecordBuilder in project incubator-gobblin by apache.
From the class DatePartitionedAvroFileExtractorTest, method setUp:
@BeforeClass
public void setUp() throws IOException {
  this.schema = new Schema.Parser().parse(AVRO_SCHEMA);
  // set up datetime objects
  DateTime now = new DateTime(TZ).minusHours(6);
  this.startDateTime = new DateTime(now.getYear(), now.getMonthOfYear(), now.getDayOfMonth(), now.getHourOfDay(), 30, 0, TZ);
  // create record timestamps: the first at startDateTime, the rest starting
  // 4 hours later and spaced 1 minute apart
  DateTime recordDt = startDateTime;
  recordTimestamps[0] = recordDt.getMillis();
  recordDt = recordDt.plusHours(4);
  for (int i = 1; i < RECORD_SIZE; i++) {
    recordDt = recordDt.plusMinutes(1);
    recordTimestamps[i] = recordDt.getMillis();
  }
  // create dummy data partitioned by minutes
  State state = new State();
  state.setProp(TimeBasedAvroWriterPartitioner.WRITER_PARTITION_COLUMNS, PARTITION_COLUMN_NAME);
  state.setProp(ConfigurationKeys.WRITER_BUFFER_SIZE, ConfigurationKeys.DEFAULT_BUFFER_SIZE);
  state.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, ConfigurationKeys.LOCAL_FS_URI);
  state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, STAGING_DIR);
  state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, OUTPUT_DIR);
  state.setProp(ConfigurationKeys.WRITER_FILE_PATH, SOURCE_ENTITY);
  state.setProp(ConfigurationKeys.WRITER_FILE_NAME, FILE_NAME);
  state.setProp(TimeBasedWriterPartitioner.WRITER_PARTITION_PATTERN, DATE_PATTERN);
  state.setProp(TimeBasedWriterPartitioner.WRITER_PARTITION_PREFIX, PREFIX);
  state.setProp(TimeBasedWriterPartitioner.WRITER_PARTITION_SUFFIX, SUFFIX);
  state.setProp(ConfigurationKeys.WRITER_PARTITIONER_CLASS, TimeBasedAvroWriterPartitioner.class.getName());
  DataWriterBuilder<Schema, GenericRecord> builder = new AvroDataWriterBuilder()
      .writeTo(Destination.of(Destination.DestinationType.HDFS, state))
      .writeInFormat(WriterOutputFormat.AVRO)
      .withWriterId("writer-1")
      .withSchema(this.schema)
      .withBranches(1)
      .forBranch(0);
  this.writer = new PartitionedDataWriter<Schema, GenericRecord>(builder, state);
  // reuse one GenericRecordBuilder, overwriting the partition column for each record
  GenericRecordBuilder genericRecordBuilder = new GenericRecordBuilder(this.schema);
  for (int i = 0; i < RECORD_SIZE; i++) {
    genericRecordBuilder.set(PARTITION_COLUMN_NAME, recordTimestamps[i]);
    this.writer.writeEnvelope(new RecordEnvelope<>(genericRecordBuilder.build()));
  }
  this.writer.close();
  this.writer.commit();
}
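Unlike the Beam example, the loop above reuses a single GenericRecordBuilder and overwrites the partition column on each pass; build() returns a fresh record each time, so this is safe for immutable values like the long timestamps here. A stripped-down sketch of that pattern, assuming a hypothetical single-field schema in place of AVRO_SCHEMA:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;

public class ReusedBuilderSketch {
  public static void main(String[] args) {
    // Hypothetical schema standing in for AVRO_SCHEMA above
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Event\",\"fields\":["
        + "{\"name\":\"timestamp\",\"type\":\"long\"}]}");
    GenericRecordBuilder builder = new GenericRecordBuilder(schema);
    long start = System.currentTimeMillis();
    for (int i = 0; i < 3; i++) {
      // Overwrite the same field each pass; build() emits an independent record
      GenericRecord record = builder.set("timestamp", start + i * 60_000L).build();
      System.out.println(record);
    }
  }
}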