Example use of org.apache.gobblin.writer.AvroDataWriterBuilder in the project incubator-gobblin by Apache.
From the class DatePartitionedAvroFileExtractorTest, method setUp.
/**
 * Generates the Avro fixture files used by the tests in this class.
 *
 * <p>The method computes {@code RECORD_SIZE} timestamps, configures a partitioned Avro writer
 * against the local file system, writes one record per timestamp, and finally closes and commits
 * the writer.
 *
 * @throws IOException propagated from the writer calls made during setup
 */
@BeforeClass
public void setUp() throws IOException {
  this.schema = new Schema.Parser().parse(AVRO_SCHEMA);

  // Anchor the data set six hours before "now", pinned to minute 30, second 0 of that hour.
  DateTime sixHoursAgo = new DateTime(TZ).minusHours(6);
  this.startDateTime = new DateTime(
      sixHoursAgo.getYear(),
      sixHoursAgo.getMonthOfYear(),
      sixHoursAgo.getDayOfMonth(),
      sixHoursAgo.getHourOfDay(),
      30,
      0,
      TZ);

  // The first record sits exactly on the anchor. The remaining records begin four hours after
  // the anchor and are spaced one minute apart (anchor + 4h + 1min, anchor + 4h + 2min, ...).
  recordTimestamps[0] = this.startDateTime.getMillis();
  DateTime cursor = this.startDateTime.plusHours(4);
  for (int idx = 1; idx < RECORD_SIZE; idx++) {
    cursor = cursor.plusMinutes(1);
    recordTimestamps[idx] = cursor.getMillis();
  }

  // Writer configuration: partition on PARTITION_COLUMN_NAME using the time-based Avro
  // partitioner, with DATE_PATTERN / PREFIX / SUFFIX controlling the partition path.
  State writerConfig = new State();
  writerConfig.setProp(TimeBasedAvroWriterPartitioner.WRITER_PARTITION_COLUMNS, PARTITION_COLUMN_NAME);
  writerConfig.setProp(ConfigurationKeys.WRITER_BUFFER_SIZE, ConfigurationKeys.DEFAULT_BUFFER_SIZE);
  writerConfig.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, ConfigurationKeys.LOCAL_FS_URI);
  writerConfig.setProp(ConfigurationKeys.WRITER_STAGING_DIR, STAGING_DIR);
  writerConfig.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, OUTPUT_DIR);
  writerConfig.setProp(ConfigurationKeys.WRITER_FILE_PATH, SOURCE_ENTITY);
  writerConfig.setProp(ConfigurationKeys.WRITER_FILE_NAME, FILE_NAME);
  writerConfig.setProp(TimeBasedWriterPartitioner.WRITER_PARTITION_PATTERN, DATE_PATTERN);
  writerConfig.setProp(TimeBasedWriterPartitioner.WRITER_PARTITION_PREFIX, PREFIX);
  writerConfig.setProp(TimeBasedWriterPartitioner.WRITER_PARTITION_SUFFIX, SUFFIX);
  writerConfig.setProp(ConfigurationKeys.WRITER_PARTITIONER_CLASS, TimeBasedAvroWriterPartitioner.class.getName());

  // Single-branch Avro writer builder wrapped by the partitioned writer.
  DataWriterBuilder<Schema, GenericRecord> avroWriterBuilder = new AvroDataWriterBuilder()
      .writeTo(Destination.of(Destination.DestinationType.HDFS, writerConfig))
      .writeInFormat(WriterOutputFormat.AVRO)
      .withWriterId("writer-1")
      .withSchema(this.schema)
      .withBranches(1)
      .forBranch(0);
  this.writer = new PartitionedDataWriter<Schema, GenericRecord>(avroWriterBuilder, writerConfig);

  // Emit one record per prepared timestamp; only the partition column is populated explicitly.
  GenericRecordBuilder recordFactory = new GenericRecordBuilder(this.schema);
  for (int idx = 0; idx < RECORD_SIZE; idx++) {
    recordFactory.set(PARTITION_COLUMN_NAME, recordTimestamps[idx]);
    this.writer.writeEnvelope(new RecordEnvelope<>(recordFactory.build()));
  }

  // Same finalization order as before: close first, then commit.
  this.writer.close();
  this.writer.commit();
}
Example use of org.apache.gobblin.writer.AvroDataWriterBuilder in the project incubator-gobblin by Apache.
From the class TimeBasedAvroWriterPartitionerTest, method setUp.
/**
 * Prepares clean staging/output directories and builds the partitioned Avro writer under test.
 *
 * <p>Both directories are removed if present and then re-created, so every run starts from
 * existing, empty directories regardless of what an earlier run left behind. (Previously an
 * already-existing directory was deleted but not re-created, while a missing one was created,
 * which made the starting state depend on the previous run.)
 *
 * @throws IOException if a directory cannot be removed or created, or if it is raised while
 *     constructing the writer
 */
@BeforeClass
public void setUp() throws IOException {
  for (File dir : new File[] {new File(STAGING_DIR), new File(OUTPUT_DIR)}) {
    if (dir.exists()) {
      FileUtils.deleteDirectory(dir);
    }
    // mkdirs() returns false both on failure and when the directory already exists, so only
    // treat it as an error when the directory is really absent afterwards.
    if (!dir.mkdirs() && !dir.isDirectory()) {
      throw new IOException("Unable to create test directory " + dir.getAbsolutePath());
    }
  }

  this.schema = new Schema.Parser().parse(AVRO_SCHEMA);

  // Writer configuration: local file system, partitioned on PARTITION_COLUMN_NAME with a
  // year/month/day partition pattern.
  State properties = new State();
  properties.setProp(TimeBasedAvroWriterPartitioner.WRITER_PARTITION_COLUMNS, PARTITION_COLUMN_NAME);
  properties.setProp(ConfigurationKeys.WRITER_BUFFER_SIZE, ConfigurationKeys.DEFAULT_BUFFER_SIZE);
  properties.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, ConfigurationKeys.LOCAL_FS_URI);
  properties.setProp(ConfigurationKeys.WRITER_STAGING_DIR, STAGING_DIR);
  properties.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, OUTPUT_DIR);
  properties.setProp(ConfigurationKeys.WRITER_FILE_PATH, BASE_FILE_PATH);
  properties.setProp(ConfigurationKeys.WRITER_FILE_NAME, FILE_NAME);
  properties.setProp(TimeBasedWriterPartitioner.WRITER_PARTITION_PATTERN, "yyyy/MM/dd");
  properties.setProp(ConfigurationKeys.WRITER_PARTITIONER_CLASS, TimeBasedAvroWriterPartitioner.class.getName());

  // Build a writer to write test records
  DataWriterBuilder<Schema, GenericRecord> builder = new AvroDataWriterBuilder()
      .writeTo(Destination.of(Destination.DestinationType.HDFS, properties))
      .writeInFormat(WriterOutputFormat.AVRO)
      .withWriterId(WRITER_ID)
      .withSchema(this.schema)
      .withBranches(1)
      .forBranch(0);
  this.writer = new PartitionedDataWriter<Schema, GenericRecord>(builder, properties);
}
Aggregations