Search in sources :

Example 1 with AvroDataWriterBuilder

Use of org.apache.gobblin.writer.AvroDataWriterBuilder in the incubator-gobblin project by Apache.

From the class DatePartitionedAvroFileExtractorTest, method setUp.

/**
 * Builds the test fixture: parses the Avro schema, computes the record timestamps, and writes
 * one record per timestamp through a time-partitioned Avro writer, which is then closed and
 * committed.
 *
 * @throws IOException declared for the writer operations performed during setup
 */
@BeforeClass
public void setUp() throws IOException {
    this.schema = new Schema.Parser().parse(AVRO_SCHEMA);

    // Anchor the data set six hours before the current time in TZ, pinned to minute 30, second 0.
    DateTime sixHoursAgo = new DateTime(TZ).minusHours(6);
    this.startDateTime = new DateTime(
        sixHoursAgo.getYear(),
        sixHoursAgo.getMonthOfYear(),
        sixHoursAgo.getDayOfMonth(),
        sixHoursAgo.getHourOfDay(),
        30,
        0,
        TZ);

    // The first record sits exactly on the anchor. The remaining records start four hours after
    // the anchor and are spaced one minute apart (the first of them at anchor + 4h + 1min).
    recordTimestamps[0] = startDateTime.getMillis();
    DateTime cursor = startDateTime.plusHours(4);
    for (int slot = 1; slot < RECORD_SIZE; slot++) {
        cursor = cursor.plusMinutes(1);
        recordTimestamps[slot] = cursor.getMillis();
    }

    // Writer configuration: where the files go ...
    State writerConfig = new State();
    writerConfig.setProp(ConfigurationKeys.WRITER_BUFFER_SIZE, ConfigurationKeys.DEFAULT_BUFFER_SIZE);
    writerConfig.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, ConfigurationKeys.LOCAL_FS_URI);
    writerConfig.setProp(ConfigurationKeys.WRITER_STAGING_DIR, STAGING_DIR);
    writerConfig.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, OUTPUT_DIR);
    writerConfig.setProp(ConfigurationKeys.WRITER_FILE_PATH, SOURCE_ENTITY);
    writerConfig.setProp(ConfigurationKeys.WRITER_FILE_NAME, FILE_NAME);
    // ... and how records are partitioned by the timestamp column.
    writerConfig.setProp(ConfigurationKeys.WRITER_PARTITIONER_CLASS, TimeBasedAvroWriterPartitioner.class.getName());
    writerConfig.setProp(TimeBasedAvroWriterPartitioner.WRITER_PARTITION_COLUMNS, PARTITION_COLUMN_NAME);
    writerConfig.setProp(TimeBasedWriterPartitioner.WRITER_PARTITION_PATTERN, DATE_PATTERN);
    writerConfig.setProp(TimeBasedWriterPartitioner.WRITER_PARTITION_PREFIX, PREFIX);
    writerConfig.setProp(TimeBasedWriterPartitioner.WRITER_PARTITION_SUFFIX, SUFFIX);

    DataWriterBuilder<Schema, GenericRecord> avroWriterBuilder = new AvroDataWriterBuilder()
        .writeTo(Destination.of(Destination.DestinationType.HDFS, writerConfig))
        .writeInFormat(WriterOutputFormat.AVRO)
        .withWriterId("writer-1")
        .withSchema(this.schema)
        .withBranches(1)
        .forBranch(0);
    this.writer = new PartitionedDataWriter<Schema, GenericRecord>(avroWriterBuilder, writerConfig);

    // Emit one record per timestamp; only the partition column is set on each record.
    GenericRecordBuilder recordFactory = new GenericRecordBuilder(this.schema);
    for (int slot = 0; slot < RECORD_SIZE; slot++) {
        recordFactory.set(PARTITION_COLUMN_NAME, recordTimestamps[slot]);
        GenericRecord record = recordFactory.build();
        this.writer.writeEnvelope(new RecordEnvelope<>(record));
    }

    this.writer.close();
    this.writer.commit();
}
Also used : WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) SourceState(org.apache.gobblin.configuration.SourceState) Schema(org.apache.avro.Schema) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) DateTime(org.joda.time.DateTime) TimeBasedAvroWriterPartitioner(org.apache.gobblin.writer.partitioner.TimeBasedAvroWriterPartitioner) AvroDataWriterBuilder(org.apache.gobblin.writer.AvroDataWriterBuilder) BeforeClass(org.testng.annotations.BeforeClass)

Example 2 with AvroDataWriterBuilder

Use of org.apache.gobblin.writer.AvroDataWriterBuilder in the incubator-gobblin project by Apache.

From the class TimeBasedAvroWriterPartitionerTest, method setUp.

/**
 * Prepares the fixture: resets the staging and output directories to an empty state, parses the
 * Avro schema, and creates a partitioned Avro writer configured to partition on
 * {@code PARTITION_COLUMN_NAME} with the pattern {@code yyyy/MM/dd}. No records are written here.
 *
 * @throws IOException if a directory cannot be deleted or recreated, or if building the writer fails
 */
@BeforeClass
public void setUp() throws IOException {
    // Always start from existing, empty directories. Previously an existing directory was deleted
    // but not recreated while a missing one was created, so the starting state depended on
    // whether an earlier run had left files behind.
    for (File dir : new File[] { new File(STAGING_DIR), new File(OUTPUT_DIR) }) {
        if (dir.exists()) {
            FileUtils.deleteDirectory(dir);
        }
        // mkdirs() returns false both on failure and when the directory already exists,
        // so confirm the directory is really there before giving up.
        if (!dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Could not create directory " + dir.getAbsolutePath());
        }
    }

    this.schema = new Schema.Parser().parse(AVRO_SCHEMA);

    State properties = new State();
    properties.setProp(TimeBasedAvroWriterPartitioner.WRITER_PARTITION_COLUMNS, PARTITION_COLUMN_NAME);
    properties.setProp(ConfigurationKeys.WRITER_BUFFER_SIZE, ConfigurationKeys.DEFAULT_BUFFER_SIZE);
    properties.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, ConfigurationKeys.LOCAL_FS_URI);
    properties.setProp(ConfigurationKeys.WRITER_STAGING_DIR, STAGING_DIR);
    properties.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, OUTPUT_DIR);
    properties.setProp(ConfigurationKeys.WRITER_FILE_PATH, BASE_FILE_PATH);
    properties.setProp(ConfigurationKeys.WRITER_FILE_NAME, FILE_NAME);
    properties.setProp(TimeBasedWriterPartitioner.WRITER_PARTITION_PATTERN, "yyyy/MM/dd");
    properties.setProp(ConfigurationKeys.WRITER_PARTITIONER_CLASS, TimeBasedAvroWriterPartitioner.class.getName());

    // Build a writer to write test records
    DataWriterBuilder<Schema, GenericRecord> builder = new AvroDataWriterBuilder()
        .writeTo(Destination.of(Destination.DestinationType.HDFS, properties))
        .writeInFormat(WriterOutputFormat.AVRO)
        .withWriterId(WRITER_ID)
        .withSchema(this.schema)
        .withBranches(1)
        .forBranch(0);
    this.writer = new PartitionedDataWriter<Schema, GenericRecord>(builder, properties);
}
Also used : State(org.apache.gobblin.configuration.State) Schema(org.apache.avro.Schema) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) AvroDataWriterBuilder(org.apache.gobblin.writer.AvroDataWriterBuilder) BeforeClass(org.testng.annotations.BeforeClass)

Aggregations

Schema (org.apache.avro.Schema)2 GenericRecord (org.apache.avro.generic.GenericRecord)2 State (org.apache.gobblin.configuration.State)2 AvroDataWriterBuilder (org.apache.gobblin.writer.AvroDataWriterBuilder)2 BeforeClass (org.testng.annotations.BeforeClass)2 File (java.io.File)1 GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder)1 SourceState (org.apache.gobblin.configuration.SourceState)1 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)1 TimeBasedAvroWriterPartitioner (org.apache.gobblin.writer.partitioner.TimeBasedAvroWriterPartitioner)1 DateTime (org.joda.time.DateTime)1