Search in sources :

Example 1 with HiveWritableHdfsDataWriter

Use of org.apache.gobblin.writer.HiveWritableHdfsDataWriter in the incubator-gobblin project by Apache.

Class HiveSerDeTest, method testAvroOrcSerDes.

/**
 * This test uses Avro SerDe to deserialize data from Avro files, and uses ORC SerDe
 * to serialize them into ORC files.
 *
 * <p>Flow: load the SerDe test properties, obtain the single work unit from
 * {@link OldApiWritableFileSource}, read every record through the extractor, convert each
 * one with {@link HiveSerDeConverter}, and write it with a {@link HiveWritableHdfsDataWriter}
 * configured for ORC output. On success the writer is committed and the expected output
 * file must exist. The writer output directory is deleted afterwards whether the test
 * passed or failed.
 *
 * @throws IOException if the properties file cannot be read, or if reading, writing,
 *         closing, or cleaning up fails with an I/O error
 * @throws DataRecordException declared for the extractor's record-reading call
 * @throws DataConversionException declared for the converter's conversion call
 */
@Test(groups = { "gobblin.serde" })
public void testAvroOrcSerDes() throws IOException, DataRecordException, DataConversionException {
    Properties properties = new Properties();
    // Close the reader as soon as the properties are loaded instead of leaking the file handle.
    try (FileReader propertiesReader = new FileReader("gobblin-core/src/test/resources/serde/serde.properties")) {
        properties.load(propertiesReader);
    }
    SourceState sourceState = new SourceState(new State(properties), ImmutableList.<WorkUnitState>of());
    OldApiWritableFileSource source = new OldApiWritableFileSource();
    List<WorkUnit> workUnits = source.getWorkunits(sourceState);
    Assert.assertEquals(workUnits.size(), 1);
    WorkUnitState wus = new WorkUnitState(workUnits.get(0));
    wus.addAll(sourceState);
    Path outputDir = new Path(sourceState.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR));
    try {
        Closer closer = Closer.create();
        HiveWritableHdfsDataWriter writer;
        try {
            OldApiWritableFileExtractor extractor = closer.register((OldApiWritableFileExtractor) source.getExtractor(wus));
            HiveSerDeConverter converter = closer.register(new HiveSerDeConverter());
            writer = closer.register((HiveWritableHdfsDataWriter) new HiveWritableHdfsDataWriterBuilder<>().withBranches(1).withWriterId("0").writeTo(Destination.of(DestinationType.HDFS, sourceState)).writeInFormat(WriterOutputFormat.ORC).build());
            converter.init(wus);
            Writable record;
            while ((record = extractor.readRecord(null)) != null) {
                Iterable<Writable> convertedRecordIterable = converter.convertRecordImpl(null, record, wus);
                // Each input record is expected to convert to exactly one output record.
                Assert.assertEquals(Iterators.size(convertedRecordIterable.iterator()), 1);
                writer.write(convertedRecordIterable.iterator().next());
            }
        } catch (Throwable t) {
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }
        // Commit and verify only on the success path: doing this in a finally block would
        // commit partial output and let the assertion failure hide the original exception.
        // The commit still happens after the closer has closed the writer, as before.
        writer.commit();
        Assert.assertTrue(this.fs.exists(new Path(outputDir, sourceState.getProp(ConfigurationKeys.WRITER_FILE_NAME))));
    } finally {
        // Always remove the writer output directory so a failed run leaves nothing behind.
        HadoopUtils.deletePath(this.fs, outputDir, true);
    }
}
Also used : Closer(com.google.common.io.Closer) Path(org.apache.hadoop.fs.Path) SourceState(org.apache.gobblin.configuration.SourceState) OldApiWritableFileExtractor(org.apache.gobblin.source.extractor.hadoop.OldApiWritableFileExtractor) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) HiveSerDeConverter(org.apache.gobblin.converter.serde.HiveSerDeConverter) Writable(org.apache.hadoop.io.Writable) Properties(java.util.Properties) HiveWritableHdfsDataWriterBuilder(org.apache.gobblin.writer.HiveWritableHdfsDataWriterBuilder) HiveWritableHdfsDataWriter(org.apache.gobblin.writer.HiveWritableHdfsDataWriter) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) SourceState(org.apache.gobblin.configuration.SourceState) FileReader(java.io.FileReader) OldApiWritableFileSource(org.apache.gobblin.source.extractor.hadoop.OldApiWritableFileSource) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Test(org.testng.annotations.Test)

Aggregations

Closer (com.google.common.io.Closer)1 FileReader (java.io.FileReader)1 Properties (java.util.Properties)1 SourceState (org.apache.gobblin.configuration.SourceState)1 State (org.apache.gobblin.configuration.State)1 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)1 HiveSerDeConverter (org.apache.gobblin.converter.serde.HiveSerDeConverter)1 OldApiWritableFileExtractor (org.apache.gobblin.source.extractor.hadoop.OldApiWritableFileExtractor)1 OldApiWritableFileSource (org.apache.gobblin.source.extractor.hadoop.OldApiWritableFileSource)1 WorkUnit (org.apache.gobblin.source.workunit.WorkUnit)1 HiveWritableHdfsDataWriter (org.apache.gobblin.writer.HiveWritableHdfsDataWriter)1 HiveWritableHdfsDataWriterBuilder (org.apache.gobblin.writer.HiveWritableHdfsDataWriterBuilder)1 Path (org.apache.hadoop.fs.Path)1 Writable (org.apache.hadoop.io.Writable)1 Test (org.testng.annotations.Test)1