Search in sources :

Example 6 with ParquetHiveSerDe

use of org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe in project presto by prestodb.

In the class ParquetTester, the method writeParquetColumn:

/**
 * Writes every value from {@code values} as a single-column Parquet file at {@code outputFile}
 * using the Hive {@link MapredParquetOutputFormat}, and returns the resulting file size.
 *
 * @param jobConf                Hadoop job configuration passed to the writer and the SerDe
 * @param outputFile             destination file; its final length is the return value
 * @param compressionCodecName   Parquet codec; compression is enabled unless UNCOMPRESSED
 * @param columnObjectInspector  inspector describing the single column's Hive type
 * @param values                 row values to write, one per iteration
 * @return the size of the written file (via succinctBytes)
 * @throws Exception if the writer or SerDe fails
 */
private static DataSize writeParquetColumn(JobConf jobConf, File outputFile, CompressionCodecName compressionCodecName, ObjectInspector columnObjectInspector, Iterator<?> values) throws Exception {
    RecordWriter recordWriter = new MapredParquetOutputFormat().getHiveRecordWriter(jobConf, new Path(outputFile.toURI()), Text.class, compressionCodecName != UNCOMPRESSED, createTableProperties("test", columnObjectInspector.getTypeName()), () -> {
    });
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", columnObjectInspector);
    Object row = objectInspector.create();
    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
    // Hoisted out of the loop: the SerDe and its table properties are loop-invariant,
    // so constructing and re-initializing one per row was pure wasted work.
    ParquetHiveSerDe serde = new ParquetHiveSerDe();
    serde.initialize(jobConf, createTableProperties("test", columnObjectInspector.getTypeName()), null);
    while (values.hasNext()) {
        Object value = values.next();
        // Reuse the same row object; only the single struct field changes per iteration.
        objectInspector.setStructFieldData(row, fields.get(0), value);
        Writable record = serde.serialize(row, objectInspector);
        recordWriter.write(record);
    }
    // NOTE(review): if write() throws, the writer is never closed — consider try/finally.
    // Left as-is here to keep exception-propagation behavior identical for callers.
    recordWriter.close(false);
    return succinctBytes(outputFile.length());
}
Also used : Path(org.apache.hadoop.fs.Path) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) MapredParquetOutputFormat(org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat) ParquetHiveSerDe(org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe) Writable(org.apache.hadoop.io.Writable)

Example 7 with ParquetHiveSerDe

use of org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe in project parquet-mr by apache.

In the class TestParquetSerDe, the method testParquetHiveSerDe:

/**
 * Round-trip test for {@link ParquetHiveSerDe}: builds an ArrayWritable row covering
 * byte, short, int, long, double, string, a 3-entry map, and a 5-element string array,
 * then delegates the deserialize/serialize comparison to deserializeAndSerializeLazySimple.
 *
 * @throws Throwable rethrown after logging so the test framework still sees the failure
 */
public void testParquetHiveSerDe() throws Throwable {
    try {
        // Create the SerDe
        System.out.println("test: testParquetHiveSerDe");
        final ParquetHiveSerDe serDe = new ParquetHiveSerDe();
        final Configuration conf = new Configuration();
        final Properties tbl = createProperties();
        serDe.initialize(conf, tbl);
        // Data: one writable per column of the test schema.
        final Writable[] arr = new Writable[8];
        arr[0] = new ByteWritable((byte) 123);
        arr[1] = new ShortWritable((short) 456);
        arr[2] = new IntWritable(789);
        // Uppercase 'L' suffix — lowercase 'l' is easily misread as the digit 1.
        arr[3] = new LongWritable(1000L);
        arr[4] = new DoubleWritable(5.3);
        arr[5] = new BinaryWritable(Binary.fromString("hive and hadoop and parquet. Big family."));
        // Map column: Parquet represents a map as a repeated group of key/value pairs,
        // wrapped in a single-element container.
        final Writable[] mapContainer = new Writable[1];
        final Writable[] map = new Writable[3];
        for (int i = 0; i < 3; ++i) {
            final Writable[] pair = new Writable[2];
            pair[0] = new BinaryWritable(Binary.fromString("key_" + i));
            pair[1] = new IntWritable(i);
            map[i] = new ArrayWritable(Writable.class, pair);
        }
        mapContainer[0] = new ArrayWritable(Writable.class, map);
        arr[6] = new ArrayWritable(Writable.class, mapContainer);
        // Array column: same single-element container convention as the map above.
        final Writable[] arrayContainer = new Writable[1];
        final Writable[] array = new Writable[5];
        for (int i = 0; i < 5; ++i) {
            array[i] = new BinaryWritable(Binary.fromString("elem_" + i));
        }
        arrayContainer[0] = new ArrayWritable(Writable.class, array);
        arr[7] = new ArrayWritable(Writable.class, arrayContainer);
        final ArrayWritable arrWritable = new ArrayWritable(Writable.class, arr);
        // Test
        deserializeAndSerializeLazySimple(serDe, arrWritable);
        System.out.println("test: testParquetHiveSerDe - OK");
    } catch (final Throwable e) {
        // Log for visibility, then rethrow so the failure is still reported.
        e.printStackTrace();
        throw e;
    }
}
Also used : ParquetHiveSerDe(org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe) Configuration(org.apache.hadoop.conf.Configuration) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) BinaryWritable(org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) ArrayWritable(org.apache.hadoop.io.ArrayWritable) IntWritable(org.apache.hadoop.io.IntWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) Properties(java.util.Properties) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) ArrayWritable(org.apache.hadoop.io.ArrayWritable) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) BinaryWritable(org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable)

Aggregations

ParquetHiveSerDe (org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe)7 Properties (java.util.Properties)5 Writable (org.apache.hadoop.io.Writable)5 Configuration (org.apache.hadoop.conf.Configuration)4 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)3 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)3 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)3 StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField)3 ArrayWritable (org.apache.hadoop.io.ArrayWritable)3 IntWritable (org.apache.hadoop.io.IntWritable)3 LongWritable (org.apache.hadoop.io.LongWritable)3 Path (org.apache.hadoop.fs.Path)2 RecordWriter (org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter)2 BytesWritable (org.apache.hadoop.io.BytesWritable)2 TestMapredParquetOutputFormat (com.facebook.presto.hive.parquet.write.TestMapredParquetOutputFormat)1 AbstractIterator (com.google.common.collect.AbstractIterator)1 Iterator (java.util.Iterator)1 MapredParquetOutputFormat (org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat)1 BinaryWritable (org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable)1 ParquetHiveRecord (org.apache.hadoop.hive.serde2.io.ParquetHiveRecord)1