Use of org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe in project hive by apache.
From the class TestDataWritableWriter, method getParquetWritable.
private ParquetHiveRecord getParquetWritable(String columnNames, String columnTypes, ArrayWritable record) throws SerDeException {
  // Describe the schema (column names and types) the serde should use for this record.
  Properties recordProperties = new Properties();
  recordProperties.setProperty("columns", columnNames);
  recordProperties.setProperty("columns.types", columnTypes);
  // Initialize the serde with that schema, then wrap the deserialized row
  // together with a matching object inspector.
  ParquetHiveSerDe serDe = new ParquetHiveSerDe();
  SerDeUtils.initializeSerDe(serDe, new Configuration(), recordProperties, null);
  return new ParquetHiveRecord(serDe.deserialize(record), getObjectInspector(columnNames, columnTypes));
}
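A minimal sketch of how this helper might be invoked from a test. The column names, types, and values below are illustrative assumptions, not taken from TestDataWritableWriter itself:

  // Hypothetical usage: build a two-column row and wrap it as a ParquetHiveRecord.
  ArrayWritable record = new ArrayWritable(Writable.class,
      new Writable[] { new IntWritable(1), new DoubleWritable(2.5) });
  ParquetHiveRecord parquetRecord = getParquetWritable("int_col,double_col", "int,double", record);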
Use of org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe in project hive by apache.
From the class TestParquetSerDe, method testParquetHiveSerDe.
public void testParquetHiveSerDe() throws Throwable {
  try {
    // Create the SerDe
    System.out.println("test: testParquetHiveSerDe");
    final ParquetHiveSerDe serDe = new ParquetHiveSerDe();
    final Configuration conf = new Configuration();
    final Properties tbl = createProperties();
    SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
    // Data
    final Writable[] arr = new Writable[9];
    // primitive types
    arr[0] = new ByteWritable((byte) 123);
    arr[1] = new ShortWritable((short) 456);
    arr[2] = new IntWritable(789);
    arr[3] = new LongWritable(1000L);
    arr[4] = new DoubleWritable(5.3);
    arr[5] = new BytesWritable("hive and hadoop and parquet. Big family.".getBytes("UTF-8"));
    arr[6] = new BytesWritable("parquetSerde binary".getBytes("UTF-8"));
    // a map with three key/value pairs
    final Writable[] map = new Writable[3];
    for (int i = 0; i < 3; ++i) {
      final Writable[] pair = new Writable[2];
      pair[0] = new BytesWritable(("key_" + i).getBytes("UTF-8"));
      pair[1] = new IntWritable(i);
      map[i] = new ArrayWritable(Writable.class, pair);
    }
    arr[7] = new ArrayWritable(Writable.class, map);
    // an array with five string elements
    final Writable[] array = new Writable[5];
    for (int i = 0; i < 5; ++i) {
      array[i] = new BytesWritable(("elem_" + i).getBytes("UTF-8"));
    }
    arr[8] = new ArrayWritable(Writable.class, array);
    final ArrayWritable arrWritable = new ArrayWritable(Writable.class, arr);
    // Test
    deserializeAndSerializeLazySimple(serDe, arrWritable);
    System.out.println("test: testParquetHiveSerDe - OK");
  } catch (final Throwable e) {
    e.printStackTrace();
    throw e;
  }
}
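The createProperties() helper referenced above is not shown on this page. A plausible sketch, assuming the nine column types line up with the Writable values built in the test; the exact column names and any extra serde properties in the real helper may differ:

  private Properties createProperties() {
    Properties tbl = new Properties();
    // Column names and types must match the nine-element row built in the test:
    // tinyint, smallint, int, bigint, double, string, binary, map<string,int>, array<string>.
    tbl.setProperty("columns", "abyte,ashort,aint,along,adouble,astring,abinary,amap,alist");
    tbl.setProperty("columns.types",
        "tinyint:smallint:int:bigint:double:string:binary:map<string,int>:array<string>");
    return tbl;
  }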
Use of org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe in project presto by prestodb.
From the class ParquetTester, method writeParquetColumn.
private static DataSize writeParquetColumn(JobConf jobConf, File outputFile, CompressionCodecName compressionCodecName,
    Properties tableProperties, SettableStructObjectInspector objectInspector, Iterator<?>[] valuesByField,
    Optional<MessageType> parquetSchema, boolean singleLevelArray) throws Exception {
  RecordWriter recordWriter = new TestMapredParquetOutputFormat(parquetSchema, singleLevelArray)
      .getHiveRecordWriter(jobConf, new Path(outputFile.toURI()), Text.class,
          compressionCodecName != UNCOMPRESSED, tableProperties, () -> {
          });
  Object row = objectInspector.create();
  List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
  while (stream(valuesByField).allMatch(Iterator::hasNext)) {
    for (int field = 0; field < fields.size(); field++) {
      Object value = valuesByField[field].next();
      objectInspector.setStructFieldData(row, fields.get(field), value);
    }
    ParquetHiveSerDe serde = new ParquetHiveSerDe();
    serde.initialize(jobConf, tableProperties, null);
    Writable record = serde.serialize(row, objectInspector);
    recordWriter.write(record);
  }
  recordWriter.close(false);
  return succinctBytes(outputFile.length());
}
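An illustrative caller, not copied from ParquetTester, showing one way the arguments could be assembled for a single bigint column. The file path, column name, codec, and values are placeholders, and the snippet assumes the surrounding test class imports ObjectInspectorFactory and PrimitiveObjectInspectorFactory:

  // (inside a test method that declares throws Exception)
  JobConf jobConf = new JobConf();
  Properties tableProperties = new Properties();
  tableProperties.setProperty("columns", "id");
  tableProperties.setProperty("columns.types", "bigint");
  SettableStructObjectInspector objectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
      ImmutableList.of("id"),
      ImmutableList.<ObjectInspector>of(PrimitiveObjectInspectorFactory.javaLongObjectInspector));
  Iterator<?>[] valuesByField = new Iterator<?>[] { ImmutableList.of(1L, 2L, 3L).iterator() };
  DataSize written = writeParquetColumn(jobConf, new File("target/test.parquet"), CompressionCodecName.GZIP,
      tableProperties, objectInspector, valuesByField, Optional.empty(), false);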
Use of org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe in project hive by apache.
From the class TestParquetSerDe, method testParquetHiveSerDeComplexTypes.
public void testParquetHiveSerDeComplexTypes() throws Throwable {
  // Initialize: two columns, "a" (int) and "s" (struct<a:int,b:string>),
  // with only the nested path s.b requested by the reader.
  ParquetHiveSerDe serDe = new ParquetHiveSerDe();
  Configuration conf = new Configuration();
  Properties tblProperties = new Properties();
  tblProperties.setProperty(serdeConstants.LIST_COLUMNS, "a,s");
  tblProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,struct<a:int,b:string>");
  conf.set(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR, "s.b");
  serDe.initialize(conf, tblProperties);
  // Generate test data: the struct "s" carries only its projected field b ("foo"),
  // and the top-level row is [a = null, s].
  Writable[] wb = new Writable[1];
  wb[0] = new BytesWritable("foo".getBytes("UTF-8"));
  Writable[] ws = new Writable[2];
  ws[0] = null;
  ArrayWritable awb = new ArrayWritable(Writable.class, wb);
  ws[1] = awb;
  ArrayWritable aws = new ArrayWritable(Writable.class, ws);
  // Inspect the test data: the struct field "s" resolves to awb, and its field "b"
  // resolves to the BytesWritable holding "foo".
  StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();
  StructField s = soi.getStructFieldRef("s");
  assertEquals(awb, soi.getStructFieldData(aws, s));
  StructObjectInspector boi = (StructObjectInspector) s.getFieldObjectInspector();
  StructField b = boi.getStructFieldRef("b");
  assertEquals(wb[0], boi.getStructFieldData(awb, b));
}
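A small continuation sketch, assumed rather than part of the original test: since "b" is a string column, its field inspector should be a StringObjectInspector, which can turn the raw BytesWritable back into a Java String.

  StringObjectInspector bInspector = (StringObjectInspector) b.getFieldObjectInspector();
  String bValue = bInspector.getPrimitiveJavaObject(boi.getStructFieldData(awb, b));
  assertEquals("foo", bValue);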
Use of org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe in project hive by apache.
From the class AbstractTestParquetDirect, method deserialize.
public void deserialize(Writable record, List<String> columnNames, List<String> columnTypes) throws Exception {
  ParquetHiveSerDe serde = new ParquetHiveSerDe();
  Properties props = new Properties();
  props.setProperty(serdeConstants.LIST_COLUMNS, COMMA.join(columnNames));
  props.setProperty(serdeConstants.LIST_COLUMN_TYPES, COMMA.join(columnTypes));
  serde.initialize(null, props);
  serde.deserialize(record);
}
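As written, the helper discards the value returned by serde.deserialize, so it only checks that deserialization does not throw. A sketch of a variant that returns the row for assertions; this is an assumption for illustration, not code from AbstractTestParquetDirect:

  public static Object deserializeToRow(Writable record, List<String> columnNames, List<String> columnTypes) throws Exception {
    ParquetHiveSerDe serde = new ParquetHiveSerDe();
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, COMMA.join(columnNames));
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, COMMA.join(columnTypes));
    serde.initialize(null, props);
    // Return the deserialized row so callers can assert on its contents.
    return serde.deserialize(record);
  }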