Use of org.apache.hadoop.io.ArrayWritable in the Apache Hive project: class TestMapStructures, method testDoubleMapWithStructValue.
@Test
public void testDoubleMapWithStructValue() throws Exception {
    Path test = writeDirect("DoubleMapWithStructValue",
        Types.buildMessage()
            .optionalGroup().as(MAP)
                .repeatedGroup()
                    .optional(DOUBLE).named("key")
                    .optionalGroup()
                        .required(INT32).named("x")
                        .required(INT32).named("y")
                        .named("value")
                    .named("key_value")
                .named("approx")
            .named("DoubleMapWithStructValue"),
        new TestArrayCompatibility.DirectWriter() {
            @Override
            public void write(RecordConsumer rc) {
                rc.startMessage();
                rc.startField("approx", 0);
                rc.startGroup();
                rc.startField("key_value", 0);
                rc.startGroup();
                rc.startField("key", 0);
                rc.addDouble(3.14);
                rc.endField("key", 0);
                rc.startField("value", 1);
                rc.startGroup();
                rc.startField("x", 0);
                rc.addInteger(7);
                rc.endField("x", 0);
                rc.startField("y", 1);
                rc.addInteger(22);
                rc.endField("y", 1);
                rc.endGroup();
                rc.endField("value", 1);
                rc.endGroup();
                rc.endField("key_value", 0);
                rc.endGroup();
                rc.endField("approx", 0);
                rc.endMessage();
            }
        });
    ArrayWritable expected = list(
        record(new DoubleWritable(3.14),
            record(new IntWritable(7), new IntWritable(22))));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
    deserialize(records.get(0),
        Arrays.asList("approx"),
        Arrays.asList("map<bigint,struct<x:int,y:int>>"));
}
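The list() and record() helpers above come from the test's support code and are not shown on this page. A minimal sketch of what they plausibly look like, assuming record() simply wraps its fields in an ArrayWritable and list() adds the extra layer Hive's object inspectors expect around repeated groups (the behavior is inferred, not confirmed by this excerpt):

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Writable;

public class WritableBuilders {
    // A struct/record is just its fields wrapped in an ArrayWritable.
    public static ArrayWritable record(Writable... fields) {
        return new ArrayWritable(Writable.class, fields);
    }

    // Lists and maps get one extra wrapping layer, matching the shape
    // Hive's Parquet object inspectors expect for repeated groups.
    public static ArrayWritable list(Writable... elements) {
        return new ArrayWritable(Writable.class,
            new Writable[] { new ArrayWritable(Writable.class, elements) });
    }
}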
Use of org.apache.hadoop.io.ArrayWritable in the Apache Hive project: class TestMapStructures, method testStringMapOptionalPrimitive.
@Test
public void testStringMapOptionalPrimitive() throws Exception {
    Path test = writeDirect("StringMapOptionalPrimitive",
        Types.buildMessage()
            .optionalGroup().as(MAP)
                .repeatedGroup()
                    .required(BINARY).as(UTF8).named("key")
                    .optional(INT32).named("value")
                .named("key_value")
            .named("votes")
            .named("StringMapOptionalPrimitive"),
        new TestArrayCompatibility.DirectWriter() {
            @Override
            public void write(RecordConsumer rc) {
                rc.startMessage();
                rc.startField("votes", 0);
                rc.startGroup();
                rc.startField("key_value", 0);
                rc.startGroup();
                rc.startField("key", 0);
                rc.addBinary(Binary.fromString("lettuce"));
                rc.endField("key", 0);
                rc.startField("value", 1);
                rc.addInteger(34);
                rc.endField("value", 1);
                rc.endGroup();
                rc.startGroup();
                rc.startField("key", 0);
                rc.addBinary(Binary.fromString("kale"));
                rc.endField("key", 0);
                // no value for kale
                rc.endGroup();
                rc.startGroup();
                rc.startField("key", 0);
                rc.addBinary(Binary.fromString("cabbage"));
                rc.endField("key", 0);
                rc.startField("value", 1);
                rc.addInteger(18);
                rc.endField("value", 1);
                rc.endGroup();
                rc.endField("key_value", 0);
                rc.endGroup();
                rc.endField("votes", 0);
                rc.endMessage();
            }
        });
    ArrayWritable expected = list(
        record(new Text("lettuce"), new IntWritable(34)),
        record(new Text("kale"), null),
        record(new Text("cabbage"), new IntWritable(18)));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
    deserialize(records.get(0),
        Arrays.asList("votes"),
        Arrays.asList("map<string,int>"));
}
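The "kale" entry shows how an optional map value that was never written round-trips: it comes back as a plain null slot in the key/value pair, as the expected record above confirms. A small self-contained illustration using only stock Hadoop Writable classes:

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class OptionalValueDemo {
    public static void main(String[] args) {
        // The "kale" pair as it appears in the expected record above:
        // the optional value that was never written is a null slot.
        ArrayWritable kale = new ArrayWritable(Writable.class,
            new Writable[] { new Text("kale"), null });

        Writable value = kale.get()[1];
        System.out.println(value == null
            ? "kale: no value"
            : "kale: " + ((IntWritable) value).get());
    }
}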
Use of org.apache.hadoop.io.ArrayWritable in the Apache Hive project: class TestParquetSerDe, method testParquetHiveSerDe.
public void testParquetHiveSerDe() throws Throwable {
    try {
        // Create the SerDe
        System.out.println("test: testParquetHiveSerDe");
        final ParquetHiveSerDe serDe = new ParquetHiveSerDe();
        final Configuration conf = new Configuration();
        final Properties tbl = createProperties();
        SerDeUtils.initializeSerDe(serDe, conf, tbl, null);

        // Data
        final Writable[] arr = new Writable[9];

        // primitive types
        arr[0] = new ByteWritable((byte) 123);
        arr[1] = new ShortWritable((short) 456);
        arr[2] = new IntWritable(789);
        arr[3] = new LongWritable(1000L);
        arr[4] = new DoubleWritable(5.3);
        arr[5] = new BytesWritable("hive and hadoop and parquet. Big family.".getBytes("UTF-8"));
        arr[6] = new BytesWritable("parquetSerde binary".getBytes("UTF-8"));

        final Writable[] map = new Writable[3];
        for (int i = 0; i < 3; ++i) {
            final Writable[] pair = new Writable[2];
            pair[0] = new BytesWritable(("key_" + i).getBytes("UTF-8"));
            pair[1] = new IntWritable(i);
            map[i] = new ArrayWritable(Writable.class, pair);
        }
        arr[7] = new ArrayWritable(Writable.class, map);

        final Writable[] array = new Writable[5];
        for (int i = 0; i < 5; ++i) {
            array[i] = new BytesWritable(("elem_" + i).getBytes("UTF-8"));
        }
        arr[8] = new ArrayWritable(Writable.class, array);

        final ArrayWritable arrWritable = new ArrayWritable(Writable.class, arr);

        // Test
        deserializeAndSerializeLazySimple(serDe, arrWritable);
        System.out.println("test: testParquetHiveSerDe - OK");
    } catch (final Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
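createProperties() is not shown on this page. A minimal sketch of the table properties it presumably supplies, using Hive's standard "columns" and "columns.types" keys; the column names and exact type strings here are illustrative assumptions matched to the nine-element record above, not the test's actual values:

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
import org.apache.hadoop.hive.serde2.SerDeUtils;

public class SerDeInitSketch {
    public static ParquetHiveSerDe newSerDe() throws Exception {
        Properties tbl = new Properties();
        // Hypothetical column names; types mirror the Writables built above.
        tbl.setProperty("columns",
            "abyte,ashort,aint,along,adouble,astring,abinary,amap,alist");
        tbl.setProperty("columns.types",
            "tinyint:smallint:int:bigint:double:string:binary:"
            + "map<string,int>:array<string>");

        ParquetHiveSerDe serDe = new ParquetHiveSerDe();
        SerDeUtils.initializeSerDe(serDe, new Configuration(), tbl, null);
        return serDe;
    }
}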
Use of org.apache.hadoop.io.ArrayWritable in the Apache Hive project: class TestDataWritableWriter, method testArrayOfArrays.
@Test
public void testArrayOfArrays() throws Exception {
    String columnNames = "array_of_arrays";
    String columnTypes = "array<array<int>>";
    String fileSchema = "message hive_schema {\n"
        + "  optional group array_of_arrays (LIST) {\n"
        + "    repeated group array {\n"
        + "      optional group array_element (LIST) {\n"
        + "        repeated group array {\n"
        + "          optional int32 array_element;\n"
        + "        }\n"
        + "      }\n"
        + "    }\n"
        + "  }\n"
        + "}\n";
    ArrayWritable hiveRecord = createGroup(
        createArray(
            createArray(createInt(1), createInt(2))));

    // Write record to Parquet format
    writeParquetRecord(fileSchema, getParquetWritable(columnNames, columnTypes, hiveRecord));

    // Verify record was written correctly to Parquet
    startMessage();
    startField("array_of_arrays", 0);
      startGroup();
        startField("array", 0);
          startGroup();
            startField("array_element", 0);
              startGroup();
                startField("array", 0);
                  startGroup();
                    startField("array_element", 0);
                    addInteger(1);
                    endField("array_element", 0);
                  endGroup();
                  startGroup();
                    startField("array_element", 0);
                    addInteger(2);
                    endField("array_element", 0);
                  endGroup();
                endField("array", 0);
              endGroup();
            endField("array_element", 0);
          endGroup();
        endField("array", 0);
      endGroup();
    endField("array_of_arrays", 0);
    endMessage();
}
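The createGroup(), createArray(), and createInt() helpers are also test support code that this page does not show. Ignoring whatever extra wrapping layer those helpers may add around list elements, the [[1, 2]] record could be assembled directly from stock Writables along these lines (a sketch, not the test's actual construction):

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

public class ArrayOfArraysRecord {
    public static void main(String[] args) {
        // Innermost list: the two int elements.
        ArrayWritable inner = new ArrayWritable(Writable.class,
            new Writable[] { new IntWritable(1), new IntWritable(2) });
        // Outer list holding the single inner list.
        ArrayWritable outer = new ArrayWritable(Writable.class,
            new Writable[] { inner });
        // Top-level record with one column, array_of_arrays.
        ArrayWritable hiveRecord = new ArrayWritable(Writable.class,
            new Writable[] { outer });
        System.out.println(hiveRecord.get().length); // 1 column
    }
}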
Use of org.apache.hadoop.io.ArrayWritable in the Apache Hive project: class TestDataWritableWriter, method testArrayType.
@Test
public void testArrayType() throws Exception {
    String columnNames = "arrayCol";
    String columnTypes = "array<int>";
    String fileSchema = "message hive_schema {\n"
        + "  optional group arrayCol (LIST) {\n"
        + "    repeated group array {\n"
        + "      optional int32 array_element;\n"
        + "    }\n"
        + "  }\n"
        + "}\n";
    ArrayWritable hiveRecord = createGroup(
        createArray(createInt(1), createNull(), createInt(2)));

    // Write record to Parquet format
    writeParquetRecord(fileSchema, getParquetWritable(columnNames, columnTypes, hiveRecord));

    // Verify record was written correctly to Parquet
    startMessage();
    startField("arrayCol", 0);
      startGroup();
        startField("array", 0);
          startGroup();
            startField("array_element", 0);
            addInteger(1);
            endField("array_element", 0);
          endGroup();
          startGroup();
            // the null element becomes an empty repeated group
          endGroup();
          startGroup();
            startField("array_element", 0);
            addInteger(2);
            endField("array_element", 0);
          endGroup();
        endField("array", 0);
      endGroup();
    endField("arrayCol", 0);
    endMessage();
}
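For completeness, a hypothetical direct construction of the array<int> value [1, null, 2] used above: the null element is an ordinary null slot in the Writable array, which the writer renders as the empty repeated group seen in the verification (again ignoring any extra wrapping layer the test helpers may add):

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

public class ArrayWithNullElement {
    public static void main(String[] args) {
        // The array column [1, null, 2]; null marks the absent element.
        ArrayWritable arrayCol = new ArrayWritable(Writable.class,
            new Writable[] { new IntWritable(1), null, new IntWritable(2) });
        // Top-level record with the single column, arrayCol.
        ArrayWritable hiveRecord = new ArrayWritable(Writable.class,
            new Writable[] { arrayCol });
        System.out.println(hiveRecord.get()[0]); // the wrapped array column
    }
}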