Use of org.apache.hadoop.io.ArrayWritable in project hive by apache: class TestArrayCompatibility, method testMultiFieldGroupInList.
@Test
public void testMultiFieldGroupInList() throws Exception {
  // tests the missing element layer, detected by a multi-field group
  Path test = writeDirect("MultiFieldGroupInList",
      Types.buildMessage()
          .optionalGroup().as(LIST)
              .repeatedGroup()
                  .required(DOUBLE).named("latitude")
                  .required(DOUBLE).named("longitude")
              .named("element") // should not affect schema conversion
          .named("locations")
          .named("MultiFieldGroupInList"),
      new DirectWriter() {
        @Override
        public void write(RecordConsumer rc) {
          rc.startMessage();
          rc.startField("locations", 0);
          rc.startGroup();
          rc.startField("element", 0);
          rc.startGroup();
          rc.startField("latitude", 0);
          rc.addDouble(0.0);
          rc.endField("latitude", 0);
          rc.startField("longitude", 1);
          rc.addDouble(0.0);
          rc.endField("longitude", 1);
          rc.endGroup();
          rc.startGroup();
          rc.startField("latitude", 0);
          rc.addDouble(0.0);
          rc.endField("latitude", 0);
          rc.startField("longitude", 1);
          rc.addDouble(180.0);
          rc.endField("longitude", 1);
          rc.endGroup();
          rc.endField("element", 0);
          rc.endGroup();
          rc.endField("locations", 0);
          rc.endMessage();
        }
      });
  ArrayWritable expected = list(
      record(new DoubleWritable(0.0), new DoubleWritable(0.0)),
      record(new DoubleWritable(0.0), new DoubleWritable(180.0)));
  List<ArrayWritable> records = read(test);
  Assert.assertEquals("Should have only one record", 1, records.size());
  assertEquals("Should match expected record", expected, records.get(0));
}
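For readability, the builder chain above corresponds to roughly the following Parquet schema (hand-rendered here for illustration; the test never prints it). Because the repeated group carries two fields, a converter has to treat the repeated group itself as the list element rather than as a wrapper, which is the missing-element-layer case this test exercises:

message MultiFieldGroupInList {
  optional group locations (LIST) {
    repeated group element {
      required double latitude;
      required double longitude;
    }
  }
}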
Use of org.apache.hadoop.io.ArrayWritable in project hive by apache: class TestArrayCompatibility, method testHiveRequiredGroupInList.
@Test
public void testHiveRequiredGroupInList() throws Exception {
  // this matches the list structure that Hive writes
  Path test = writeDirect("HiveRequiredGroupInList",
      Types.buildMessage()
          .optionalGroup().as(LIST)
              .repeatedGroup()
                  .requiredGroup()
                      .required(DOUBLE).named("latitude")
                      .required(DOUBLE).named("longitude")
                  .named("element")
              .named("bag")
          .named("locations")
          .named("HiveRequiredGroupInList"),
      new DirectWriter() {
        @Override
        public void write(RecordConsumer rc) {
          rc.startMessage();
          rc.startField("locations", 0);
          rc.startGroup();
          // start writing array contents
          rc.startField("bag", 0);
          // write a non-null element
          rc.startGroup(); // array level
          rc.startField("element", 0);
          rc.startGroup();
          rc.startField("latitude", 0);
          rc.addDouble(0.0);
          rc.endField("latitude", 0);
          rc.startField("longitude", 1);
          rc.addDouble(180.0);
          rc.endField("longitude", 1);
          rc.endGroup();
          rc.endField("element", 0);
          rc.endGroup(); // array level
          // write a second non-null element
          rc.startGroup(); // array level
          rc.startField("element", 0);
          rc.startGroup();
          rc.startField("latitude", 0);
          rc.addDouble(0.0);
          rc.endField("latitude", 0);
          rc.startField("longitude", 1);
          rc.addDouble(0.0);
          rc.endField("longitude", 1);
          rc.endGroup();
          rc.endField("element", 0);
          rc.endGroup(); // array level
          // finished writing array contents
          rc.endField("bag", 0);
          rc.endGroup();
          rc.endField("locations", 0);
          rc.endMessage();
        }
      });
  ArrayWritable expected = list(
      record(new DoubleWritable(0.0), new DoubleWritable(180.0)),
      record(new DoubleWritable(0.0), new DoubleWritable(0.0)));
  List<ArrayWritable> records = read(test);
  Assert.assertEquals("Should have only one record", 1, records.size());
  assertEquals("Should match expected record", expected, records.get(0));
}
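The builder chain in this test should correspond to roughly the following three-level schema (again hand-rendered for illustration), where the extra repeated group "bag" wraps each required "element":

message HiveRequiredGroupInList {
  optional group locations (LIST) {
    repeated group bag {
      required group element {
        required double latitude;
        required double longitude;
      }
    }
  }
}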
Use of org.apache.hadoop.io.ArrayWritable in project hive by apache: class TestArrayCompatibility, method testNewRequiredGroupInList.
@Test
public void testNewRequiredGroupInList() throws Exception {
  Path test = writeDirect("NewRequiredGroupInList",
      Types.buildMessage()
          .optionalGroup().as(LIST)
              .repeatedGroup()
                  .requiredGroup()
                      .required(DOUBLE).named("latitude")
                      .required(DOUBLE).named("longitude")
                  .named("element")
              .named("list")
          .named("locations")
          .named("NewRequiredGroupInList"),
      new DirectWriter() {
        @Override
        public void write(RecordConsumer rc) {
          rc.startMessage();
          rc.startField("locations", 0);
          rc.startGroup();
          // start writing array contents
          rc.startField("list", 0);
          // write a non-null element
          rc.startGroup(); // array level
          rc.startField("element", 0);
          rc.startGroup();
          rc.startField("latitude", 0);
          rc.addDouble(0.0);
          rc.endField("latitude", 0);
          rc.startField("longitude", 1);
          rc.addDouble(180.0);
          rc.endField("longitude", 1);
          rc.endGroup();
          rc.endField("element", 0);
          rc.endGroup(); // array level
          // write a second non-null element
          rc.startGroup(); // array level
          rc.startField("element", 0);
          rc.startGroup();
          rc.startField("latitude", 0);
          rc.addDouble(0.0);
          rc.endField("latitude", 0);
          rc.startField("longitude", 1);
          rc.addDouble(0.0);
          rc.endField("longitude", 1);
          rc.endGroup();
          rc.endField("element", 0);
          rc.endGroup(); // array level
          // finished writing array contents
          rc.endField("list", 0);
          rc.endGroup();
          rc.endField("locations", 0);
          rc.endMessage();
        }
      });
  ArrayWritable expected = list(
      record(new DoubleWritable(0.0), new DoubleWritable(180.0)),
      record(new DoubleWritable(0.0), new DoubleWritable(0.0)));
  List<ArrayWritable> records = read(test);
  Assert.assertEquals("Should have only one record", 1, records.size());
  assertEquals("Should match expected record", expected, records.get(0));
}
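NewRequiredGroupInList differs from the Hive layout above only in naming the repeated wrapper "list" instead of "bag". All three tests build their expected values with the record and list helpers inherited from the test's base class; a minimal sketch of what such helpers could look like, assuming a struct is just its field values in order and Hive's array representation adds one wrapping layer (the actual Hive implementations may differ):

public static ArrayWritable record(Writable... fields) {
  // a struct value is its field values in declaration order
  return new ArrayWritable(Writable.class, fields);
}

public static ArrayWritable list(Writable... elements) {
  // assumption: the array representation wraps the elements in one
  // extra ArrayWritable layer
  return new ArrayWritable(Writable.class,
      new Writable[] { new ArrayWritable(Writable.class, elements) });
}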
Use of org.apache.hadoop.io.ArrayWritable in project hive by apache: class TestDataWritableWriter, method testMapType.
@Test
public void testMapType() throws Exception {
  String columnNames = "mapCol";
  String columnTypes = "map<string,int>";
  String fileSchema = "message hive_schema {\n"
      + "  optional group mapCol (MAP) {\n"
      + "    repeated group map (MAP_KEY_VALUE) {\n"
      + "      required binary key;\n"
      + "      optional int32 value;\n"
      + "    }\n"
      + "  }\n"
      + "}\n";
  ArrayWritable hiveRecord = createGroup(
      createGroup(
          createArray(createString("key1"), createInt(1)),
          createArray(createString("key2"), createInt(2)),
          createArray(createString("key3"), createNull())));
  // Write record to Parquet format
  writeParquetRecord(fileSchema, getParquetWritable(columnNames, columnTypes, hiveRecord));
  // Verify record was written correctly to Parquet
  startMessage();
  startField("mapCol", 0);
  startGroup();
  startField("map", 0);
  startGroup();
  startField("key", 0);
  addString("key1");
  endField("key", 0);
  startField("value", 1);
  addInteger(1);
  endField("value", 1);
  endGroup();
  startGroup();
  startField("key", 0);
  addString("key2");
  endField("key", 0);
  startField("value", 1);
  addInteger(2);
  endField("value", 1);
  endGroup();
  startGroup();
  startField("key", 0);
  addString("key3");
  endField("key", 0);
  // no value field here: the null value for key3 is simply not written,
  // since value is an optional field in the file schema
  endGroup();
  endField("map", 0);
  endGroup();
  endField("mapCol", 0);
  endMessage();
}
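The create* helpers above are private to the test class; a hypothetical sketch of their shape, in which structs, map entries, and scalars all reduce to Writable values (the names match the calls above, but the bodies are assumptions):

// hypothetical sketches; the real helpers live in TestDataWritableWriter
private ArrayWritable createGroup(Writable... values) {
  return new ArrayWritable(Writable.class, values);
}

private ArrayWritable createArray(Writable... values) {
  // in testMapType, each map entry is a (key, value) pair of Writables
  return new ArrayWritable(Writable.class, values);
}

private Text createString(String value) {
  return new Text(value);
}

private IntWritable createInt(int value) {
  return new IntWritable(value);
}

private Writable createNull() {
  return null;
}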
Use of org.apache.hadoop.io.ArrayWritable in project hive by apache: class TestDataWritableWriter, method testExpectedStructTypeOnRecord.
@Test
public void testExpectedStructTypeOnRecord() throws Exception {
  String columnNames = "structCol";
  String columnTypes = "int";
  ArrayWritable hiveRecord = createGroup(createInt(1));
  String fileSchema = "message hive_schema {\n"
      + "  optional group structCol {\n"
      + "    optional int32 int;\n"
      + "  }\n"
      + "}\n";
  try {
    writeParquetRecord(fileSchema, getParquetWritable(columnNames, columnTypes, hiveRecord));
    fail();
  } catch (RuntimeException e) {
    assertEquals("Parquet record is malformed: Invalid data type: expected STRUCT type, but found: PRIMITIVE",
        e.getMessage());
  }
}
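A note on the verification style in testMapType: the bare startMessage(), startField(), and addString() calls after writeParquetRecord read as expectation helpers that check, in order, what the writer emitted to a mocked RecordConsumer. A sketch of that pattern, assuming Mockito's InOrder verification (illustrative only; the real helper bodies are in TestDataWritableWriter and may differ):

private final RecordConsumer mockRecordConsumer = Mockito.mock(RecordConsumer.class);
private final InOrder inOrder = Mockito.inOrder(mockRecordConsumer);

private void startMessage() {
  inOrder.verify(mockRecordConsumer).startMessage();
}

private void startField(String name, int index) {
  inOrder.verify(mockRecordConsumer).startField(name, index);
}

private void addString(String value) {
  // strings are written to Parquet as binary
  inOrder.verify(mockRecordConsumer).addBinary(Binary.fromString(value));
}

private void addInteger(int value) {
  inOrder.verify(mockRecordConsumer).addInteger(value);
}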