use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcUtilsTest method testRandomFillOrcStructWithAnySchema.
/**
 * Checks {@code OrcTestUtils.fillOrcStructWithFixedValue} against hand-built expected structs for
 * flat schemas, a struct nested in a struct, an array of structs, and a union holding a struct.
 */
@Test
public void testRandomFillOrcStructWithAnySchema() {
  // 1. Basic case
  TypeDescription schema_1 = TypeDescription.fromString("struct<i:int,j:int,k:int>");
  OrcStruct expectedStruct = (OrcStruct) OrcStruct.createValue(schema_1);
  expectedStruct.setFieldValue("i", new IntWritable(3));
  expectedStruct.setFieldValue("j", new IntWritable(3));
  expectedStruct.setFieldValue("k", new IntWritable(3));
  OrcStruct actualStruct = (OrcStruct) OrcStruct.createValue(schema_1);
  OrcTestUtils.fillOrcStructWithFixedValue(actualStruct, schema_1, 3, "", false);
  Assert.assertEquals(actualStruct, expectedStruct);

  // Same flat shape, but with mixed primitive types.
  TypeDescription schema_2 = TypeDescription.fromString("struct<i:boolean,j:int,k:string>");
  expectedStruct = (OrcStruct) OrcStruct.createValue(schema_2);
  expectedStruct.setFieldValue("i", new BooleanWritable(false));
  expectedStruct.setFieldValue("j", new IntWritable(3));
  expectedStruct.setFieldValue("k", new Text(""));
  actualStruct = (OrcStruct) OrcStruct.createValue(schema_2);
  OrcTestUtils.fillOrcStructWithFixedValue(actualStruct, schema_2, 3, "", false);
  Assert.assertEquals(actualStruct, expectedStruct);

  // 2. Some simple nested cases: struct within struct.
  // From here on, expectedStruct (the schema_2 instance) serves as the expected inner value.
  TypeDescription schema_3 = TypeDescription.fromString("struct<i:boolean,j:struct<i:boolean,j:int,k:string>>");
  OrcStruct expectedStruct_nested_1 = (OrcStruct) OrcStruct.createValue(schema_3);
  expectedStruct_nested_1.setFieldValue("i", new BooleanWritable(false));
  expectedStruct_nested_1.setFieldValue("j", expectedStruct);
  actualStruct = (OrcStruct) OrcStruct.createValue(schema_3);
  OrcTestUtils.fillOrcStructWithFixedValue(actualStruct, schema_3, 3, "", false);
  Assert.assertEquals(actualStruct, expectedStruct_nested_1);

  // 3. array of struct within struct
  TypeDescription schema_4 = TypeDescription.fromString("struct<i:boolean,j:array<struct<i:boolean,j:int,k:string>>>");
  // Note that this will not create any elements in the array.
  expectedStruct_nested_1 = (OrcStruct) OrcStruct.createValue(schema_4);
  expectedStruct_nested_1.setFieldValue("i", new BooleanWritable(false));
  OrcList list = new OrcList(schema_2, 1);
  list.add(expectedStruct);
  expectedStruct_nested_1.setFieldValue("j", list);
  // Constructing actualStruct: make sure the list is non-empty. There's no meaningful value within
  // the placeholder struct yet; the filler is expected to populate it.
  actualStruct = (OrcStruct) OrcStruct.createValue(schema_4);
  OrcList placeHolderList = new OrcList(schema_2, 1);
  OrcStruct placeHolderStruct = (OrcStruct) OrcStruct.createValue(schema_2);
  placeHolderList.add(placeHolderStruct);
  actualStruct.setFieldValue("j", placeHolderList);
  OrcTestUtils.fillOrcStructWithFixedValue(actualStruct, schema_4, 3, "", false);
  Assert.assertEquals(actualStruct, expectedStruct_nested_1);

  // 4. union of struct within struct
  TypeDescription schema_5 = TypeDescription.fromString("struct<i:boolean,j:uniontype<struct<i:boolean,j:int,k:string>>>");
  expectedStruct_nested_1 = (OrcStruct) OrcStruct.createValue(schema_5);
  expectedStruct_nested_1.setFieldValue("i", new BooleanWritable(false));
  OrcUnion union = new OrcUnion(schema_2);
  union.set(0, expectedStruct);
  expectedStruct_nested_1.setFieldValue("j", union);
  // Construct actualStruct: make sure there's a struct-placeholder within the union.
  // A fresh placeholder is created on purpose: the one from case 3 has already been passed through
  // the filler and compared equal to the expected struct, so reusing it would let the assertion
  // below pass even if the filler never touched the union member.
  actualStruct = (OrcStruct) OrcStruct.createValue(schema_5);
  OrcStruct unionPlaceHolderStruct = (OrcStruct) OrcStruct.createValue(schema_2);
  OrcUnion placeHolderUnion = new OrcUnion(schema_2);
  placeHolderUnion.set(0, unionPlaceHolderStruct);
  actualStruct.setFieldValue("j", placeHolderUnion);
  OrcTestUtils.fillOrcStructWithFixedValue(actualStruct, schema_5, 3, "", false);
  Assert.assertEquals(actualStruct, expectedStruct_nested_1);
}
use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcUtilsTest method testUpConvertOrcStructOfUnion.
/**
 * Exercises {@code OrcUtils.upConvertOrcStruct} on structs containing unions: first a plain
 * union-in-struct whose int member becomes bigint, then a deeper schema where evolution happens
 * in several places at once.
 */
@Test
public void testUpConvertOrcStructOfUnion() {
  // --- Union in struct: the union's first member is widened from int to bigint. ---
  TypeDescription sourceSchema = TypeDescription.fromString("struct<a:uniontype<int,string>>");
  OrcUnion sourceUnion = new OrcUnion(TypeDescription.fromString("uniontype<int,string>"));
  sourceUnion.set(0, new IntWritable(1));
  OrcStruct sourceStruct = (OrcStruct) OrcStruct.createValue(sourceSchema);
  sourceStruct.setFieldValue("a", sourceUnion);
  OrcTestUtils.fillOrcStructWithFixedValue(sourceStruct, sourceSchema, intValue1, stringValue1, boolValue);

  // Target struct built against the evolved schema, with its own union placeholder.
  TypeDescription targetSchema = TypeDescription.fromString("struct<a:uniontype<bigint,string>>");
  OrcUnion targetUnion = new OrcUnion(TypeDescription.fromString("uniontype<bigint,string>"));
  targetUnion.set(0, new LongWritable(1L));
  OrcStruct targetStruct = (OrcStruct) OrcStruct.createValue(targetSchema);
  targetStruct.setFieldValue("a", targetUnion);

  OrcUtils.upConvertOrcStruct(sourceStruct, targetStruct, targetSchema);

  // The converted union is expected under tag 0, carrying the widened (long) form of the filled value.
  OrcUnion convertedUnion = (OrcUnion) targetStruct.getFieldValue("a");
  Assert.assertEquals(convertedUnion.getTag(), 0);
  Assert.assertEquals(convertedUnion.getObject(), new LongWritable(intValue1));

  // TODO: the case where the union value is created under a different tag is not exercised here.

  // --- Complex: list of structs plus a struct holding a union; new columns are added and ---
  // --- int is widened to bigint at nested levels.                                        ---
  TypeDescription nestedSourceSchema =
      TypeDescription.fromString("struct<a:array<struct<a:string,b:int>>,b:struct<a:uniontype<int,string>>>");
  OrcStruct nestedSource = (OrcStruct) OrcUtils.createValueRecursively(nestedSourceSchema);
  OrcTestUtils.fillOrcStructWithFixedValue(nestedSource, nestedSourceSchema, intValue1, stringValue1, boolValue);

  TypeDescription nestedTargetSchema = TypeDescription.fromString(
      "struct<a:array<struct<a:string,b:bigint,c:string>>,b:struct<a:uniontype<bigint,string>,b:int>>");
  OrcStruct nestedTarget = (OrcStruct) OrcUtils.createValueRecursively(nestedTargetSchema);
  OrcTestUtils.fillOrcStructWithFixedValue(nestedTarget, nestedTargetSchema, intValue1, stringValue1, boolValue);

  OrcUtils.upConvertOrcStruct(nestedSource, nestedTarget, nestedTargetSchema);

  // Columns that exist only in the evolved schema must come out null; widened columns keep their value.
  OrcStruct firstListElement = (OrcStruct) ((OrcList) nestedTarget.getFieldValue("a")).get(0);
  Assert.assertEquals(firstListElement.getFieldValue("b"), new LongWritable(intValue1));
  Assert.assertNull(firstListElement.getFieldValue("c"));

  OrcStruct innerStruct = (OrcStruct) nestedTarget.getFieldValue("b");
  Assert.assertEquals(((OrcUnion) innerStruct.getFieldValue("a")).getObject(), new LongWritable(intValue1));
  Assert.assertNull(innerStruct.getFieldValue("b"));
}
use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcUtilsTest method testOrcStructProjection.
/**
 * Sanity check for column projection: converting into a reader schema that keeps only a subset of
 * the columns should behave like any other conversion driven by a provided reader schema.
 */
@Test
public void testOrcStructProjection() {
  // Full-width source record.
  TypeDescription fullSchema =
      TypeDescription.fromString("struct<a:struct<a:int,b:int>,b:struct<c:int,d:int>,c:int>");
  OrcStruct fullStruct = (OrcStruct) OrcUtils.createValueRecursively(fullSchema);
  OrcTestUtils.fillOrcStructWithFixedValue(fullStruct, fullSchema, intValue1, stringValue1, boolValue);

  // Reader schema keeps a.b and b.c only; the expected value is filled directly against it.
  TypeDescription readerSchema = TypeDescription.fromString("struct<a:struct<b:int>,b:struct<c:int>>");
  OrcStruct expectedProjection = (OrcStruct) OrcUtils.createValueRecursively(readerSchema);
  OrcTestUtils.fillOrcStructWithFixedValue(expectedProjection, readerSchema, intValue1, stringValue1, boolValue);

  // Project the source into a fresh struct of the reader schema and compare.
  OrcStruct actualProjection = (OrcStruct) OrcUtils.createValueRecursively(readerSchema);
  OrcUtils.upConvertOrcStruct(fullStruct, actualProjection, readerSchema);

  Assert.assertEquals(actualProjection, expectedProjection);
}
use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcUtilsTest method testUpConvertOrcStructOfList.
/**
 * Exercises {@code OrcUtils.upConvertOrcStruct} on a struct holding a list of structs, where the
 * evolved element type gains a new field and the source/target lists differ in size.
 */
@Test
public void testUpConvertOrcStructOfList() {
  // Simple nesting: a list inside a struct; the evolved element type adds column "c".
  TypeDescription sourceSchema = TypeDescription.fromString("struct<a:array<struct<a:int,b:string>>>");
  OrcStruct source = (OrcStruct) OrcUtils.createValueRecursively(sourceSchema);

  // Hand-build a two-entry list for the source.
  TypeDescription elementSchema = TypeDescription.createStruct()
      .addField("a", TypeDescription.createInt())
      .addField("b", TypeDescription.createString());
  OrcStruct firstEntry = new OrcStruct(elementSchema);
  firstEntry.setFieldValue("a", new IntWritable(intValue1));
  firstEntry.setFieldValue("b", new Text(stringValue1));
  OrcStruct secondEntry = new OrcStruct(elementSchema);
  secondEntry.setFieldValue("a", new IntWritable(intValue2));
  secondEntry.setFieldValue("b", new Text(stringValue2));
  TypeDescription listSchema = TypeDescription.createList(elementSchema);
  OrcList orcList = new OrcList(listSchema);
  orcList.add(firstEntry);
  orcList.add(secondEntry);
  source.setFieldValue("a", orcList);

  TypeDescription targetSchema = TypeDescription.fromString("struct<a:array<struct<a:int,b:string,c:int>>>");
  OrcStruct target = (OrcStruct) OrcUtils.createValueRecursively(targetSchema);

  // Convert, then verify each element: existing columns are carried over, the new column is null.
  OrcUtils.upConvertOrcStruct(source, target, targetSchema);

  OrcStruct convertedFirst = (OrcStruct) ((OrcList) target.getFieldValue("a")).get(0);
  Assert.assertEquals(((IntWritable) convertedFirst.getFieldValue("a")).get(), intValue1);
  Assert.assertEquals(convertedFirst.getFieldValue("b").toString(), stringValue1);
  Assert.assertNull(convertedFirst.getFieldValue("c"));

  OrcStruct convertedSecond = (OrcStruct) ((OrcList) target.getFieldValue("a")).get(1);
  Assert.assertEquals(((IntWritable) convertedSecond.getFieldValue("a")).get(), intValue2);
  Assert.assertEquals(convertedSecond.getFieldValue("b").toString(), stringValue2);
  Assert.assertNull(convertedSecond.getFieldValue("c"));

  // Rebuild the source with a 3-element list; the target still holds the earlier result, so sizes differ.
  source = (OrcStruct) OrcUtils.createValueRecursively(sourceSchema, 3);
  OrcTestUtils.fillOrcStructWithFixedValue(source, sourceSchema, intValue1, stringValue1, boolValue);
  int sourceSize = ((OrcList) source.getFieldValue("a")).size();
  int targetSizeBefore = ((OrcList) target.getFieldValue("a")).size();
  Assert.assertNotEquals(sourceSize, targetSizeBefore);
  OrcUtils.upConvertOrcStruct(source, target, targetSchema);
  Assert.assertEquals(((OrcList) target.getFieldValue("a")).size(), 3);

  // Empty the source list while the target still holds the elements from the previous conversion;
  // after converting again the target list is expected to be empty as well.
  ((OrcList) source.getFieldValue("a")).clear();
  OrcUtils.upConvertOrcStruct(source, target, targetSchema);
  Assert.assertEquals(((OrcList) target.getFieldValue("a")).size(), 0);
}
Aggregations