use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcKeyComparator method setConf.
/**
 * Stores the configuration and, when one is supplied, prepares the objects this comparator reuses.
 *
 * <p>The key schema is parsed from the value configured under
 * {@code OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA}. The two keys and the input buffer are only created
 * when they are still {@code null}; each key is then given its own record created from the schema.
 *
 * @param conf the configuration to apply; when {@code null} only the superclass is notified
 */
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  if (conf == null) {
    return;
  }

  // The MapReduce framework will be using this comparator to sort OrcKey objects
  // output from the map phase, so use the schema defined for the map output key
  // and the data model non-raw compare() implementation.
  String shuffleKeySchema = conf.get(OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute());
  schema = TypeDescription.fromString(shuffleKeySchema);

  // Build both records up front so nothing below is touched if the schema cannot be materialized.
  OrcStruct firstRecord = (OrcStruct) OrcStruct.createValue(schema);
  OrcStruct secondRecord = (OrcStruct) OrcStruct.createValue(schema);

  if (key1 == null) {
    key1 = new OrcKey();
  }
  if (key2 == null) {
    key2 = new OrcKey();
  }
  if (buffer == null) {
    buffer = new DataInputBuffer();
  }

  key1.key = firstRecord;
  key2.key = secondRecord;
}
use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcTestUtils method fillOrcStructWithFixedValue.
/**
 * Recursively fills {@code w} with the given fixed values, walking {@code schema} alongside it.
 *
 * <p>{@code w} is assumed to have the same shape as {@code schema}. The schema still has to be passed
 * explicitly because the actual value type of each node is only available from the
 * {@link TypeDescription}, not from the {@link org.apache.orc.mapred.OrcValue} itself.
 *
 * <p>Simplifying assumptions:
 * <ul>
 *   <li>Only three primitive values are supplied and every compound value is built from them. To make
 *       this work across types that can be widened or shrunk to each other, use values within a small
 *       range.</li>
 *   <li>Lists and maps are filled by visiting the entries they already contain, so make sure there is
 *       at least one entry in the container; you may want to try
 *       createValueRecursively(TypeDescription) instead of
 *       {@link OrcStruct#createValue(TypeDescription)}.</li>
 *   <li>For a union, the member selected by {@code unionTag} is created, set on the union and then
 *       filled.</li>
 * </ul>
 *
 * @param w the value to fill
 * @param schema the schema describing {@code w}
 * @param unionTag index of the union member to populate whenever a union is encountered
 * @param intValue value used for every numeric type
 * @param stringValue value used for string, char and varchar
 * @param booleanValue value used for boolean
 * @throws UnsupportedOperationException for binary, decimal, date and timestamp categories
 * @throws IllegalArgumentException for any category not handled here
 */
public static void fillOrcStructWithFixedValue(WritableComparable w, TypeDescription schema, int unionTag, int intValue, String stringValue, boolean booleanValue) {
  switch (schema.getCategory()) {
    case BOOLEAN:
      ((BooleanWritable) w).set(booleanValue);
      return;
    case BYTE:
      ((ByteWritable) w).set((byte) intValue);
      return;
    case SHORT:
      ((ShortWritable) w).set((short) intValue);
      return;
    case INT:
      ((IntWritable) w).set(intValue);
      return;
    case LONG:
      ((LongWritable) w).set(intValue);
      return;
    case FLOAT:
      ((FloatWritable) w).set(intValue * 1.0f);
      return;
    case DOUBLE:
      ((DoubleWritable) w).set(intValue * 1.0);
      return;
    case STRING:
    case CHAR:
    case VARCHAR:
      ((Text) w).set(stringValue);
      return;
    case BINARY:
      throw new UnsupportedOperationException("Binary type is not supported in random orc data filler");
    case DECIMAL:
      throw new UnsupportedOperationException("Decimal type is not supported in random orc data filler");
    case DATE:
    case TIMESTAMP:
    case TIMESTAMP_INSTANT:
      throw new UnsupportedOperationException("Timestamp and its derived types is not supported in random orc data filler");
    case LIST: {
      OrcList elements = (OrcList) w;
      TypeDescription elementSchema = schema.getChildren().get(0);
      // Only existing elements are visited; an empty list is left as it is.
      for (Object element : elements) {
        fillOrcStructWithFixedValue((WritableComparable) element, elementSchema, unionTag, intValue, stringValue, booleanValue);
      }
      return;
    }
    case MAP: {
      OrcMap entries = (OrcMap) w;
      TypeDescription keySchema = schema.getChildren().get(0);
      TypeDescription valueSchema = schema.getChildren().get(1);
      for (Object rawEntry : entries.entrySet()) {
        Map.Entry<WritableComparable, WritableComparable> pair = (Map.Entry<WritableComparable, WritableComparable>) rawEntry;
        fillOrcStructWithFixedValue(pair.getKey(), keySchema, unionTag, intValue, stringValue, booleanValue);
        fillOrcStructWithFixedValue(pair.getValue(), valueSchema, unionTag, intValue, stringValue, booleanValue);
      }
      return;
    }
    case STRUCT: {
      OrcStruct struct = (OrcStruct) w;
      int fieldCount = schema.getChildren().size();
      // Fields are addressed by position, mirroring the order of the schema's children.
      for (int idx = 0; idx < fieldCount; idx++) {
        fillOrcStructWithFixedValue(struct.getFieldValue(idx), schema.getChildren().get(idx), unionTag, intValue, stringValue, booleanValue);
      }
      return;
    }
    case UNION: {
      OrcUnion union = (OrcUnion) w;
      TypeDescription memberSchema = schema.getChildren().get(unionTag);
      // Install a fresh value for the chosen member first, then fill that value in place.
      union.set(unionTag, OrcUtils.createValueRecursively(memberSchema));
      fillOrcStructWithFixedValue((WritableComparable) union.getObject(), memberSchema, unionTag, intValue, stringValue, booleanValue);
      return;
    }
    default:
      throw new IllegalArgumentException("Unknown type " + schema.toString());
  }
}
use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcUtilsTest method testNestedWithinUnionWithDiffTag.
/**
 * Fills two records of the same struct-of-union schema using different union tags, then
 * up-converts both, one after the other, into a single container and checks that the
 * container equals each source in turn.
 */
@Test
public void testNestedWithinUnionWithDiffTag() {
  TypeDescription schema = TypeDescription.fromString("struct<a:uniontype<struct<a:int,b:string>,int>>");

  // Tag 0 selects the struct member of the union.
  OrcStruct recordWithStructMember = (OrcStruct) OrcUtils.createValueRecursively(schema);
  OrcTestUtils.fillOrcStructWithFixedValue(recordWithStructMember, schema, 0, intValue1, stringValue1, boolValue);
  OrcUnion unionHoldingStruct = (OrcUnion) recordWithStructMember.getFieldValue("a");
  OrcStruct nestedStruct = (OrcStruct) unionHoldingStruct.getObject();
  Assert.assertEquals(nestedStruct.getFieldValue("a"), new IntWritable(intValue1));

  // Tag 1 selects the int member of the union.
  OrcStruct recordWithIntMember = (OrcStruct) OrcUtils.createValueRecursively(schema);
  OrcTestUtils.fillOrcStructWithFixedValue(recordWithIntMember, schema, 1, intValue1, stringValue1, boolValue);
  OrcUnion unionHoldingInt = (OrcUnion) recordWithIntMember.getFieldValue("a");
  Assert.assertEquals(unionHoldingInt.getObject(), new IntWritable(intValue1));

  // Up-convert twice into the same target and check the value is propagated each time.
  OrcStruct target = (OrcStruct) OrcUtils.createValueRecursively(schema);
  OrcUtils.upConvertOrcStruct(recordWithStructMember, target, schema);
  Assert.assertEquals(recordWithStructMember, target);
  OrcUtils.upConvertOrcStruct(recordWithIntMember, target, schema);
  Assert.assertEquals(recordWithIntMember, target);
}
use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcUtilsTest method testUpConvertSimpleOrcStruct.
/**
 * Up-converts a struct of primitives into a schema with one extra column and checks that the
 * existing columns are carried over while the new column is {@code null}; then converts back
 * to the original schema and checks the result equals the original record.
 */
@Test
public void testUpConvertSimpleOrcStruct() {
  // Source record: all primitives.
  TypeDescription sourceSchema = TypeDescription.fromString("struct<a:int,b:string>");
  OrcStruct source = (OrcStruct) OrcStruct.createValue(sourceSchema);
  OrcTestUtils.fillOrcStructWithFixedValue(source, sourceSchema, intValue1, stringValue1, boolValue);

  // Target record: same columns plus a newly added "c".
  TypeDescription widerSchema = TypeDescription.fromString("struct<a:int,b:string,c:int>");
  OrcStruct wider = (OrcStruct) OrcStruct.createValue(widerSchema);

  // This should be equivalent to deserialize(source).serialize(wider, widerSchema).
  OrcUtils.upConvertOrcStruct(source, wider, widerSchema);

  // Existing columns are populated; the newly created column is null.
  IntWritable convertedA = (IntWritable) wider.getFieldValue("a");
  Assert.assertEquals(convertedA.get(), intValue1);
  Assert.assertEquals(wider.getFieldValue("b").toString(), stringValue1);
  Assert.assertNull(wider.getFieldValue("c"));

  // Reverse direction: column projection on top-level columns.
  OrcStruct projected = (OrcStruct) OrcStruct.createValue(sourceSchema);
  OrcUtils.upConvertOrcStruct(wider, projected, sourceSchema);
  Assert.assertEquals(source, projected);
}
use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcUtilsTest method testUpConvertOrcStructOfMap.
/**
 * Up-converts a struct holding a {@code map<string,int>} into a struct holding a
 * {@code map<string,bigint>} and checks that every entry arrives with its value widened to
 * {@link LongWritable}. The conversion is then repeated into the same target object after the
 * source map has gained an entry, to check that the target reflects the larger source.
 */
@Test
public void testUpConvertOrcStructOfMap() {
  // Map within Struct, contains a type-widening in the map-value type.
  TypeDescription structOfMapSchema = TypeDescription.fromString("struct<a:map<string,int>>");
  OrcStruct structOfMap = (OrcStruct) OrcStruct.createValue(structOfMapSchema);
  TypeDescription mapSchema = TypeDescription.createMap(TypeDescription.createString(), TypeDescription.createInt());
  OrcMap testMap = new OrcMap(mapSchema);
  // Populate the source map; these exact keys and values are asserted on after conversion.
  testMap.put(new Text(stringValue1), new IntWritable(intValue1));
  testMap.put(new Text(stringValue2), new IntWritable(intValue2));
  structOfMap.setFieldValue("a", testMap);
  // Create the target struct with evolved schema
  TypeDescription evolvedStructOfMapSchema = TypeDescription.fromString("struct<a:map<string,bigint>>");
  OrcStruct evolvedStructOfMap = (OrcStruct) OrcStruct.createValue(evolvedStructOfMapSchema);
  // The target map's value type must be bigint so that it agrees with evolvedStructOfMapSchema
  // and with the LongWritable placeholder stored below.
  OrcMap evolvedMap = new OrcMap(TypeDescription.createMap(TypeDescription.createString(), TypeDescription.createLong()));
  // Initialize the target map with a placeholder entry.
  evolvedMap.put(new Text(""), new LongWritable());
  evolvedStructOfMap.setFieldValue("a", evolvedMap);
  // Convert and verify: type-widening is correct, and the size of the converted map is correct.
  OrcUtils.upConvertOrcStruct(structOfMap, evolvedStructOfMap, evolvedStructOfMapSchema);
  Assert.assertEquals(((OrcMap) evolvedStructOfMap.getFieldValue("a")).get(new Text(stringValue1)), new LongWritable(intValue1));
  Assert.assertEquals(((OrcMap) evolvedStructOfMap.getFieldValue("a")).get(new Text(stringValue2)), new LongWritable(intValue2));
  Assert.assertEquals(((OrcMap) evolvedStructOfMap.getFieldValue("a")).size(), 2);
  // Re-use the same target object after the source map has gained one more entry.
  testMap.put(new Text(stringValue3), new IntWritable(intValue3));
  // sanity check
  Assert.assertEquals(((OrcMap) structOfMap.getFieldValue("a")).size(), 3);
  OrcUtils.upConvertOrcStruct(structOfMap, evolvedStructOfMap, evolvedStructOfMapSchema);
  Assert.assertEquals(((OrcMap) evolvedStructOfMap.getFieldValue("a")).size(), 3);
  Assert.assertEquals(((OrcMap) evolvedStructOfMap.getFieldValue("a")).get(new Text(stringValue1)), new LongWritable(intValue1));
  Assert.assertEquals(((OrcMap) evolvedStructOfMap.getFieldValue("a")).get(new Text(stringValue2)), new LongWritable(intValue2));
  Assert.assertEquals(((OrcMap) evolvedStructOfMap.getFieldValue("a")).get(new Text(stringValue3)), new LongWritable(intValue3));
}
Aggregations