use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcUtilsTest method testNestedFieldSequenceSet.
/**
 * This test mainly targets the following case:
 * Schema: struct<a:array<struct<a:int,b:int>>>
 * Field "a" is set to null by one call of "upConvertOrcStruct", but a subsequent call should still have the nested
 * field filled.
 */
@Test
public void testNestedFieldSequenceSet() {
  TypeDescription schema = TypeDescription.fromString("struct<a:array<struct<a:int,b:int>>>");
  // Target struct, fully populated with the fixed value 1.
  OrcStruct struct = (OrcStruct) OrcUtils.createValueRecursively(schema);
  OrcTestUtils.fillOrcStructWithFixedValue(struct, schema, 1, "test", true);
  // Source struct with the nested list field "a" explicitly nulled out.
  OrcStruct structWithEmptyArray = (OrcStruct) OrcUtils.createValueRecursively(schema);
  OrcTestUtils.fillOrcStructWithFixedValue(structWithEmptyArray, schema, 1, "test", true);
  structWithEmptyArray.setFieldValue("a", null);
  OrcUtils.upConvertOrcStruct(structWithEmptyArray, struct, schema);
  Assert.assertEquals(struct, structWithEmptyArray);
  // A second up-conversion from a fully populated source should re-create the nested field.
  OrcStruct struct_2 = (OrcStruct) OrcUtils.createValueRecursively(schema);
  OrcTestUtils.fillOrcStructWithFixedValue(struct_2, schema, 2, "test", true);
  OrcUtils.upConvertOrcStruct(struct_2, struct, schema);
  Assert.assertEquals(struct, struct_2);
}
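For reference, a minimal hand-built record for this schema using the mapred Writable types looks roughly like the sketch below; this is illustrative only and assumes imports of org.apache.orc.mapred.OrcList and org.apache.hadoop.io.IntWritable, and that the fixed-value fill above produces an equivalent shape.

// Illustrative only: manually building a record for struct<a:array<struct<a:int,b:int>>>.
TypeDescription schema = TypeDescription.fromString("struct<a:array<struct<a:int,b:int>>>");
TypeDescription listSchema = schema.getChildren().get(0);          // array<struct<a:int,b:int>>
TypeDescription elementSchema = listSchema.getChildren().get(0);   // struct<a:int,b:int>
OrcStruct element = new OrcStruct(elementSchema);
element.setFieldValue("a", new IntWritable(1));
element.setFieldValue("b", new IntWritable(1));
OrcList<OrcStruct> list = new OrcList<>(listSchema);
list.add(element);
OrcStruct record = new OrcStruct(schema);
record.setFieldValue("a", list);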
use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class GenericRecordToOrcValueWriterTest method deserializeOrcRecords.
public static final List<Writable> deserializeOrcRecords(Path orcFilePath, FileSystem fs) throws IOException {
  org.apache.orc.Reader fileReader = OrcFile.createReader(orcFilePath, new OrcFile.ReaderOptions(new Configuration()));
  RecordReader recordReader = fileReader.rows();
  TypeDescription schema = fileReader.getSchema();
  VectorizedRowBatch batch = schema.createRowBatch();
  // A single batch is read up front, so this helper assumes all rows fit in one VectorizedRowBatch.
  recordReader.nextBatch(batch);
  int rowInBatch = 0;
  // result container
  List<Writable> orcRecords = new ArrayList<>();
  long rowCount = fileReader.getNumberOfRows();
  while (rowCount > 0) {
    // Deserialize records using the MapReduce-style API
    if (schema.getCategory() == TypeDescription.Category.STRUCT) {
      OrcStruct result = (OrcStruct) OrcStruct.createValue(fileReader.getSchema());
      List<TypeDescription> children = schema.getChildren();
      int numberOfChildren = children.size();
      for (int i = 0; i < numberOfChildren; ++i) {
        // nextValue copies the column-vector entry at rowInBatch into the reusable Writable for field i.
        result.setFieldValue(i, nextValue(batch.cols[i], rowInBatch, children.get(i), result.getFieldValue(i)));
      }
      orcRecords.add(result);
    } else {
      throw new UnsupportedOperationException("The serialized records have to be a struct in the outer-most layer.");
    }
    rowCount -= 1;
    rowInBatch += 1;
  }
  return orcRecords;
}
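A hypothetical caller of this helper might look like the following; the file path and field name are placeholders, not taken from the test.

// Read back a small ORC file and inspect the first record by field name.
FileSystem fs = FileSystem.getLocal(new Configuration());
Path orcFilePath = new Path("/tmp/example.orc");               // placeholder path
List<Writable> records = deserializeOrcRecords(orcFilePath, fs);
OrcStruct first = (OrcStruct) records.get(0);
Writable someField = first.getFieldValue("someField");         // placeholder field name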
use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class GenericRecordToOrcValueWriterTest method getUnionFieldFromStruct.
/**
* Accessing "fields" using reflection to work around access modifiers.
*/
private OrcUnion getUnionFieldFromStruct(Writable struct) {
  try {
    OrcStruct orcStruct = (OrcStruct) struct;
    // "fields" is the private array backing the struct's field values.
    Field objectArr = OrcStruct.class.getDeclaredField("fields");
    objectArr.setAccessible(true);
    return (OrcUnion) ((Object[]) objectArr.get(orcStruct))[0];
  } catch (Exception e) {
    throw new RuntimeException("Cannot access with reflection", e);
  }
}
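Continuing the usage sketch after deserializeOrcRecords above, the reflective accessor supports assertions of roughly this shape (a sketch; the expectations are placeholders):

// Extract the union from the first deserialized record and check which branch is set.
OrcUnion union = getUnionFieldFromStruct(records.get(0));
byte tag = union.getTag();             // index of the active union branch
Object unionValue = union.getObject(); // value held by that branch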
use of org.apache.orc.mapred.OrcStruct in project druid by druid-io.
the class OrcStructConverter method convertField.
/**
 * Convert an ORC struct field as though it were a map, by fieldIndex. Complex types will be transformed
 * into Java lists and maps when possible ({@link OrcStructConverter#convertList} and
 * {@link OrcStructConverter#convertMap}), and
 * primitive types will be extracted into an ingestion-friendly state (e.g. 'int' and 'long'). Finally,
 * if a field is not present, this method will return null.
 *
 * Note: "Union" types are not currently supported and will be returned as null.
 */
@Nullable
Object convertField(OrcStruct struct, int fieldIndex) {
if (fieldIndex < 0) {
return null;
}
TypeDescription schema = struct.getSchema();
TypeDescription fieldDescription = schema.getChildren().get(fieldIndex);
WritableComparable fieldValue = struct.getFieldValue(fieldIndex);
if (fieldValue == null) {
return null;
}
if (fieldDescription.getCategory().isPrimitive()) {
return convertPrimitive(fieldDescription, fieldValue, binaryAsString);
} else {
/*
ORC TYPE WRITABLE TYPE
array org.apache.orc.mapred.OrcList
map org.apache.orc.mapred.OrcMap
struct org.apache.orc.mapred.OrcStruct
uniontype org.apache.orc.mapred.OrcUnion
*/
switch(fieldDescription.getCategory()) {
case LIST:
OrcList orcList = (OrcList) fieldValue;
return convertList(fieldDescription, orcList, binaryAsString);
case MAP:
OrcMap map = (OrcMap) fieldValue;
return convertMap(fieldDescription, map, binaryAsString);
case STRUCT:
OrcStruct structMap = (OrcStruct) fieldValue;
return convertStructToMap(structMap);
case UNION:
// sorry union types :(
default:
return null;
}
}
}
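The "as though it were a map" access pattern implies a by-name lookup one level up. A minimal sketch of such a caller is shown below; convertFieldByName is a hypothetical name and is not part of the Druid class.

@Nullable
Object convertFieldByName(OrcStruct struct, String fieldName) {
  // getFieldNames() lists the struct's children in declaration order,
  // so indexOf yields the fieldIndex expected by convertField (or -1 if absent).
  int fieldIndex = struct.getSchema().getFieldNames().indexOf(fieldName);
  return convertField(struct, fieldIndex);
}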
use of org.apache.orc.mapred.OrcStruct in project druid by druid-io.
the class OrcHadoopInputRowParserTest method getFirstRow.
private static OrcStruct getFirstRow(Job job, String orcPath) throws IOException {
  File testFile = new File(orcPath);
  Path path = new Path(testFile.getAbsoluteFile().toURI());
  FileSplit split = new FileSplit(path, 0, testFile.length(), new String[] { "host" });
  InputFormat<NullWritable, OrcStruct> inputFormat = ReflectionUtils.newInstance(OrcInputFormat.class, job.getConfiguration());
  RecordReader<NullWritable, OrcStruct> reader = inputFormat.getRecordReader(split, new JobConf(job.getConfiguration()), null);
  try {
    final NullWritable key = reader.createKey();
    final OrcStruct value = reader.createValue();
    if (reader.next(key, value)) {
      return value;
    } else {
      throw new NoSuchElementException();
    }
  } finally {
    reader.close();
  }
}
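A hypothetical call site for this helper, with a placeholder file path and column name:

// Load the first row of a test file and read one column by name.
Job job = Job.getInstance(new Configuration());
OrcStruct firstRow = getFirstRow(job, "example/test_1.orc");     // placeholder path
WritableComparable nameColumn = firstRow.getFieldValue("name");  // placeholder column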