Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project Hive by Apache.
From the class TestInputOutputFormat, method testNonVectorReaderFooterSerialize.
@Test
public void testNonVectorReaderFooterSerialize() throws Exception {
  MockFileSystem fs = new MockFileSystem(conf);
  MockPath mockPath = new MockPath(fs, "mock:///mocktable2");
  conf.set("hive.orc.splits.include.file.footer", "true");
  conf.set("mapred.input.dir", mockPath.toString());
  conf.set("fs.defaultFS", "mock:///");
  conf.set("fs.mock.impl", MockFileSystem.class.getName());
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
        MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  OrcInputFormat orcInputFormat = new OrcInputFormat();
  InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
  assertEquals(2, splits.length);
  int readOpsBefore = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsBefore = statistics.getReadOps();
    }
  }
  assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
  for (InputSplit split : splits) {
    assertTrue("OrcSplit is expected", split instanceof OrcSplit);
    // ETL strategies will have start=3 (start of first stripe)
    assertTrue(split.toString().contains("start=3"));
    assertTrue(split.toString().contains("hasFooter=true"));
    assertTrue(split.toString().contains("hasBase=true"));
    assertTrue(split.toString().contains("deltas=0"));
    if (split instanceof OrcSplit) {
      assertTrue("Footer serialize test for non-vector reader, hasFooter is expected in"
          + " orc splits.", ((OrcSplit) split).hasFooter());
    }
    orcInputFormat.getRecordReader(split, conf, Reporter.NULL);
  }
  int readOpsDelta = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsDelta = statistics.getReadOps() - readOpsBefore;
    }
  }
  // call-1: open to read data - split 1 => mock:/mocktable2/0_0
  // call-2: open to read data - split 2 => mock:/mocktable2/0_1
  assertEquals(2, readOpsDelta);
  // revert back to local fs
  conf.set("fs.defaultFS", "file:///");
}
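
Every example on this page hinges on the same step: handing a plain Java class to ObjectInspectorFactory.getReflectionObjectInspector, which builds a StructObjectInspector whose fields mirror the class's public fields. The tests wrap the call in a synchronized block, presumably to serialize access to the factory's internal inspector cache. A minimal, self-contained sketch of just that step; the Person class here is hypothetical and not part of the Hive tests:

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class ReflectionOIExample {
  // Hypothetical row class; its public fields become the struct's fields.
  public static class Person {
    public int id;
    public String name;
  }

  public static void main(String[] args) {
    StructObjectInspector oi = (StructObjectInspector)
        ObjectInspectorFactory.getReflectionObjectInspector(
            Person.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    // Walk the struct fields derived from Person by reflection.
    for (StructField field : oi.getAllStructFieldRefs()) {
      System.out.println(field.getFieldName() + ": "
          + field.getFieldObjectInspector().getTypeName());
    }
  }
}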
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project Hive by Apache.
From the class TestInputOutputFormat, method testACIDReaderNoFooterSerializeWithDeltas.
@Test
public void testACIDReaderNoFooterSerializeWithDeltas() throws Exception {
  conf.set("fs.defaultFS", "mock:///");
  conf.set("fs.mock.impl", MockFileSystem.class.getName());
  FileSystem fs = FileSystem.get(conf);
  MockPath mockPath = new MockPath(fs, "mock:///mocktable7");
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
  conf.set("hive.orc.splits.include.file.footer", "false");
  conf.set("mapred.input.dir", mockPath.toString());
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
        MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).bucket(1)
      .minimumWriteId(1).maximumWriteId(1).inspector(inspector).finalDestination(mockPath);
  OrcOutputFormat of = new OrcOutputFormat();
  RecordUpdater ru = of.getRecordUpdater(mockPath, options);
  for (int i = 0; i < 10; ++i) {
    ru.insert(options.getMinimumWriteId(), new MyRow(i, 2 * i));
  }
  // this deletes the side file
  ru.close(false);
  // set up props for read
  conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
  AcidUtils.setAcidOperationalProperties(conf, true, null);
  OrcInputFormat orcInputFormat = new OrcInputFormat();
  InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
  assertEquals(2, splits.length);
  int readOpsBefore = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsBefore = statistics.getReadOps();
    }
  }
  assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
  for (InputSplit split : splits) {
    assertTrue("OrcSplit is expected", split instanceof OrcSplit);
    // ETL strategies will have start=3 (start of first stripe)
    assertTrue(split.toString().contains("start=3"));
    assertTrue(split.toString().contains("hasFooter=false"));
    assertTrue(split.toString().contains("hasBase=true"));
    assertFalse("No footer serialize test for ACID reader, hasFooter is not expected in"
        + " orc splits.", ((OrcSplit) split).hasFooter());
    orcInputFormat.getRecordReader(split, conf, Reporter.NULL);
  }
  int readOpsDelta = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsDelta = statistics.getReadOps() - readOpsBefore;
    }
  }
  // call-1: open(mock:/mocktable7/0_0)
  // call-2: open(mock:/mocktable7/0_0)
  // call-3: listLocatedFileStatuses(mock:/mocktable7)
  // call-4: getFileStatus(mock:/mocktable7/delta_0000001_0000001_0000/_metadata_acid)
  // call-5: open(mock:/mocktable7/delta_0000001_0000001_0000/bucket_00001)
  // call-6: getFileStatus(mock:/mocktable7/delta_0000001_0000001_0000/_metadata_acid)
  // call-7: open(mock:/mocktable7/delta_0000001_0000001_0000/bucket_00001)
  assertEquals(7, readOpsDelta);
  // revert back to local fs
  conf.set("fs.defaultFS", "file:///");
}
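
The delta directory names in the call trace above are not arbitrary: they encode the write id range and statement id that the test sets through AcidOutputFormat.Options. A hypothetical helper, offered only to make the trace easier to read; the zero-padding widths are inferred from the names in the comments:

// Reproduces names like delta_0000001_0000001_0000 from the trace above:
// deltaDirName(1, 1, 0) matches minimumWriteId(1).maximumWriteId(1) with a
// statement id of 0.
static String deltaDirName(long minWriteId, long maxWriteId, int statementId) {
  return String.format("delta_%07d_%07d_%04d", minWriteId, maxWriteId, statementId);
}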
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project Hive by Apache.
From the class TestInputOutputFormat, method testVectorization.
/**
 * Test vectorization, non-acid, non-combine.
 * @throws Exception
 */
@Test
public void testVectorization() throws Exception {
  // get the object inspector for MyRow
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
        MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
      "vectorization", inspector, true, 1);
  // write the orc file to the mock file system
  Path path = new Path(conf.get("mapred.input.dir") + "/0_0");
  Writer writer = OrcFile.createWriter(path,
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  setBlocks(path, conf, new MockBlock("host0", "host1"));
  // call getsplits
  HiveInputFormat<?, ?> inputFormat = new HiveInputFormat<WritableComparable, Writable>();
  InputSplit[] splits = inputFormat.getSplits(conf, 10);
  assertEquals(1, splits.length);
  org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch> reader =
      inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
  NullWritable key = reader.createKey();
  VectorizedRowBatch value = reader.createValue();
  assertEquals(true, reader.next(key, value));
  assertEquals(10, value.count());
  LongColumnVector col0 = (LongColumnVector) value.cols[0];
  for (int i = 0; i < 10; i++) {
    assertEquals("checking " + i, i, col0.vector[i]);
  }
  assertEquals(false, reader.next(key, value));
}
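
The read loop above indexes col0.vector directly, which is safe here because the writer produced no nulls and no row filter is active. A general VectorizedRowBatch consumer also has to honor the selected mapping and each vector's null and repeating flags; a sketch of the fuller loop, using only the standard ColumnVector fields:

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

static void printColumn0(VectorizedRowBatch batch) {
  LongColumnVector col = (LongColumnVector) batch.cols[0];
  for (int i = 0; i < batch.size; i++) {
    // When selectedInUse is set, logical rows are remapped through selected[].
    int row = batch.selectedInUse ? batch.selected[i] : i;
    // A repeating vector stores its single value (and null flag) at index 0.
    int idx = col.isRepeating ? 0 : row;
    if (col.noNulls || !col.isNull[idx]) {
      System.out.println(col.vector[idx]);
    } else {
      System.out.println("NULL");
    }
  }
}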
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project Hive by Apache.
From the class TestLazyBinarySerDe, method testLazyBinarySerDe.
/**
 * The test entrance function.
 *
 * @throws Throwable
 */
public void testLazyBinarySerDe() throws Throwable {
  try {
    System.out.println("Beginning Test TestLazyBinarySerDe:");
    // generate the data
    int num = 1000;
    Random r = new Random(1234);
    MyTestClass[] rows = new MyTestClass[num];
    for (int i = 0; i < num; i++) {
      MyTestClass t = new MyTestClass();
      ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
      t.randomFill(r, extraTypeInfo);
      rows[i] = t;
    }
    StructObjectInspector rowOI = (StructObjectInspector) ObjectInspectorFactory
        .getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
    String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
    String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);
    // call the tests
    // 1/ test LazyBinarySerDe
    testLazyBinarySerDe(rows, rowOI, getSerDe(fieldNames, fieldTypes));
    // 2/ test LazyBinaryMap
    testLazyBinaryMap(r);
    // 3/ test serialization and deserialization with different schemas
    testShorterSchemaDeserialization(r);
    // 4/ test serialization and deserialization with different schemas
    testLongerSchemaDeserialization(r);
    // 5/ test serialization and deserialization with different schemas
    testShorterSchemaDeserialization1(r);
    // 6/ test serialization and deserialization with different schemas
    testLongerSchemaDeserialization1(r);
    System.out.println("Test TestLazyBinarySerDe passed!");
  } catch (Throwable e) {
    e.printStackTrace();
    throw e;
  }
}
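
The getSerDe helper called throughout TestLazyBinarySerDe is not reproduced on this page. A plausible sketch of what it must do, assuming the two-argument AbstractSerDe.initialize of this Hive era; treat the exact property wiring as an assumption rather than the test's verbatim code:

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;

static AbstractSerDe getSerDe(String fieldNames, String fieldTypes) throws Exception {
  // "columns" and "columns.types" carry the strings produced by
  // ObjectInspectorUtils.getFieldNames/getFieldTypes above.
  Properties schema = new Properties();
  schema.setProperty(serdeConstants.LIST_COLUMNS, fieldNames);
  schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes);
  LazyBinarySerDe serde = new LazyBinarySerDe();
  serde.initialize(new Configuration(), schema);
  return serde;
}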
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project Hive by Apache.
From the class TestLazyBinarySerDe, method testShorterSchemaDeserialization.
/**
 * Test shorter schema deserialization, where a bigger struct is serialized
 * and then deserialized with a smaller struct. Here the serialized struct
 * has 10 fields and is deserialized into a struct of 9 fields.
 */
private void testShorterSchemaDeserialization(Random r) throws Throwable {
  StructObjectInspector rowOI1 = (StructObjectInspector) ObjectInspectorFactory
      .getReflectionObjectInspector(MyTestClassBigger.class, ObjectInspectorOptions.JAVA);
  String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
  String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);
  AbstractSerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
  serde1.getObjectInspector();
  StructObjectInspector rowOI2 = (StructObjectInspector) ObjectInspectorFactory
      .getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
  String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
  String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);
  AbstractSerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
  ObjectInspector serdeOI2 = serde2.getObjectInspector();
  int num = 100;
  for (int itest = 0; itest < num; itest++) {
    MyTestClassBigger t = new MyTestClassBigger();
    ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
    t.randomFill(r, extraTypeInfo);
    BytesWritable bw = (BytesWritable) serde1.serialize(t, rowOI1);
    Object output = serde2.deserialize(bw);
    if (0 != compareDiffSizedStructs(t, rowOI1, output, serdeOI2)) {
      System.out.println("structs = " + SerDeUtils.getJSONString(t, rowOI1));
      System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
      System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw));
      assertEquals(t, output);
    }
  }
}
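
The trailing-field tolerance this test exercises can be demonstrated with far smaller structs. A sketch built on the getSerDe sketch above (and its imports); Wide is a hypothetical stand-in, and the point is that LazyBinary decodes fields positionally, so a narrower reader schema never reaches the bytes of the extra trailing field:

public static class Wide {
  public int a;
  public int b;
  public String c; // extra trailing field, invisible to the narrow schema
}

static void shorterSchemaDemo() throws Exception {
  StructObjectInspector wideOI = (StructObjectInspector) ObjectInspectorFactory
      .getReflectionObjectInspector(Wide.class, ObjectInspectorOptions.JAVA);
  AbstractSerDe wideSerDe = getSerDe(ObjectInspectorUtils.getFieldNames(wideOI),
      ObjectInspectorUtils.getFieldTypes(wideOI));
  // Reader schema keeps only the first two fields.
  AbstractSerDe narrowSerDe = getSerDe("a,b", "int:int");
  Wide row = new Wide();
  row.a = 1;
  row.b = 2;
  row.c = "dropped by the narrow reader";
  BytesWritable bytes = (BytesWritable) wideSerDe.serialize(row, wideOI);
  // Deserializing with the 2-field schema succeeds; field c is simply ignored.
  Object narrowRow = narrowSerDe.deserialize(bytes);
  System.out.println(SerDeUtils.getJSONString(narrowRow, narrowSerDe.getObjectInspector()));
}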