Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.
The class TestStatsSerde, method testLazyBinarySerDe.
/**
 * Test LazyBinarySerDe.
 */
public void testLazyBinarySerDe() throws Throwable {
  try {
    System.out.println("test: testLazyBinarySerDe");
    int num = 1000;
    Random r = new Random(1234);
    MyTestClass[] rows = new MyTestClass[num];
    for (int i = 0; i < num; i++) {
      MyTestClass t = new MyTestClass();
      ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
      t.randomFill(r, extraTypeInfo);
      rows[i] = t;
    }
    StructObjectInspector rowOI = (StructObjectInspector) ObjectInspectorFactory
        .getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
    String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
    String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);
    Properties schema = new Properties();
    schema.setProperty(serdeConstants.LIST_COLUMNS, fieldNames);
    schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes);
    LazyBinarySerDe serDe = new LazyBinarySerDe();
    SerDeUtils.initializeSerDe(serDe, new Configuration(), schema, null);
    deserializeAndSerializeLazyBinary(serDe, rows, rowOI);
    System.out.println("test: testLazyBinarySerDe - OK");
  } catch (Throwable e) {
    e.printStackTrace();
    throw e;
  }
}
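The deserializeAndSerializeLazyBinary helper is defined elsewhere in TestStatsSerde and is not shown here. A minimal sketch of what such a round-trip check could look like, assuming the helper simply serializes each row and compares the deserialized result against the original (given the class name, the real helper presumably also checks the serde's SerDeStats):

// A hedged sketch, not the actual TestStatsSerde helper: round-trip each row
// through the serde and compare it field-by-field against the original object.
private void deserializeAndSerializeLazyBinary(LazyBinarySerDe serDe, Object[] rows,
    ObjectInspector rowOI) throws SerDeException {
  for (Object row : rows) {
    // LazyBinarySerDe serializes into a BytesWritable
    BytesWritable bytes = (BytesWritable) serDe.serialize(row, rowOI);
    Object deserialized = serDe.deserialize(bytes);
    // compare() returns 0 when the two objects are equal under their inspectors
    assertEquals(0, ObjectInspectorUtils.compare(row, rowOI, deserialized,
        serDe.getObjectInspector()));
  }
}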
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.
The class TestLazyBinaryColumnarSerDe, method testHandlingAlteredSchemas.
/**
 * HIVE-5788
 * <p>
 * Background: in "add column" cases, the table metadata changes but the data does not. Columns
 * missing from the data but required by the metadata are interpreted as null.
 * <p>
 * This tests the use case of altering the columns of a table that already holds some data, then
 * adding more data under the new schema, and checking that this serde can read both kinds of
 * rows from the resulting table.
 * @throws SerDeException
 */
public void testHandlingAlteredSchemas() throws SerDeException {
  StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory
      .getReflectionObjectInspector(BeforeStruct.class, ObjectInspectorOptions.JAVA);
  String cols = ObjectInspectorUtils.getFieldNames(oi);
  Properties props = new Properties();
  props.setProperty(serdeConstants.LIST_COLUMNS, cols);
  props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
  // serialize some data in the schema before it is altered.
  LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
  SerDeUtils.initializeSerDe(serde, new Configuration(), props, null);
  BeforeStruct bs1 = new BeforeStruct();
  bs1.l1 = 1L;
  bs1.l2 = 2L;
  BytesRefArrayWritable braw1 = (BytesRefArrayWritable) serde.serialize(bs1, oi);
  // alter table add column: change the metadata
  oi = (StructObjectInspector) ObjectInspectorFactory
      .getReflectionObjectInspector(AfterStruct.class, ObjectInspectorOptions.JAVA);
  cols = ObjectInspectorUtils.getFieldNames(oi);
  props = new Properties();
  props.setProperty(serdeConstants.LIST_COLUMNS, cols);
  props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
  serde = new LazyBinaryColumnarSerDe();
  SerDeUtils.initializeSerDe(serde, new Configuration(), props, null);
  // serialize some data in the schema after it is altered.
  AfterStruct as = new AfterStruct();
  as.l1 = 11L;
  as.l2 = 12L;
  as.l3 = 13L;
  BytesRefArrayWritable braw2 = (BytesRefArrayWritable) serde.serialize(as, oi);
  // fetch operator
  serde = new LazyBinaryColumnarSerDe();
  SerDeUtils.initializeSerDe(serde, new Configuration(), props, null);
  // fetch the row inserted before the schema was altered and verify
  LazyBinaryColumnarStruct struct1 = (LazyBinaryColumnarStruct) serde.deserialize(braw1);
  oi = (StructObjectInspector) serde.getObjectInspector();
  List<Object> objs1 = oi.getStructFieldsDataAsList(struct1);
  Assert.assertEquals(((LongWritable) objs1.get(0)).get(), 1L);
  Assert.assertEquals(((LongWritable) objs1.get(1)).get(), 2L);
  Assert.assertNull(objs1.get(2));
  // fetch the row inserted after the schema was altered and verify
  LazyBinaryColumnarStruct struct2 = (LazyBinaryColumnarStruct) serde.deserialize(braw2);
  List<Object> objs2 = struct2.getFieldsAsList();
  Assert.assertEquals(((LongWritable) objs2.get(0)).get(), 11L);
  Assert.assertEquals(((LongWritable) objs2.get(1)).get(), 12L);
  Assert.assertEquals(((LongWritable) objs2.get(2)).get(), 13L);
}
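BeforeStruct and AfterStruct are nested classes of the test and are not shown. A plausible sketch, with field names and types inferred from the assignments above (l1, l2, l3 set to long literals, and the third field read back as a LongWritable):

// Hypothetical reconstruction of the two POJOs the reflection inspector is built
// from; AfterStruct adds the field l3 to simulate an ALTER TABLE ... ADD COLUMNS.
static class BeforeStruct {
  Long l1;
  Long l2;
}

static class AfterStruct {
  Long l1;
  Long l2;
  Long l3; // the "added" column, absent from rows written before the alter
}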
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.
The class TestInputOutputFormat, method testNonVectorReaderNoFooterSerialize.
@Test
public void testNonVectorReaderNoFooterSerialize() throws Exception {
  MockFileSystem fs = new MockFileSystem(conf);
  MockPath mockPath = new MockPath(fs, "mock:///mocktable1");
  conf.set("hive.orc.splits.include.file.footer", "false");
  conf.set("mapred.input.dir", mockPath.toString());
  conf.set("fs.defaultFS", "mock:///");
  conf.set("fs.mock.impl", MockFileSystem.class.getName());
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory
        .getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  OrcInputFormat orcInputFormat = new OrcInputFormat();
  InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
  assertEquals(2, splits.length);
  int readOpsBefore = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsBefore = statistics.getReadOps();
    }
  }
  assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
  for (InputSplit split : splits) {
    assertTrue("OrcSplit is expected", split instanceof OrcSplit);
    // ETL strategies will have start=3 (start of first stripe)
    assertTrue(split.toString().contains("start=3"));
    assertTrue(split.toString().contains("hasFooter=false"));
    assertTrue(split.toString().contains("hasBase=true"));
    assertTrue(split.toString().contains("deltas=0"));
    if (split instanceof OrcSplit) {
      assertFalse("No footer serialize test for non-vector reader, hasFooter is not expected in"
          + " orc splits.", ((OrcSplit) split).hasFooter());
    }
    orcInputFormat.getRecordReader(split, conf, null);
  }
  int readOpsDelta = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsDelta = statistics.getReadOps() - readOpsBefore;
    }
  }
  // call-1: open to read footer - split 1 => mock:/mocktable1/0_0
  // call-2: open to read data - split 1 => mock:/mocktable1/0_0
  // call-3: open to read footer - split 2 => mock:/mocktable1/0_1
  // call-4: open to read data - split 2 => mock:/mocktable1/0_1
  assertEquals(4, readOpsDelta);
  // revert back to local fs
  conf.set("fs.defaultFS", "file:///");
}
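All three TestInputOutputFormat snippets in this section write MyRow objects, another nested class that is not shown. A hedged sketch, assuming two int fields from the two-argument constructor calls and inferring the two static helpers from the ACID test's configuration further below:

// Hypothetical reconstruction of the row class these ORC tests reflect over; the
// reflection object inspector only needs the fields, which map to two int columns.
static class MyRow {
  int x;
  int y;

  MyRow(int x, int y) {
    this.x = x;
    this.y = y;
  }

  // assumed helpers, referenced by the ACID test's schema-evolution configuration
  static String getColumnNamesProperty() {
    return "x,y";
  }

  static String getColumnTypesProperty() {
    return "int:int";
  }
}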
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.
The class TestInputOutputFormat, method testVectorReaderFooterSerialize.
@Test
public void testVectorReaderFooterSerialize() throws Exception {
  MockFileSystem fs = new MockFileSystem(conf);
  MockPath mockPath = new MockPath(fs, "mock:///mocktable4");
  conf.set("hive.orc.splits.include.file.footer", "true");
  conf.set("mapred.input.dir", mockPath.toString());
  conf.set("fs.defaultFS", "mock:///");
  conf.set("fs.mock.impl", MockFileSystem.class.getName());
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory
        .getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  JobConf jobConf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
      "mocktable4", inspector, true, 0);
  Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  OrcInputFormat orcInputFormat = new OrcInputFormat();
  InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
  assertEquals(2, splits.length);
  int readOpsBefore = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsBefore = statistics.getReadOps();
    }
  }
  assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
  for (InputSplit split : splits) {
    assertTrue("OrcSplit is expected", split instanceof OrcSplit);
    // ETL strategies will have start=3 (start of first stripe)
    assertTrue(split.toString().contains("start=3"));
    assertTrue(split.toString().contains("hasFooter=true"));
    assertTrue(split.toString().contains("hasBase=true"));
    assertTrue(split.toString().contains("deltas=0"));
    if (split instanceof OrcSplit) {
      assertTrue("Footer serialize test for vector reader, hasFooter is expected in"
          + " orc splits.", ((OrcSplit) split).hasFooter());
    }
    orcInputFormat.getRecordReader(split, jobConf, Reporter.NULL);
  }
  int readOpsDelta = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsDelta = statistics.getReadOps() - readOpsBefore;
    }
  }
  // call-1: open to read data - split 1 => mock:/mocktable4/0_0
  // call-2: open to read data - split 2 => mock:/mocktable4/0_1
  assertEquals(2, readOpsDelta);
  // revert back to local fs
  conf.set("fs.defaultFS", "file:///");
}
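This test is the footer-serializing counterpart of testNonVectorReaderNoFooterSerialize above: with the footer carried inside each OrcSplit, the record reader opens each file once for data only, so the expected read-op delta drops from 4 to 2. The toggle, as both tests use it (key name taken verbatim from the snippets above):

// "true": OrcInputFormat embeds each file's footer in its OrcSplit, saving the
// record reader one open-to-read-footer call per file.
conf.set("hive.orc.splits.include.file.footer", "true");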
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.
The class TestInputOutputFormat, method testACIDReaderFooterSerialize.
@Test
public void testACIDReaderFooterSerialize() throws Exception {
  MockFileSystem fs = new MockFileSystem(conf);
  MockPath mockPath = new MockPath(fs, "mock:///mocktable6");
  conf.set(ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN.varname, "true");
  conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
  conf.set("hive.orc.splits.include.file.footer", "true");
  conf.set("mapred.input.dir", mockPath.toString());
  conf.set("fs.defaultFS", "mock:///");
  conf.set("fs.mock.impl", MockFileSystem.class.getName());
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory
        .getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  OrcInputFormat orcInputFormat = new OrcInputFormat();
  InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
  assertEquals(2, splits.length);
  int readOpsBefore = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsBefore = statistics.getReadOps();
    }
  }
  assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
  for (InputSplit split : splits) {
    assertTrue("OrcSplit is expected", split instanceof OrcSplit);
    // ETL strategies will have start=3 (start of first stripe)
    assertTrue(split.toString().contains("start=3"));
    assertTrue(split.toString().contains("hasFooter=true"));
    assertTrue(split.toString().contains("hasBase=true"));
    assertTrue(split.toString().contains("deltas=0"));
    assertTrue(split.toString().contains("isOriginal=true"));
    if (split instanceof OrcSplit) {
      assertTrue("Footer serialize test for ACID reader, hasFooter is expected in"
          + " orc splits.", ((OrcSplit) split).hasFooter());
    }
    orcInputFormat.getRecordReader(split, conf, Reporter.NULL);
  }
  int readOpsDelta = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsDelta = statistics.getReadOps() - readOpsBefore;
    }
  }
  // call-1: open to read data - split 1 => mock:/mocktable6/0_0
  // call-2: AcidUtils.getAcidState - split 1 => ls mock:/mocktable6
  // call-3: open to read data - split 2 => mock:/mocktable6/0_1
  // call-4: AcidUtils.getAcidState - split 2 => ls mock:/mocktable6
  // call-5: read footer - split 2 => mock:/mocktable6/0_0 (to get offset since it's original file)
  // call-6: file stat - split 2 => mock:/mocktable6/0_0
  assertEquals(6, readOpsDelta);
  // revert back to local fs
  conf.set("fs.defaultFS", "file:///");
}
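The read-op bookkeeping is repeated verbatim in all three TestInputOutputFormat methods above. A small hypothetical helper could collapse both the before-snapshot and the delta computation; a sketch:

// Hypothetical helper: return the current read-op count for a filesystem scheme,
// or -1 if no statistics were recorded for it.
private static int readOpsForScheme(String scheme) {
  int readOps = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase(scheme)) {
      readOps = statistics.getReadOps();
    }
  }
  return readOps;
}

With it, each test would reduce to int readOpsBefore = readOpsForScheme("mock") before reading and int readOpsDelta = readOpsForScheme("mock") - readOpsBefore afterwards.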