Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache, from the class TestOrcRawRecordMerger, method testEmpty.
@Test
public void testEmpty() throws Exception {
  final int BUCKET = 0;
  Configuration conf = new Configuration();
  OrcOutputFormat of = new OrcOutputFormat();
  FileSystem fs = FileSystem.getLocal(conf);
  Path root = new Path(tmpDir, "testEmpty").makeQualified(fs);
  fs.delete(root, true);
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
        ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  // write the empty base
  AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
      .inspector(inspector).bucket(BUCKET).writingBase(true)
      .maximumTransactionId(100).finalDestination(root);
  of.getRecordUpdater(root, options).close(false);
  ValidTxnList txnList = new ValidReadTxnList("200:" + Long.MAX_VALUE);
  AcidUtils.Directory directory = AcidUtils.getAcidState(root, conf, txnList);
  Path basePath = AcidUtils.createBucketFile(directory.getBaseDirectory(), BUCKET);
  Reader baseReader = OrcFile.createReader(basePath, OrcFile.readerOptions(conf));
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
  HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, true);
  OrcRawRecordMerger merger = new OrcRawRecordMerger(conf, true, baseReader, false,
      BUCKET, createMaximalTxnList(), new Reader.Options(),
      AcidUtils.getPaths(directory.getCurrentDirectories()));
  RecordIdentifier key = merger.createKey();
  OrcStruct value = merger.createValue();
  assertEquals(false, merger.next(key, value));
}
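The test above acquires the inspector for MyRow inside a synchronized block; the Hive tests consistently serialize these calls, presumably because the factory's reflection-based inspector cache was not designed for concurrent initialization. A minimal, self-contained sketch of the getReflectionObjectInspector call itself, using a hypothetical Point POJO rather than anything from the tests:

  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
  import org.apache.hadoop.hive.serde2.objectinspector.StructField;
  import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

  public class ReflectionInspectorSketch {
    // Hypothetical POJO; not part of the Hive tests.
    static class Point {
      int x;
      int y;
    }

    public static void main(String[] args) {
      ObjectInspector oi;
      // Mirror the tests' locking discipline around the factory's reflection cache.
      synchronized (ReflectionInspectorSketch.class) {
        oi = ObjectInspectorFactory.getReflectionObjectInspector(
            Point.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
      }
      // Reflection on a plain Java class yields a struct inspector whose fields
      // follow the class declaration.
      StructObjectInspector soi = (StructObjectInspector) oi;
      for (StructField f : soi.getAllStructFieldRefs()) {
        System.out.println(f.getFieldName() + ": "
            + f.getFieldObjectInspector().getTypeName());
      }
      // Expected output:
      // x: int
      // y: int
    }
  }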
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache, from the class TestOrcRecordUpdater, method testUpdates.
@Test
public void testUpdates() throws Exception {
  Path root = new Path(workDir, "testUpdates");
  Configuration conf = new Configuration();
  FileSystem fs = root.getFileSystem(conf);
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
        ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  int bucket = 20;
  AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
      .filesystem(fs).bucket(bucket).writingBase(false)
      .minimumTransactionId(100).maximumTransactionId(100)
      .inspector(inspector).reporter(Reporter.NULL)
      .recordIdColumn(1).finalDestination(root);
  RecordUpdater updater = new OrcRecordUpdater(root, options);
  updater.update(100, new MyRow("update", 30, 10, bucket));
  updater.delete(100, new MyRow("", 60, 40, bucket));
  assertEquals(-1L, updater.getStats().getRowCount());
  updater.close(false);
  Path bucketPath = AcidUtils.createFilename(root, options);
  Reader reader = OrcFile.createReader(bucketPath,
      new OrcFile.ReaderOptions(conf).filesystem(fs));
  assertEquals(2, reader.getNumberOfRows());
  RecordReader rows = reader.rows();
  // check the contents of the file
  assertEquals(true, rows.hasNext());
  OrcStruct row = (OrcStruct) rows.next(null);
  assertEquals(OrcRecordUpdater.UPDATE_OPERATION, OrcRecordUpdater.getOperation(row));
  assertEquals(100, OrcRecordUpdater.getCurrentTransaction(row));
  assertEquals(10, OrcRecordUpdater.getOriginalTransaction(row));
  assertEquals(20, OrcRecordUpdater.getBucket(row));
  assertEquals(30, OrcRecordUpdater.getRowId(row));
  assertEquals("update", OrcRecordUpdater.getRow(row).getFieldValue(0).toString());
  assertEquals(true, rows.hasNext());
  row = (OrcStruct) rows.next(null);
  assertEquals(100, OrcRecordUpdater.getCurrentTransaction(row));
  assertEquals(40, OrcRecordUpdater.getOriginalTransaction(row));
  assertEquals(20, OrcRecordUpdater.getBucket(row));
  assertEquals(60, OrcRecordUpdater.getRowId(row));
  assertNull(OrcRecordUpdater.getRow(row));
  assertEquals(false, rows.hasNext());
}
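MyRow is a private helper class in TestOrcRecordUpdater and is not reproduced on this page. A sketch consistent with how it is used above; the field names and constructor parameter order are inferred from the assertions, so treat them as assumptions:

  // Sketch only: MyRow as inferred from the calls and assertions above.
  // Assumes org.apache.hadoop.io.Text and org.apache.hadoop.hive.ql.io.RecordIdentifier.
  static class MyRow {
    Text field;
    RecordIdentifier ROW__ID;  // field index 1, matching options.recordIdColumn(1)

    MyRow(String val) {
      this.field = new Text(val);
      this.ROW__ID = null;
    }

    // Parameter order (value, rowId, originalTransaction, bucket) inferred from
    // updater.update(100, new MyRow("update", 30, 10, bucket)) together with the
    // assertions getRowId(row) == 30, getOriginalTransaction(row) == 10, getBucket(row) == 20.
    MyRow(String val, long rowId, long origTxn, int bucket) {
      this.field = new Text(val);
      this.ROW__ID = new RecordIdentifier(origTxn, bucket, rowId);
    }
  }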
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache, from the class TestOrcRecordUpdater, method testWriterTblProperties.
@Test
public void testWriterTblProperties() throws Exception {
  Path root = new Path(workDir, "testWriterTblProperties");
  Configuration conf = new Configuration();
  // Must use raw local because the checksummer doesn't honor flushes.
  FileSystem fs = FileSystem.getLocal(conf).getRaw();
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
        ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Properties tblProps = new Properties();
  tblProps.setProperty("orc.compress", "SNAPPY");
  tblProps.setProperty("orc.compress.size", "8192");
  HiveConf.setIntVar(conf, HiveConf.ConfVars.HIVE_ORC_BASE_DELTA_RATIO, 4);
  AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
      .filesystem(fs).bucket(10).writingBase(false)
      .minimumTransactionId(10).maximumTransactionId(19)
      .inspector(inspector).reporter(Reporter.NULL)
      .finalDestination(root).tableProperties(tblProps);
  RecordUpdater updater = new OrcRecordUpdater(root, options);
  updater.insert(11, new MyRow("first"));
  updater.insert(11, new MyRow("second"));
  updater.insert(11, new MyRow("third"));
  updater.flush();
  updater.insert(12, new MyRow("fourth"));
  updater.insert(12, new MyRow("fifth"));
  updater.flush();
  PrintStream origOut = System.out;
  ByteArrayOutputStream myOut = new ByteArrayOutputStream();
  System.setOut(new PrintStream(myOut));
  FileDump.main(new String[] { root.toUri().toString() });
  System.out.flush();
  String outDump = new String(myOut.toByteArray());
  assertEquals(true, outDump.contains("Compression: SNAPPY"));
  assertEquals(true, outDump.contains("Compression size: 2048"));
  System.setOut(origOut);
  updater.close(false);
}
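The "Compression size: 2048" assertion is worth a second look: the table properties request an orc.compress.size of 8192, but this updater writes a delta file (writingBase(false)), and with HiveConf.ConfVars.HIVE_ORC_BASE_DELTA_RATIO set to 4 the writer appears to scale the delta's compression buffer size down by that ratio, so the file dump shows 8192 / 4 = 2048, while the codec choice (SNAPPY) carries over unchanged.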
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache, from the class TestOrcSerDeStats, method testStringAndBinaryStatistics.
@Test
public void testStringAndBinaryStatistics() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcSerDeStats.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(SimpleStruct.class,
        ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));
  writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3, 4), "foo"));
  writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3), "bar"));
  writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3, 4, 5), null));
  writer.addRow(new SimpleStruct(null, "hi"));
  writer.close();
  assertEquals(4, writer.getNumberOfRows());
  assertEquals(273, writer.getRawDataSize());
  Reader reader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf).filesystem(fs));
  assertEquals(4, reader.getNumberOfRows());
  assertEquals(273, reader.getRawDataSize());
  assertEquals(15, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1")));
  assertEquals(258, reader.getRawDataSizeOfColumns(Lists.newArrayList("string1")));
  assertEquals(273, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1", "string1")));
  // check the stats
  ColumnStatistics[] stats = reader.getStatistics();
  assertEquals(4, stats[0].getNumberOfValues());
  assertEquals("count: 4 hasNull: false", stats[0].toString());
  assertEquals(3, stats[1].getNumberOfValues());
  assertEquals(15, ((BinaryColumnStatistics) stats[1]).getSum());
  assertEquals("count: 3 hasNull: true sum: 15", stats[1].toString());
  assertEquals(3, stats[2].getNumberOfValues());
  assertEquals("bar", ((StringColumnStatistics) stats[2]).getMinimum());
  assertEquals("hi", ((StringColumnStatistics) stats[2]).getMaximum());
  assertEquals(8, ((StringColumnStatistics) stats[2]).getSum());
  assertEquals("count: 3 hasNull: true min: bar max: hi sum: 8", stats[2].toString());
  // check the inspectors
  StructObjectInspector readerInspector = (StructObjectInspector) reader.getObjectInspector();
  assertEquals(ObjectInspector.Category.STRUCT, readerInspector.getCategory());
  assertEquals("struct<bytes1:binary,string1:string>", readerInspector.getTypeName());
  List<? extends StructField> fields = readerInspector.getAllStructFieldRefs();
  BinaryObjectInspector bi = (BinaryObjectInspector)
      readerInspector.getStructFieldRef("bytes1").getFieldObjectInspector();
  StringObjectInspector st = (StringObjectInspector)
      readerInspector.getStructFieldRef("string1").getFieldObjectInspector();
  RecordReader rows = reader.rows();
  Object row = rows.next(null);
  assertNotNull(row);
  // check the contents of the first row
  assertEquals(bytes(0, 1, 2, 3, 4),
      bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
  assertEquals("foo",
      st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(1))));
  // check the contents of the second row
  assertEquals(true, rows.hasNext());
  row = rows.next(row);
  assertEquals(bytes(0, 1, 2, 3),
      bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
  assertEquals("bar",
      st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(1))));
  // check the contents of the third row
  assertEquals(true, rows.hasNext());
  row = rows.next(row);
  assertEquals(bytes(0, 1, 2, 3, 4, 5),
      bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
  assertNull(st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(1))));
  // check the contents of the fourth row
  assertEquals(true, rows.hasNext());
  row = rows.next(row);
  assertNull(bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
  assertEquals("hi",
      st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(1))));
  // no more rows; close the reader
  assertEquals(false, rows.hasNext());
  rows.close();
}
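SimpleStruct and the bytes(...) helper are defined in the surrounding test sources, not in this excerpt. The field names bytes1 and string1 are confirmed by the schema assertion struct<bytes1:binary,string1:string>; the rest of this sketch is inferred from usage:

  // Sketch: inferred from usage; the real definitions live in the Hive test sources.
  // Assumes org.apache.hadoop.io.BytesWritable and org.apache.hadoop.io.Text.
  static BytesWritable bytes(int... items) {
    byte[] result = new byte[items.length];
    for (int i = 0; i < items.length; i++) {
      result[i] = (byte) items[i];
    }
    return new BytesWritable(result);
  }

  static class SimpleStruct {
    BytesWritable bytes1;  // maps to the ORC column "bytes1" (binary)
    Text string1;          // maps to the ORC column "string1" (string)

    SimpleStruct(BytesWritable b1, String s1) {
      this.bytes1 = b1;
      this.string1 = (s1 == null) ? null : new Text(s1);
    }
  }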
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache, from the class TestOrcSerDeStats, method testOrcSerDeStatsMap.
@Test
public void testOrcSerDeStatsMap() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcSerDeStats.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(MapStruct.class,
        ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf).inspector(inspector).stripeSize(10000).bufferSize(10000));
  for (int row = 0; row < 1000; row++) {
    Map<String, Double> test = new HashMap<String, Double>();
    for (int i = 0; i < 10; i++) {
      test.put("hi" + i, 2.0);
    }
    writer.addRow(new MapStruct(test));
  }
  writer.close();
  // stats from writer
  assertEquals(1000, writer.getNumberOfRows());
  assertEquals(950000, writer.getRawDataSize());
  Reader reader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf).filesystem(fs));
  // stats from reader
  assertEquals(1000, reader.getNumberOfRows());
  assertEquals(950000, reader.getRawDataSize());
  assertEquals(950000, reader.getRawDataSizeOfColumns(Lists.newArrayList("map1")));
}
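MapStruct is likewise defined elsewhere in the test class. A sketch matching its use above, with the column name map1 taken from the getRawDataSizeOfColumns assertion:

  // Sketch: inferred from usage above; the real class sits in the Hive test sources.
  static class MapStruct {
    Map<String, Double> map1;  // column name matches getRawDataSizeOfColumns("map1")

    MapStruct(Map<String, Double> m) {
      this.map1 = m;
    }
  }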