Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.
From the class TestInputOutputFormat, method testInOutFormat.
@Test
public void testInOutFormat() throws Exception {
  Properties properties = new Properties();
  properties.setProperty("columns", "x,y");
  properties.setProperty("columns.types", "int:int");
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
        MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  AbstractSerDe serde = new OrcSerde();
  HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
  org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer =
      outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true, properties, Reporter.NULL);
  writer.write(serde.serialize(new MyRow(1, 2), inspector));
  writer.write(serde.serialize(new MyRow(2, 2), inspector));
  writer.write(serde.serialize(new MyRow(3, 2), inspector));
  writer.close(true);
  serde = new OrcSerde();
  SerDeUtils.initializeSerDe(serde, conf, properties, null);
  assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
  inspector = (StructObjectInspector) serde.getObjectInspector();
  assertEquals("struct<x:int,y:int>", inspector.getTypeName());
  InputFormat<?, ?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());
  InputSplit[] splits = in.getSplits(conf, 1);
  assertEquals(1, splits.length);
  // test the validateInput method
  ArrayList<FileStatus> fileList = new ArrayList<FileStatus>();
  assertEquals(false, ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
  fileList.add(fs.getFileStatus(testFilePath));
  assertEquals(true, ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
  fileList.add(fs.getFileStatus(workDir));
  assertEquals(false, ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
  // read the whole file
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
  org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
  Object key = reader.createKey();
  Writable value = (Writable) reader.createValue();
  int rowNum = 0;
  List<? extends StructField> fields = inspector.getAllStructFieldRefs();
  IntObjectInspector intInspector = (IntObjectInspector) fields.get(0).getFieldObjectInspector();
  while (reader.next(key, value)) {
    assertEquals(++rowNum, intInspector.get(inspector.getStructFieldData(serde.deserialize(value), fields.get(0))));
    assertEquals(2, intInspector.get(inspector.getStructFieldData(serde.deserialize(value), fields.get(1))));
  }
  assertEquals(3, rowNum);
  assertEquals(1.0, reader.getProgress(), 0.00001);
  reader.close();
  // read just the first column
  ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(0));
  reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
  key = reader.createKey();
  value = (Writable) reader.createValue();
  rowNum = 0;
  fields = inspector.getAllStructFieldRefs();
  while (reader.next(key, value)) {
    assertEquals(++rowNum, intInspector.get(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(null, inspector.getStructFieldData(value, fields.get(1)));
  }
  assertEquals(3, rowNum);
  reader.close();
  // test the mapping of empty string to all columns
  ColumnProjectionUtils.setReadAllColumns(conf);
  reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
  key = reader.createKey();
  value = (Writable) reader.createValue();
  rowNum = 0;
  fields = inspector.getAllStructFieldRefs();
  while (reader.next(key, value)) {
    assertEquals(++rowNum, intInspector.get(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(2, intInspector.get(inspector.getStructFieldData(serde.deserialize(value), fields.get(1))));
  }
  assertEquals(3, rowNum);
  reader.close();
}
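All of these tests start from the same move: a plain Java class is handed to getReflectionObjectInspector, which reflects over its fields and returns a StructObjectInspector; the synchronized (TestOrcFile.class) block around the call is presumably there to serialize access to the factory's shared inspector cache when tests run concurrently. The standalone sketch below isolates that pattern with a hypothetical SimpleRow class (an illustration, not code from the Hive test suite):

import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;

public class ReflectionInspectorSketch {

  // Hypothetical POJO standing in for the tests' MyRow helper.
  static class SimpleRow {
    int x;
    int y;
    SimpleRow(int x, int y) { this.x = x; this.y = y; }
  }

  public static void main(String[] args) {
    // Reflection over SimpleRow's declared fields yields a struct inspector.
    StructObjectInspector oi = (StructObjectInspector)
        ObjectInspectorFactory.getReflectionObjectInspector(
            SimpleRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    System.out.println(oi.getTypeName());   // expected: struct<x:int,y:int>

    // Field values are read back through StructField handles, as in the loops above.
    SimpleRow row = new SimpleRow(1, 2);
    List<? extends StructField> fields = oi.getAllStructFieldRefs();
    IntObjectInspector xInspector = (IntObjectInspector) fields.get(0).getFieldObjectInspector();
    System.out.println(xInspector.get(oi.getStructFieldData(row, fields.get(0))));   // 1
  }
}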
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.
From the class TestInputOutputFormat, method testSplitGenReadOpsLocalCache.
@Test
public void testSplitGenReadOpsLocalCache() throws Exception {
  MockFileSystem fs = new MockFileSystem(conf);
  // creates the static cache
  MockPath mockPath = new MockPath(fs, "mock:///mocktbl");
  conf.set(ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_MEMORY_SIZE.varname, "0");
  conf.set("mapred.input.dir", mockPath.toString());
  conf.set("fs.defaultFS", "mock:///");
  conf.set("fs.mock.impl", MockFileSystem.class.getName());
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
        MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  int readOpsBefore = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsBefore = statistics.getReadOps();
    }
  }
  assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
  OrcInputFormat orcInputFormat = new OrcInputFormat();
  InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
  assertEquals(2, splits.length);
  int readOpsDelta = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsDelta = statistics.getReadOps() - readOpsBefore;
    }
  }
  // call-1: listLocatedStatus - mock:/mocktbl
  // call-2: check existence of side file for mock:/mocktbl/0_0
  // call-3: open - mock:/mocktbl/0_0
  // call-4: check existence of side file for mock:/mocktbl/0_1
  // call-5: open - mock:/mocktbl/0_1
  assertEquals(5, readOpsDelta);
  // force BI to avoid reading footers
  conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "BI");
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsBefore = statistics.getReadOps();
    }
  }
  orcInputFormat = new OrcInputFormat();
  splits = orcInputFormat.getSplits(conf, 2);
  assertEquals(2, splits.length);
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsDelta = statistics.getReadOps() - readOpsBefore;
    }
  }
  // call-1: listLocatedStatus - mock:/mocktbl
  // call-2: check existence of side file for mock:/mocktbl/0_0
  // call-3: check existence of side file for mock:/mocktbl/0_1
  assertEquals(3, readOpsDelta);
  // enable cache and use default strategy
  conf.set(ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_MEMORY_SIZE.varname, "10Mb");
  conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "HYBRID");
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsBefore = statistics.getReadOps();
    }
  }
  orcInputFormat = new OrcInputFormat();
  splits = orcInputFormat.getSplits(conf, 2);
  assertEquals(2, splits.length);
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsDelta = statistics.getReadOps() - readOpsBefore;
    }
  }
  // call-1: listLocatedStatus - mock:/mocktbl
  // call-2: check existence of side file for mock:/mocktbl/0_0
  // call-3: open - mock:/mocktbl/0_0
  // call-4: check existence of side file for mock:/mocktbl/0_1
  // call-5: open - mock:/mocktbl/0_1
  assertEquals(5, readOpsDelta);
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsBefore = statistics.getReadOps();
    }
  }
  orcInputFormat = new OrcInputFormat();
  splits = orcInputFormat.getSplits(conf, 2);
  assertEquals(2, splits.length);
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsDelta = statistics.getReadOps() - readOpsBefore;
    }
  }
  // call-1: listLocatedStatus - mock:/mocktbl
  assertEquals(1, readOpsDelta);
  // revert back to local fs
  conf.set("fs.defaultFS", "file:///");
}
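The read-op accounting above repeats one idiom: scan FileSystem.getAllStatistics() for the "mock" scheme, snapshot getReadOps() before getSplits, and diff afterwards. A small helper method (a sketch one could add to the test class, not part of it) makes the before/after pattern explicit:

import org.apache.hadoop.fs.FileSystem;

// Hypothetical helper: cumulative read-op count for a scheme, or -1 if no
// FileSystem with that scheme has registered statistics yet.
static int readOps(String scheme) {
  int ops = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase(scheme)) {
      ops = statistics.getReadOps();
    }
  }
  return ops;
}

// Usage, mirroring the test:
//   int before = readOps("mock");
//   InputSplit[] splits = new OrcInputFormat().getSplits(conf, 2);
//   int delta = readOps("mock") - before;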
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.
From the class TestOrcFile, method testWithoutIndex.
/**
 * Read and write a randomly generated snappy file.
 * @throws Exception
 */
@Test
public void testWithoutIndex() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(
        InnerStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf).inspector(inspector).stripeSize(5000)
          .compress(CompressionKind.SNAPPY).bufferSize(1000).rowIndexStride(0));
  Random rand = new Random(24);
  for (int i = 0; i < 10000; ++i) {
    InnerStruct row = new InnerStruct(rand.nextInt(), Integer.toBinaryString(rand.nextInt()));
    for (int j = 0; j < 5; ++j) {
      writer.addRow(row);
    }
  }
  writer.close();
  Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
  assertEquals(50000, reader.getNumberOfRows());
  assertEquals(0, reader.getRowIndexStride());
  StripeInformation stripe = reader.getStripes().iterator().next();
  assertEquals(true, stripe.getDataLength() != 0);
  assertEquals(0, stripe.getIndexLength());
  RecordReader rows = reader.rows();
  rand = new Random(24);
  OrcStruct row = null;
  for (int i = 0; i < 10000; ++i) {
    int intVal = rand.nextInt();
    String strVal = Integer.toBinaryString(rand.nextInt());
    for (int j = 0; j < 5; ++j) {
      assertEquals(true, rows.hasNext());
      row = (OrcStruct) rows.next(row);
      assertEquals(intVal, ((IntWritable) row.getFieldValue(0)).get());
      assertEquals(strVal, row.getFieldValue(1).toString());
    }
  }
  assertEquals(false, rows.hasNext());
  rows.close();
}
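The option doing the work here is rowIndexStride(0): a stride of zero disables ORC row-group indexes (the default stride is 10,000 rows), which is why the first stripe reports a non-zero data length but an index length of 0. Reduced to just that knob (a sketch; testFilePath, conf and inspector are assumed from the surrounding test):

Writer w = OrcFile.createWriter(testFilePath,
    OrcFile.writerOptions(conf)
        .inspector(inspector)
        .compress(CompressionKind.SNAPPY)
        .rowIndexStride(0));   // 0 turns off the row index entirely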
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.
From the class TestOrcRawRecordMerger, method testNewBaseAndDelta.
private void testNewBaseAndDelta(boolean use130Format) throws Exception {
  final int BUCKET = 10;
  String[] values = new String[] { "first", "second", "third", "fourth", "fifth",
      "sixth", "seventh", "eighth", "ninth", "tenth" };
  Configuration conf = new Configuration();
  OrcOutputFormat of = new OrcOutputFormat();
  FileSystem fs = FileSystem.getLocal(conf);
  Path root = new Path(tmpDir, "testNewBaseAndDelta").makeQualified(fs);
  fs.delete(root, true);
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(
        MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  // write the base
  AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
      .inspector(inspector).bucket(BUCKET).finalDestination(root);
  final int BUCKET_PROPERTY = BucketCodec.V1.encode(options);
  if (!use130Format) {
    options.statementId(-1);
  }
  RecordUpdater ru = of.getRecordUpdater(root, options.writingBase(true).maximumWriteId(100));
  for (String v : values) {
    ru.insert(0, new MyRow(v));
  }
  ru.close(false);
  // write a delta
  ru = of.getRecordUpdater(root,
      options.writingBase(false).minimumWriteId(200).maximumWriteId(200).recordIdColumn(1));
  ru.update(200, new MyRow("update 1", 0, 0, BUCKET_PROPERTY));
  ru.update(200, new MyRow("update 2", 2, 0, BUCKET_PROPERTY));
  ru.update(200, new MyRow("update 3", 3, 0, BUCKET_PROPERTY));
  ru.delete(200, new MyRow("", 7, 0, BUCKET_PROPERTY));
  ru.delete(200, new MyRow("", 8, 0, BUCKET_PROPERTY));
  ru.close(false);
  ValidWriteIdList writeIdList = new ValidReaderWriteIdList("testNewBaseAndDelta:200:" + Long.MAX_VALUE);
  AcidUtils.Directory directory = AcidUtils.getAcidState(root, conf, writeIdList);
  assertEquals(new Path(root, "base_0000100"), directory.getBaseDirectory());
  assertEquals(new Path(root, use130Format
          ? AcidUtils.deleteDeltaSubdir(200, 200, 0) : AcidUtils.deleteDeltaSubdir(200, 200)),
      directory.getCurrentDirectories().get(0).getPath());
  assertEquals(new Path(root, use130Format
          ? AcidUtils.deltaSubdir(200, 200, 0) : AcidUtils.deltaSubdir(200, 200)),
      directory.getCurrentDirectories().get(1).getPath());
  Path basePath = AcidUtils.createBucketFile(directory.getBaseDirectory(), BUCKET);
  Path deltaPath = AcidUtils.createBucketFile(directory.getCurrentDirectories().get(1).getPath(), BUCKET);
  Path deleteDeltaDir = directory.getCurrentDirectories().get(0).getPath();
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
  AcidUtils.setAcidOperationalProperties(conf, true, null);
  conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
  // the first "split" is for base/
  Reader baseReader = OrcFile.createReader(basePath, OrcFile.readerOptions(conf));
  OrcRawRecordMerger merger = new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET,
      createMaximalTxnList(), new Reader.Options(), new Path[] { deleteDeltaDir },
      new OrcRawRecordMerger.Options().isCompacting(false));
  assertEquals(null, merger.getMinKey());
  assertEquals(null, merger.getMaxKey());
  RecordIdentifier id = merger.createKey();
  OrcStruct event = merger.createValue();
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 0, 200), id);
  assertNull(OrcRecordUpdater.getRow(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 1, 0), id);
  assertEquals("second", getValue(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 2, 200), id);
  assertNull(OrcRecordUpdater.getRow(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 3, 200), id);
  assertNull(OrcRecordUpdater.getRow(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 4, 0), id);
  assertEquals("fifth", getValue(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 5, 0), id);
  assertEquals("sixth", getValue(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 6, 0), id);
  assertEquals("seventh", getValue(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 7, 200), id);
  assertNull(OrcRecordUpdater.getRow(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 8, 200), id);
  assertNull(OrcRecordUpdater.getRow(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 9, 0), id);
  assertEquals("tenth", getValue(event));
  assertEquals(false, merger.next(id, event));
  merger.close();
  // second "split" is delta_200_200
  baseReader = OrcFile.createReader(deltaPath, OrcFile.readerOptions(conf));
  merger = new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET,
      createMaximalTxnList(), new Reader.Options(), new Path[] { deleteDeltaDir },
      new OrcRawRecordMerger.Options().isCompacting(false));
  assertEquals(null, merger.getMinKey());
  assertEquals(null, merger.getMaxKey());
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 0, 200), id);
  assertNull(OrcRecordUpdater.getRow(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 2, 200), id);
  assertNull(OrcRecordUpdater.getRow(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 3, 200), id);
  assertNull(OrcRecordUpdater.getRow(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 7, 200), id);
  assertNull(OrcRecordUpdater.getRow(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 8, 200), id);
  assertNull(OrcRecordUpdater.getRow(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 0, 200), id);
  assertEquals("update 1", getValue(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 1, 200), id);
  assertEquals("update 2", getValue(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 2, 200), id);
  assertEquals("update 3", getValue(event));
  assertEquals(false, merger.next(id, event));
  merger.close();
  // now run as if it's a minor Compaction so we don't collapse events
  // here there is only 1 "split" since we only have data for 1 bucket
  merger = new OrcRawRecordMerger(conf, false, null, false, BUCKET,
      createMaximalTxnList(), new Reader.Options(),
      AcidUtils.getPaths(directory.getCurrentDirectories()),
      new OrcRawRecordMerger.Options().isCompacting(true));
  assertEquals(null, merger.getMinKey());
  assertEquals(null, merger.getMaxKey());
  assertEquals(true, merger.next(id, event));
  // minor comp, so we ignore 'base_0000100' files so all Deletes end up first since
  // they all modify primordial rows
  assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 0, 200), id);
  assertNull(OrcRecordUpdater.getRow(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 2, 200), id);
  assertNull(OrcRecordUpdater.getRow(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 3, 200), id);
  assertNull(OrcRecordUpdater.getRow(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 7, 200), id);
  assertNull(OrcRecordUpdater.getRow(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 8, 200), id);
  assertNull(OrcRecordUpdater.getRow(event));
  // data from delta_200_200
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 0, 200), id);
  assertEquals("update 1", getValue(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 1, 200), id);
  assertEquals("update 2", getValue(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 2, 200), id);
  assertEquals("update 3", getValue(event));
  assertEquals(false, merger.next(id, event));
  merger.close();
  // now run as if it's a major Compaction so we collapse events
  // here there is only 1 "split" since we only have data for 1 bucket
  baseReader = OrcFile.createReader(basePath, OrcFile.readerOptions(conf));
  merger = new OrcRawRecordMerger(conf, true, null, false, BUCKET,
      createMaximalTxnList(), new Reader.Options(),
      AcidUtils.getPaths(directory.getCurrentDirectories()),
      new OrcRawRecordMerger.Options().isCompacting(true).isMajorCompaction(true)
          .baseDir(new Path(root, "base_0000100")));
  assertEquals(null, merger.getMinKey());
  assertEquals(null, merger.getMaxKey());
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 0, 200), id);
  assertNull(OrcRecordUpdater.getRow(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 1, 0), id);
  assertEquals("second", getValue(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 2, 200), id);
  assertNull(OrcRecordUpdater.getRow(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 3, 200), id);
  assertNull(OrcRecordUpdater.getRow(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 4, 0), id);
  assertEquals("fifth", getValue(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 5, 0), id);
  assertEquals("sixth", getValue(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 6, 0), id);
  assertEquals("seventh", getValue(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 7, 200), id);
  assertNull(OrcRecordUpdater.getRow(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 8, 200), id);
  assertNull(OrcRecordUpdater.getRow(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 9, 0), id);
  assertEquals("tenth", getValue(event));
  // data from delta_200_200
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 0, 200), id);
  assertEquals("update 1", getValue(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 1, 200), id);
  assertEquals("update 2", getValue(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
  assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 2, 200), id);
  assertEquals("update 3", getValue(event));
  assertEquals(false, merger.next(id, event));
  merger.close();
  // try ignoring the 200 transaction and make sure it works still
  ValidWriteIdList writeIds = new ValidReaderWriteIdList("testNewBaseAndDelta:2000:200:200");
  // again 1st split is for base/
  baseReader = OrcFile.createReader(basePath, OrcFile.readerOptions(conf));
  merger = new OrcRawRecordMerger(conf, false, baseReader, false, BUCKET, writeIds,
      new Reader.Options(), new Path[] { deleteDeltaDir },
      new OrcRawRecordMerger.Options().isCompacting(false));
  assertEquals(null, merger.getMinKey());
  assertEquals(null, merger.getMaxKey());
  for (int i = 0; i < values.length; ++i) {
    assertEquals(true, merger.next(id, event));
    LOG.info("id = " + id + ", event = " + event);
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, i, 0), id);
    assertEquals(values[i], getValue(event));
  }
  assertEquals(false, merger.next(id, event));
  merger.close();
  // 2nd split is for delta_200_200 which is filtered out entirely by "txns"
  baseReader = OrcFile.createReader(deltaPath, OrcFile.readerOptions(conf));
  merger = new OrcRawRecordMerger(conf, false, baseReader, false, BUCKET, writeIds,
      new Reader.Options(), new Path[] { deleteDeltaDir },
      new OrcRawRecordMerger.Options().isCompacting(false));
  assertEquals(null, merger.getMinKey());
  assertEquals(null, merger.getMaxKey());
  assertEquals(false, merger.next(id, event));
  merger.close();
}
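The directory assertions in this test lean on how AcidUtils names base and delta directories. A hedged fragment (it assumes the Hive ql classpath; the exact zero padding is whatever AcidUtils emits, shown here in its commonly observed form):

import org.apache.hadoop.hive.ql.io.AcidUtils;

String oldStyle = AcidUtils.deltaSubdir(200, 200);           // e.g. "delta_0000200_0000200"
String newStyle = AcidUtils.deltaSubdir(200, 200, 0);        // e.g. "delta_0000200_0000200_0000" (1.3.0+ appends a statement id)
String deleteDelta = AcidUtils.deleteDeltaSubdir(200, 200);  // e.g. "delete_delta_0000200_0000200"
// The base written with maximumWriteId(100) follows the same padding: base_0000100.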
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.
From the class TestOrcRawRecordMerger, method testRecordReaderDelta.
/**
 * Test the RecordReader over two insert deltas (no base).
 * @throws Exception
 */
@Test
public void testRecordReaderDelta() throws Exception {
  final int BUCKET = 0;
  Configuration conf = new Configuration();
  OrcOutputFormat of = new OrcOutputFormat();
  FileSystem fs = FileSystem.getLocal(conf);
  Path root = new Path(tmpDir, "testRecordReaderDelta").makeQualified(fs);
  fs.delete(root, true);
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(
        MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  // write a delta
  AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
      .bucket(BUCKET).inspector(inspector).filesystem(fs).writingBase(false)
      .minimumWriteId(1).maximumWriteId(1).finalDestination(root);
  RecordUpdater ru = of.getRecordUpdater(root, options);
  String[][] values = { new String[] { "a", "b", "c", "d", "e" },
      new String[] { "f", "g", "h", "i", "j" } };
  for (int i = 0; i < values[0].length; ++i) {
    ru.insert(1, new MyRow(values[0][i]));
  }
  ru.close(false);
  // write a delta
  options.minimumWriteId(2).maximumWriteId(2);
  ru = of.getRecordUpdater(root, options);
  for (int i = 0; i < values[1].length; ++i) {
    ru.insert(2, new MyRow(values[1][i]));
  }
  ru.close(false);
  InputFormat inf = new OrcInputFormat();
  JobConf job = new JobConf();
  job.set("mapred.min.split.size", "1");
  job.set("mapred.max.split.size", "2");
  job.set("mapred.input.dir", root.toString());
  job.set("bucket_count", "1");
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
  AcidUtils.setAcidOperationalProperties(job, true, null);
  job.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
  InputSplit[] splits = inf.getSplits(job, 5);
  assertEquals(2, splits.length);
  org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rr;
  for (int j = 0; j < splits.length; j++) {
    InputSplit split = splits[j];
    rr = inf.getRecordReader(split, job, Reporter.NULL);
    OrcStruct row = rr.createValue();
    for (int i = 0; i < values[j].length; ++i) {
      System.out.println("Checking " + i);
      String msg = "split[" + j + "] at i=" + i;
      assertEquals(msg, true, rr.next(NullWritable.get(), row));
      assertEquals(msg, values[j][i], row.getFieldValue(0).toString());
    }
    assertEquals(false, rr.next(NullWritable.get(), row));
  }
}