Use of org.apache.hadoop.hive.ql.io.CombineHiveInputFormat in project hive by apache.
From class TestInputOutputFormat, method testCombinationInputFormat.
// test non-vectorized, non-acid, combine
@Test
public void testCombinationInputFormat() throws Exception {
  // get the object inspector for MyRow
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
        MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
      "combination", inspector, false, 1, MockFileSystem.class.getName());
  // write the orc file to the mock file system
  Path partDir = new Path(conf.get("mapred.input.dir"));
  Writer writer = OrcFile.createWriter(new Path(partDir, "0_0"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  Path path = new Path("mock:/combination/p=0/0_0");
  setBlocks(path, conf, new MockBlock("host0", "host1"));
  MockFileSystem mockFs = (MockFileSystem) partDir.getFileSystem(conf);
  int length0 = getLength(path, conf);
  writer = OrcFile.createWriter(new Path(partDir, "1_0"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 10; i < 20; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  Path path1 = new Path("mock:/combination/p=0/1_0");
  setBlocks(path1, conf, new MockBlock("host1", "host2"));
  // call getsplits
  HiveInputFormat<?, ?> inputFormat = new CombineHiveInputFormat<WritableComparable, Writable>();
  InputSplit[] splits = inputFormat.getSplits(conf, 1);
  assertEquals(1, splits.length);
  CombineHiveInputFormat.CombineHiveInputSplit split =
      (CombineHiveInputFormat.CombineHiveInputSplit) splits[0];
  // check split
  assertEquals(2, split.getNumPaths());
  assertEquals(partDir.toString() + "/0_0", split.getPath(0).toString());
  assertEquals(partDir.toString() + "/1_0", split.getPath(1).toString());
  assertEquals(length0, split.getLength(0));
  assertEquals(getLength(path1, conf), split.getLength(1));
  assertEquals(0, split.getOffset(0));
  assertEquals(0, split.getOffset(1));
  // hadoop-1 gets 3 and hadoop-2 gets 0. *sigh*
  // best answer would be 1.
  assertTrue(3 >= split.getLocations().length);
  // read split
  org.apache.hadoop.mapred.RecordReader<CombineHiveKey, OrcStruct> reader =
      inputFormat.getRecordReader(split, conf, Reporter.NULL);
  CombineHiveKey key = reader.createKey();
  OrcStruct value = reader.createValue();
  for (int i = 0; i < 20; i++) {
    assertEquals(true, reader.next(key, value));
    assertEquals(i, ((IntWritable) value.getFieldValue(0)).get());
  }
  assertEquals(false, reader.next(key, value));
}
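In the test above, CombineHiveInputFormat is constructed directly against the mock execution environment that createMockExecutionEnvironment prepares. Outside the test harness the format is normally selected through configuration instead; a minimal sketch, assuming the standard hive.input.format and Hadoop split-size properties (the class name CombineConfSketch and the 256 MB cap are illustrative only):

import org.apache.hadoop.mapred.JobConf;

public class CombineConfSketch {
  // Build a JobConf that opts into combined splits. Both property names are
  // standard Hive/Hadoop configuration keys; the size cap is an example value.
  public static JobConf combineConf() {
    JobConf conf = new JobConf();
    // Ask Hive to plan map-side reads with CombineHiveInputFormat.
    conf.set("hive.input.format",
        "org.apache.hadoop.hive.ql.io.CombineHiveInputFormat");
    // Upper bound on the byte size of one combined split.
    conf.setLong("mapreduce.input.fileinputformat.split.maxsize", 256L * 1024 * 1024);
    return conf;
  }
}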
Use of org.apache.hadoop.hive.ql.io.CombineHiveInputFormat in project hive by apache.
From class TestInputOutputFormat, method testCombinationInputFormatWithAcid.
// test non-vectorized, acid, combine
@Test
public void testCombinationInputFormatWithAcid() throws Exception {
  // get the object inspector for MyRow
  StructObjectInspector inspector;
  final int PARTITIONS = 2;
  final int BUCKETS = 3;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
        MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
      "combinationAcid", inspector, false, PARTITIONS, MockFileSystem.class.getName());
  // write the orc file to the mock file system
  Path[] partDir = new Path[PARTITIONS];
  String[] paths = conf.getStrings("mapred.input.dir");
  for (int p = 0; p < PARTITIONS; ++p) {
    partDir[p] = new Path(paths[p]);
  }
  // write a base file in partition 0
  OrcRecordUpdater writer = new OrcRecordUpdater(partDir[0],
      new AcidOutputFormat.Options(conf).maximumWriteId(10).writingBase(true)
          .bucket(0).inspector(inspector).finalDestination(partDir[0]));
  for (int i = 0; i < 10; ++i) {
    writer.insert(10, new MyRow(i, 2 * i));
  }
  writer.close(false);
  // base file
  Path base0 = new Path("mock:/combinationAcid/p=0/base_0000010/bucket_00000");
  setBlocks(base0, conf, new MockBlock("host1", "host2"));
  // write a second base file (bucket 1) in partition 0
  writer = new OrcRecordUpdater(partDir[0],
      new AcidOutputFormat.Options(conf).maximumWriteId(10).writingBase(true)
          .bucket(1).inspector(inspector).finalDestination(partDir[0]));
  for (int i = 10; i < 20; ++i) {
    writer.insert(10, new MyRow(i, 2 * i));
  }
  writer.close(false);
  Path base1 = new Path("mock:/combinationAcid/p=0/base_0000010/bucket_00001");
  setBlocks(base1, conf, new MockBlock("host1", "host2"));
  // write three files in partition 1
  for (int bucket = 0; bucket < BUCKETS; ++bucket) {
    Path path = new Path(partDir[1], "00000" + bucket + "_0");
    Writer orc = OrcFile.createWriter(path,
        OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    orc.addRow(new MyRow(1, 2));
    orc.close();
    setBlocks(path, conf, new MockBlock("host3", "host4"));
  }
  // call getsplits
  conf.setInt(hive_metastoreConstants.BUCKET_COUNT, BUCKETS);
  setupAcidProperties(conf, RowType.MYROW);
  HiveInputFormat<?, ?> inputFormat = new CombineHiveInputFormat<WritableComparable, Writable>();
  InputSplit[] splits = inputFormat.getSplits(conf, 1);
  assertEquals(3, splits.length);
  HiveInputFormat.HiveInputSplit split = (HiveInputFormat.HiveInputSplit) splits[0];
  assertEquals("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", split.inputFormatClassName());
  assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00000", split.getPath().toString());
  assertEquals(0, split.getStart());
  assertEquals(702, split.getLength());
  split = (HiveInputFormat.HiveInputSplit) splits[1];
  assertEquals("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", split.inputFormatClassName());
  assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00001", split.getPath().toString());
  assertEquals(0, split.getStart());
  assertEquals(726, split.getLength());
  CombineHiveInputFormat.CombineHiveInputSplit combineSplit =
      (CombineHiveInputFormat.CombineHiveInputSplit) splits[2];
  assertEquals(BUCKETS, combineSplit.getNumPaths());
  for (int bucket = 0; bucket < BUCKETS; ++bucket) {
    assertEquals("mock:/combinationAcid/p=1/00000" + bucket + "_0",
        combineSplit.getPath(bucket).toString());
    assertEquals(0, combineSplit.getOffset(bucket));
    assertEquals(253, combineSplit.getLength(bucket));
  }
  String[] hosts = combineSplit.getLocations();
  assertEquals(2, hosts.length);
}
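The assertions above show the split layout for the ACID case: the two base bucket files in partition p=0 each become their own HiveInputSplit, while the three small files in partition p=1 are combined into a single CombineHiveInputSplit. A small sketch for inspecting such a combined split, using only the accessors exercised in these tests (the class and method names here are hypothetical):

import java.io.IOException;
import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;

public class CombineSplitDumpSketch {
  // Print the files covered by one combined split, plus its preferred hosts.
  static void dump(CombineHiveInputFormat.CombineHiveInputSplit split) throws IOException {
    for (int i = 0; i < split.getNumPaths(); ++i) {
      System.out.println(split.getPath(i)
          + " offset=" + split.getOffset(i)
          + " length=" + split.getLength(i));
    }
    System.out.println("locations=" + String.join(",", split.getLocations()));
  }
}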