Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache: class TestRCFile, method partialReadTest.
private void partialReadTest(FileSystem fs, int count, Path file) throws IOException, SerDeException {
  LOG.debug("reading " + count + " records");
  long start = System.currentTimeMillis();
  java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
  readCols.add(Integer.valueOf(2));
  readCols.add(Integer.valueOf(3));
  ColumnProjectionUtils.appendReadColumns(conf, readCols);
  RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
  LongWritable rowID = new LongWritable();
  BytesRefArrayWritable cols = new BytesRefArrayWritable();
  while (reader.next(rowID)) {
    reader.getCurrentRow(cols);
    cols.resetValid(8);
    Object row = serDe.deserialize(cols);
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    assertEquals("Field size should be 8", 8, fieldRefs.size());
    for (int i : readCols) {
      Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
      Object standardWritableData = ObjectInspectorUtils.copyToStandardObject(fieldData,
          fieldRefs.get(i).getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
      assertEquals("Field " + i, standardWritableData, expectedPartitalFieldsData[i]);
    }
    assertEquals("Class of the serialized object should be BytesRefArrayWritable",
        BytesRefArrayWritable.class, serDe.getSerializedClass());
    BytesRefArrayWritable serializedBytes = (BytesRefArrayWritable) serDe.serialize(row, oi);
    assertEquals("Serialized data", patialS, serializedBytes);
  }
  reader.close();
  long cost = System.currentTimeMillis() - start;
  LOG.debug("reading fully costs:" + cost + " milliseconds");
}
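The StructObjectInspector pattern at the heart of this test is worth isolating: project the columns you need, then walk each row through the inspector and normalize field values to standard Writables. A minimal sketch, assuming an initialized SerDe named serDe and a row it has deserialized (both stand-ins for the test's fixtures):

StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
for (StructField field : oi.getAllStructFieldRefs()) {
  // raw field data in the SerDe's internal (possibly lazy) representation
  Object fieldData = oi.getStructFieldData(row, field);
  // copy into a standard Writable so the value can be compared or held safely
  Object writable = ObjectInspectorUtils.copyToStandardObject(
      fieldData, field.getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
  System.out.println(field.getFieldName() + " = " + writable);
}

The WRITABLE copy option matters because columnar SerDes often return lazy, buffer-backed objects that are only valid until the next row is read.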
Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache: class TestInputOutputFormat, method testCombinationInputFormat.
// test non-vectorized, non-acid, combine
@Test
public void testCombinationInputFormat() throws Exception {
  // get the object inspector for MyRow
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
        MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"), "combination",
      inspector, false, 1);
  // write the orc file to the mock file system
  Path partDir = new Path(conf.get("mapred.input.dir"));
  Writer writer = OrcFile.createWriter(new Path(partDir, "0_0"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  Path path = new Path("mock:/combination/p=0/0_0");
  setBlocks(path, conf, new MockBlock("host0", "host1"));
  MockFileSystem mockFs = (MockFileSystem) partDir.getFileSystem(conf);
  int length0 = getLength(path, conf);
  writer = OrcFile.createWriter(new Path(partDir, "1_0"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 10; i < 20; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  Path path1 = new Path("mock:/combination/p=0/1_0");
  setBlocks(path1, conf, new MockBlock("host1", "host2"));
  // call getSplits
  HiveInputFormat<?, ?> inputFormat = new CombineHiveInputFormat<WritableComparable, Writable>();
  InputSplit[] splits = inputFormat.getSplits(conf, 1);
  assertEquals(1, splits.length);
  CombineHiveInputFormat.CombineHiveInputSplit split =
      (CombineHiveInputFormat.CombineHiveInputSplit) splits[0];
  // check split
  assertEquals(2, split.getNumPaths());
  assertEquals(partDir.toString() + "/0_0", split.getPath(0).toString());
  assertEquals(partDir.toString() + "/1_0", split.getPath(1).toString());
  assertEquals(length0, split.getLength(0));
  assertEquals(getLength(path1, conf), split.getLength(1));
  assertEquals(0, split.getOffset(0));
  assertEquals(0, split.getOffset(1));
  // hadoop-1 gets 3 and hadoop-2 gets 0. *sigh*
  // best answer would be 1.
  assertTrue(3 >= split.getLocations().length);
  // read split
  org.apache.hadoop.mapred.RecordReader<CombineHiveKey, OrcStruct> reader =
      inputFormat.getRecordReader(split, conf, Reporter.NULL);
  CombineHiveKey key = reader.createKey();
  OrcStruct value = reader.createValue();
  for (int i = 0; i < 20; i++) {
    assertEquals(true, reader.next(key, value));
    assertEquals(i, ((IntWritable) value.getFieldValue(0)).get());
  }
  assertEquals(false, reader.next(key, value));
}
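The reflection-based inspector used above maps a Java class's fields to struct fields, which is why MyRow instances can be handed directly to the ORC Writer. A self-contained sketch with a hypothetical POJO (Point is not part of the test):

// Hypothetical POJO: its instance fields become the struct's fields.
public static class Point {
  public int x;
  public int y;
}

StructObjectInspector inspector = (StructObjectInspector)
    ObjectInspectorFactory.getReflectionObjectInspector(
        Point.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
// inspector.getAllStructFieldRefs() now exposes "x" and "y".

The tests wrap this call in synchronized (TestOrcFile.class), presumably because the factory caches inspectors in shared static state.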
Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache: class TestInputOutputFormat, method testSplitGenReadOpsLocalCacheChangeModificationTime.
@Test
public void testSplitGenReadOpsLocalCacheChangeModificationTime() throws Exception {
  MockFileSystem fs = new MockFileSystem(conf);
  // creates the static cache
  MockPath mockPath = new MockPath(fs, "mock:///mocktbl2");
  conf.set("hive.orc.cache.use.soft.references", "true");
  conf.set("mapred.input.dir", mockPath.toString());
  conf.set("fs.defaultFS", "mock:///");
  conf.set("fs.mock.impl", MockFileSystem.class.getName());
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
        MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  int readOpsBefore = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsBefore = statistics.getReadOps();
    }
  }
  assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
  OrcInputFormat orcInputFormat = new OrcInputFormat();
  InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
  assertEquals(2, splits.length);
  int readOpsDelta = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsDelta = statistics.getReadOps() - readOpsBefore;
    }
  }
  // call-1: listLocatedStatus - mock:/mocktbl2
  // call-2: open - mock:/mocktbl2/0_0
  // call-3: open - mock:/mocktbl2/0_1
  assertEquals(3, readOpsDelta);
  // change file modification time and look for cache misses
  FileSystem fs1 = FileSystem.get(conf);
  MockFile mockFile = ((MockFileSystem) fs1).findFile(new Path(mockPath + "/0_0"));
  ((MockFileSystem) fs1).touch(mockFile);
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsBefore = statistics.getReadOps();
    }
  }
  orcInputFormat = new OrcInputFormat();
  splits = orcInputFormat.getSplits(conf, 2);
  assertEquals(2, splits.length);
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsDelta = statistics.getReadOps() - readOpsBefore;
    }
  }
  // call-1: listLocatedStatus - mock:/mocktbl2
  // call-2: open - mock:/mocktbl2/0_0 (the touched file misses the cache)
  assertEquals(2, readOpsDelta);
  // touch the next file
  fs1 = FileSystem.get(conf);
  mockFile = ((MockFileSystem) fs1).findFile(new Path(mockPath + "/0_1"));
  ((MockFileSystem) fs1).touch(mockFile);
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsBefore = statistics.getReadOps();
    }
  }
  orcInputFormat = new OrcInputFormat();
  splits = orcInputFormat.getSplits(conf, 2);
  assertEquals(2, splits.length);
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsDelta = statistics.getReadOps() - readOpsBefore;
    }
  }
  // call-1: listLocatedStatus - mock:/mocktbl2
  // call-2: open - mock:/mocktbl2/0_1 (the touched file misses the cache)
  assertEquals(2, readOpsDelta);
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsBefore = statistics.getReadOps();
    }
  }
  orcInputFormat = new OrcInputFormat();
  splits = orcInputFormat.getSplits(conf, 2);
  assertEquals(2, splits.length);
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsDelta = statistics.getReadOps() - readOpsBefore;
    }
  }
  // call-1: listLocatedStatus - mock:/mocktbl2
  assertEquals(1, readOpsDelta);
  // revert back to local fs
  conf.set("fs.defaultFS", "file:///");
}
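The read-op accounting above repeats the same scan over FileSystem.getAllStatistics() before and after every getSplits() call. A small helper, hypothetical rather than part of the test class, captures the pattern once:

// Hypothetical helper: cumulative read ops recorded for a scheme,
// or -1 if no statistics are registered for it.
private static int readOpsForScheme(String scheme) {
  int readOps = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase(scheme)) {
      readOps = statistics.getReadOps();
    }
  }
  return readOps;
}

Each delta in the test would then reduce to one readOpsForScheme("mock") call before and one after getSplits().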
Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache: class TestInputOutputFormat, method testSplitGenReadOpsLocalCacheChangeFileLen.
@Test
public void testSplitGenReadOpsLocalCacheChangeFileLen() throws Exception {
  MockFileSystem fs = new MockFileSystem(conf);
  // creates the static cache
  MockPath mockPath = new MockPath(fs, "mock:///mocktbl1");
  conf.set("mapred.input.dir", mockPath.toString());
  conf.set("fs.defaultFS", "mock:///");
  conf.set("fs.mock.impl", MockFileSystem.class.getName());
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
        MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  int readOpsBefore = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsBefore = statistics.getReadOps();
    }
  }
  assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
  OrcInputFormat orcInputFormat = new OrcInputFormat();
  InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
  assertEquals(2, splits.length);
  int readOpsDelta = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsDelta = statistics.getReadOps() - readOpsBefore;
    }
  }
  // call-1: listLocatedStatus - mock:/mocktbl1
  // call-2: open - mock:/mocktbl1/0_0
  // call-3: open - mock:/mocktbl1/0_1
  assertEquals(3, readOpsDelta);
  // change file length and look for cache misses
  fs.clear();
  writer = OrcFile.createWriter(new Path(mockPath + "/0_0"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 100; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 100; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsBefore = statistics.getReadOps();
    }
  }
  orcInputFormat = new OrcInputFormat();
  splits = orcInputFormat.getSplits(conf, 2);
  assertEquals(2, splits.length);
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsDelta = statistics.getReadOps() - readOpsBefore;
    }
  }
  // call-1: listLocatedStatus - mock:/mocktbl1
  // call-2: open - mock:/mocktbl1/0_0
  // call-3: open - mock:/mocktbl1/0_1
  assertEquals(3, readOpsDelta);
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsBefore = statistics.getReadOps();
    }
  }
  orcInputFormat = new OrcInputFormat();
  splits = orcInputFormat.getSplits(conf, 2);
  assertEquals(2, splits.length);
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsDelta = statistics.getReadOps() - readOpsBefore;
    }
  }
  // call-1: listLocatedStatus - mock:/mocktbl1
  assertEquals(1, readOpsDelta);
  // revert back to local fs
  conf.set("fs.defaultFS", "file:///");
}
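For completeness, the splits these tests generate would be consumed through the same mapred API used elsewhere in the class. A sketch, reusing the conf and splits names from the test above (for plain, non-combined ORC reads the key type is NullWritable):

org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rows =
    new OrcInputFormat().getRecordReader(splits[0], conf, Reporter.NULL);
NullWritable key = rows.createKey();
OrcStruct value = rows.createValue();
while (rows.next(key, value)) {
  // fields are positional: field 0 is MyRow's first int column
  System.out.println(((IntWritable) value.getFieldValue(0)).get());
}
rows.close();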
Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache: class TestInputOutputFormat, method testSplitGenReadOps.
@Test
public void testSplitGenReadOps() throws Exception {
  MockFileSystem fs = new MockFileSystem(conf);
  conf.set("mapred.input.dir", "mock:///mocktable");
  conf.set("fs.defaultFS", "mock:///");
  conf.set("fs.mock.impl", MockFileSystem.class.getName());
  MockPath mockPath = new MockPath(fs, "mock:///mocktable");
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
        MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  int readOpsBefore = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsBefore = statistics.getReadOps();
    }
  }
  assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
  OrcInputFormat orcInputFormat = new OrcInputFormat();
  InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
  int readOpsDelta = -1;
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      readOpsDelta = statistics.getReadOps() - readOpsBefore;
    }
  }
  // call-1: listLocatedStatus - mock:/mocktable
  // call-2: open - mock:/mocktable/0_0
  // call-3: open - mock:/mocktable/0_1
  assertEquals(3, readOpsDelta);
  assertEquals(2, splits.length);
  // revert back to local fs
  conf.set("fs.defaultFS", "file:///");
}
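All three split-generation tests swap in the mock file system through Hadoop's fs.<scheme>.impl lookup and restore file:/// at the end. Generalized, with a hypothetical scheme name and FileSystem class, the registration pattern is:

Configuration conf = new Configuration();
// Hadoop resolves "fs.<scheme>.impl" to the FileSystem implementation class.
conf.set("fs.myfs.impl", MyFileSystem.class.getName());
conf.set("fs.defaultFS", "myfs:///");
FileSystem resolved = FileSystem.get(conf);  // yields a MyFileSystem instance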