
Example 11 with StructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

The class TestRCFile, method partialReadTest.

private void partialReadTest(FileSystem fs, int count, Path file) throws IOException, SerDeException {
    LOG.debug("reading " + count + " records");
    long start = System.currentTimeMillis();
    java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
    readCols.add(Integer.valueOf(2));
    readCols.add(Integer.valueOf(3));
    ColumnProjectionUtils.appendReadColumns(conf, readCols);
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
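    // Iterate all rows; under the column projection set above, only columns 2 and 3 are actually read.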
    while (reader.next(rowID)) {
        reader.getCurrentRow(cols);
        cols.resetValid(8);
        Object row = serDe.deserialize(cols);
        StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
        assertEquals("Field size should be 8", 8, fieldRefs.size());
        for (int i : readCols) {
            Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
            Object standardWritableData = ObjectInspectorUtils.copyToStandardObject(fieldData, fieldRefs.get(i).getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
            assertEquals("Field " + i, standardWritableData, expectedPartitalFieldsData[i]);
        }
        assertEquals("Class of the serialized object should be BytesRefArrayWritable", BytesRefArrayWritable.class, serDe.getSerializedClass());
        BytesRefArrayWritable serializedBytes = (BytesRefArrayWritable) serDe.serialize(row, oi);
        assertEquals("Serialized data", patialS, serializedBytes);
    }
    reader.close();
    long cost = System.currentTimeMillis() - start;
    LOG.debug("reading fully costs:" + cost + " milliseconds");
}
Also used: BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), RecordReader (org.apache.hadoop.mapred.RecordReader), LongWritable (org.apache.hadoop.io.LongWritable), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
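
For comparison, here is a minimal, self-contained sketch of the StructObjectInspector access pattern the test exercises, stripped of the RCFile machinery: obtain an inspector for a plain Java class via reflection, then read each field through its StructField reference. The SimpleRow class is hypothetical; only hive-serde (and its Hadoop dependencies) is assumed on the classpath.

import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class StructInspectorSketch {

    // Hypothetical row class, standing in for the test's deserialized row.
    public static class SimpleRow {
        public int id;
        public String name;
    }

    public static void main(String[] args) {
        // Reflection-derived inspector, the same factory call the later examples use.
        StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(SimpleRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        SimpleRow row = new SimpleRow();
        row.id = 42;
        row.name = "example";
        // Walk every field, mirroring the fieldRefs.get(i) loop above.
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
        for (StructField ref : fieldRefs) {
            Object fieldData = oi.getStructFieldData(row, ref);
            System.out.println(ref.getFieldName() + " = " + fieldData);
        }
    }
}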

Example 12 with StructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

The class TestInputOutputFormat, method testCombinationInputFormat.

// test non-vectorized, non-acid, combine
@Test
public void testCombinationInputFormat() throws Exception {
    // get the object inspector for MyRow
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"), "combination", inspector, false, 1);
    // write the orc file to the mock file system
    Path partDir = new Path(conf.get("mapred.input.dir"));
    Writer writer = OrcFile.createWriter(new Path(partDir, "0_0"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    Path path = new Path("mock:/combination/p=0/0_0");
    setBlocks(path, conf, new MockBlock("host0", "host1"));
    MockFileSystem mockFs = (MockFileSystem) partDir.getFileSystem(conf);
    int length0 = getLength(path, conf);
    writer = OrcFile.createWriter(new Path(partDir, "1_0"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 10; i < 20; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    Path path1 = new Path("mock:/combination/p=0/1_0");
    setBlocks(path1, conf, new MockBlock("host1", "host2"));
    // call getSplits
    HiveInputFormat<?, ?> inputFormat = new CombineHiveInputFormat<WritableComparable, Writable>();
    InputSplit[] splits = inputFormat.getSplits(conf, 1);
    assertEquals(1, splits.length);
    CombineHiveInputFormat.CombineHiveInputSplit split = (CombineHiveInputFormat.CombineHiveInputSplit) splits[0];
    // check split
    assertEquals(2, split.getNumPaths());
    assertEquals(partDir.toString() + "/0_0", split.getPath(0).toString());
    assertEquals(partDir.toString() + "/1_0", split.getPath(1).toString());
    assertEquals(length0, split.getLength(0));
    assertEquals(getLength(path1, conf), split.getLength(1));
    assertEquals(0, split.getOffset(0));
    assertEquals(0, split.getOffset(1));
    // hadoop-1 gets 3 and hadoop-2 gets 0. *sigh*
    // best answer would be 1.
    assertTrue(3 >= split.getLocations().length);
    // read split
    org.apache.hadoop.mapred.RecordReader<CombineHiveKey, OrcStruct> reader = inputFormat.getRecordReader(split, conf, Reporter.NULL);
    CombineHiveKey key = reader.createKey();
    OrcStruct value = reader.createValue();
    for (int i = 0; i < 20; i++) {
        assertEquals(true, reader.next(key, value));
        assertEquals(i, ((IntWritable) value.getFieldValue(0)).get());
    }
    assertEquals(false, reader.next(key, value));
}
Also used: Path (org.apache.hadoop.fs.Path), CombineHiveInputFormat (org.apache.hadoop.hive.ql.io.CombineHiveInputFormat), CombineHiveKey (org.apache.hadoop.hive.shims.CombineHiveKey), JobConf (org.apache.hadoop.mapred.JobConf), InputSplit (org.apache.hadoop.mapred.InputSplit), RecordWriter (org.apache.hadoop.mapred.RecordWriter), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)
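
The test above drives the ORC write path through a mock filesystem. As a hedged sketch, the same write-then-read round trip against a real path looks like this, using the legacy org.apache.hadoop.hive.ql.io.orc API (addRow-style Writer) that the test itself relies on; the MyRow stand-in and the /tmp path are illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;
import org.apache.hadoop.hive.ql.io.orc.Writer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class OrcRoundTripSketch {

    // Stand-in for the test's private MyRow class.
    public static class MyRow {
        public int x;
        public int y;
        public MyRow(int x, int y) { this.x = x; this.y = y; }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        StructObjectInspector inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        // Illustrative local path; the test writes to its mock filesystem instead.
        Path path = new Path("/tmp/orc-roundtrip/0_0");
        Writer writer = OrcFile.createWriter(path, OrcFile.writerOptions(conf).inspector(inspector));
        for (int i = 0; i < 10; ++i) {
            writer.addRow(new MyRow(i, 2 * i));
        }
        writer.close();
        // Read the rows back; next() reuses the previous OrcStruct when non-null.
        Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
        RecordReader rows = reader.rows();
        Object row = null;
        while (rows.hasNext()) {
            row = rows.next(row);
            System.out.println(row);
        }
        rows.close();
    }
}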

Example 13 with StructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

The class TestInputOutputFormat, method testSplitGenReadOpsLocalCacheChangeModificationTime.

@Test
public void testSplitGenReadOpsLocalCacheChangeModificationTime() throws Exception {
    MockFileSystem fs = new MockFileSystem(conf);
    // creates the static cache
    MockPath mockPath = new MockPath(fs, "mock:///mocktbl2");
    conf.set("hive.orc.cache.use.soft.references", "true");
    conf.set("mapred.input.dir", mockPath.toString());
    conf.set("fs.defaultFS", "mock:///");
    conf.set("fs.mock.impl", MockFileSystem.class.getName());
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    int readOpsBefore = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsBefore = statistics.getReadOps();
        }
    }
    assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
    OrcInputFormat orcInputFormat = new OrcInputFormat();
    InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
    assertEquals(2, splits.length);
    int readOpsDelta = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsDelta = statistics.getReadOps() - readOpsBefore;
        }
    }
    // call-1: listLocatedStatus - mock:/mocktbl2
    // call-2: open - mock:/mocktbl2/0_0
    // call-3: open - mock:/mocktbl2/0_1
    assertEquals(3, readOpsDelta);
    // change file modification time and look for cache misses
    FileSystem fs1 = FileSystem.get(conf);
    MockFile mockFile = ((MockFileSystem) fs1).findFile(new Path(mockPath + "/0_0"));
    ((MockFileSystem) fs1).touch(mockFile);
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsBefore = statistics.getReadOps();
        }
    }
    orcInputFormat = new OrcInputFormat();
    splits = orcInputFormat.getSplits(conf, 2);
    assertEquals(2, splits.length);
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsDelta = statistics.getReadOps() - readOpsBefore;
        }
    }
    // call-1: listLocatedStatus - mock:/mocktbl2
    // call-2: open - mock:/mocktbl2/0_1
    assertEquals(2, readOpsDelta);
    // touch the next file
    fs1 = FileSystem.get(conf);
    mockFile = ((MockFileSystem) fs1).findFile(new Path(mockPath + "/0_1"));
    ((MockFileSystem) fs1).touch(mockFile);
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsBefore = statistics.getReadOps();
        }
    }
    orcInputFormat = new OrcInputFormat();
    splits = orcInputFormat.getSplits(conf, 2);
    assertEquals(2, splits.length);
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsDelta = statistics.getReadOps() - readOpsBefore;
        }
    }
    // call-1: listLocatedStatus - mock:/mocktbl2
    // call-2: open - mock:/mocktbl2/0_0
    assertEquals(2, readOpsDelta);
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsBefore = statistics.getReadOps();
        }
    }
    orcInputFormat = new OrcInputFormat();
    splits = orcInputFormat.getSplits(conf, 2);
    assertEquals(2, splits.length);
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsDelta = statistics.getReadOps() - readOpsBefore;
        }
    }
    // call-1: listLocatedStatus - mock:/mocktbl2
    assertEquals(1, readOpsDelta);
    // revert back to local fs
    conf.set("fs.defaultFS", "file:///");
}
Also used: Path (org.apache.hadoop.fs.Path), FileSystem (org.apache.hadoop.fs.FileSystem), InputSplit (org.apache.hadoop.mapred.InputSplit), RecordWriter (org.apache.hadoop.mapred.RecordWriter), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)
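
The snapshot-then-delta statistics pattern recurs throughout these tests. A small helper sketch that factors it out (illustrative only; FileSystem.getAllStatistics is the same API the tests poll, though it is deprecated in newer Hadoop releases):

import org.apache.hadoop.fs.FileSystem;

public final class ReadOpsProbe {

    private ReadOpsProbe() {
    }

    // Returns the current read-op count for the given scheme, or -1 if no
    // statistics have been recorded for it yet.
    public static int readOps(String scheme) {
        for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
            if (statistics.getScheme().equalsIgnoreCase(scheme)) {
                return statistics.getReadOps();
            }
        }
        return -1;
    }
}

With this helper, each measurement block above collapses to: int before = ReadOpsProbe.readOps("mock"); run getSplits; int delta = ReadOpsProbe.readOps("mock") - before.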

Example 14 with StructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

The class TestInputOutputFormat, method testSplitGenReadOpsLocalCacheChangeFileLen.

@Test
public void testSplitGenReadOpsLocalCacheChangeFileLen() throws Exception {
    MockFileSystem fs = new MockFileSystem(conf);
    // creates the static cache
    MockPath mockPath = new MockPath(fs, "mock:///mocktbl1");
    conf.set("mapred.input.dir", mockPath.toString());
    conf.set("fs.defaultFS", "mock:///");
    conf.set("fs.mock.impl", MockFileSystem.class.getName());
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    int readOpsBefore = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsBefore = statistics.getReadOps();
        }
    }
    assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
    OrcInputFormat orcInputFormat = new OrcInputFormat();
    InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
    assertEquals(2, splits.length);
    int readOpsDelta = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsDelta = statistics.getReadOps() - readOpsBefore;
        }
    }
    // call-1: listLocatedStatus - mock:/mocktbl1
    // call-2: open - mock:/mocktbl1/0_0
    // call-3: open - mock:/mocktbl1/0_1
    assertEquals(3, readOpsDelta);
    // change file length and look for cache misses
    fs.clear();
    writer = OrcFile.createWriter(new Path(mockPath + "/0_0"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 100; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 100; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsBefore = statistics.getReadOps();
        }
    }
    orcInputFormat = new OrcInputFormat();
    splits = orcInputFormat.getSplits(conf, 2);
    assertEquals(2, splits.length);
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsDelta = statistics.getReadOps() - readOpsBefore;
        }
    }
    // call-1: listLocatedStatus - mock:/mocktbl1
    // call-2: open - mock:/mocktbl1/0_0
    // call-3: open - mock:/mocktbl1/0_1
    assertEquals(3, readOpsDelta);
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsBefore = statistics.getReadOps();
        }
    }
    orcInputFormat = new OrcInputFormat();
    splits = orcInputFormat.getSplits(conf, 2);
    assertEquals(2, splits.length);
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsDelta = statistics.getReadOps() - readOpsBefore;
        }
    }
    // call-1: listLocatedStatus - mock:/mocktbl1
    assertEquals(1, readOpsDelta);
    // revert back to local fs
    conf.set("fs.defaultFS", "file:///");
}
Also used: Path (org.apache.hadoop.fs.Path), FileSystem (org.apache.hadoop.fs.FileSystem), InputSplit (org.apache.hadoop.mapred.InputSplit), RecordWriter (org.apache.hadoop.mapred.RecordWriter), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)
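
Taken together, Examples 13 and 14 show that split generation caches per-file footer details and invalidates an entry when the file's modification time or length changes. A hedged sketch of the footer facts a Reader exposes, which is what the cache saves re-reading (the path is illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;

public class FooterFactsSketch {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Opening a reader is exactly the "open" read op the tests count.
        Reader reader = OrcFile.createReader(new Path("/tmp/orc-roundtrip/0_0"), OrcFile.readerOptions(conf));
        System.out.println("rows = " + reader.getNumberOfRows());
        System.out.println("content length = " + reader.getContentLength());
    }
}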

Example 15 with StructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

The class TestInputOutputFormat, method testSplitGenReadOps.

@Test
public void testSplitGenReadOps() throws Exception {
    MockFileSystem fs = new MockFileSystem(conf);
    conf.set("mapred.input.dir", "mock:///mocktable");
    conf.set("fs.defaultFS", "mock:///");
    conf.set("fs.mock.impl", MockFileSystem.class.getName());
    MockPath mockPath = new MockPath(fs, "mock:///mocktable");
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    int readOpsBefore = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsBefore = statistics.getReadOps();
        }
    }
    assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
    OrcInputFormat orcInputFormat = new OrcInputFormat();
    InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
    int readOpsDelta = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsDelta = statistics.getReadOps() - readOpsBefore;
        }
    }
    // call-1: listLocatedStatus - mock:/mocktable
    // call-2: open - mock:/mocktable/0_0
    // call-3: open - mock:/mocktable/0_1
    assertEquals(3, readOpsDelta);
    assertEquals(2, splits.length);
    // revert back to local fs
    conf.set("fs.defaultFS", "file:///");
}
Also used: Path (org.apache.hadoop.fs.Path), FileSystem (org.apache.hadoop.fs.FileSystem), InputSplit (org.apache.hadoop.mapred.InputSplit), RecordWriter (org.apache.hadoop.mapred.RecordWriter), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)
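
Outside the mock filesystem, the same split generation can be driven directly. A hedged sketch; the directory is assumed to already contain ORC files, and the path is illustrative:

import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;

public class SplitGenSketch {

    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        // Illustrative input directory containing ORC files.
        conf.set("mapred.input.dir", "/tmp/orc-roundtrip");
        OrcInputFormat inputFormat = new OrcInputFormat();
        // Ask for two splits, as the tests above do.
        InputSplit[] splits = inputFormat.getSplits(conf, 2);
        for (InputSplit split : splits) {
            System.out.println(split + " length=" + split.getLength());
        }
    }
}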

Aggregations

StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 232 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 113 usages
ArrayList (java.util.ArrayList): 84 usages
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 69 usages
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 46 usages
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 42 usages
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 42 usages
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 40 usages
Test (org.junit.Test): 38 usages
Properties (java.util.Properties): 35 usages
Text (org.apache.hadoop.io.Text): 32 usages
StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector): 30 usages
Path (org.apache.hadoop.fs.Path): 29 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 27 usages
IOException (java.io.IOException): 25 usages
Configuration (org.apache.hadoop.conf.Configuration): 25 usages
IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector): 24 usages
LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector): 24 usages
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 23 usages
InputSplit (org.apache.hadoop.mapred.InputSplit): 23 usages