Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.
From the class TestInputOutputFormat, method testCombinationInputFormatWithAcid.
// test non-vectorized, acid, combine
@Test
public void testCombinationInputFormatWithAcid() throws Exception {
// get the object inspector for MyRow
StructObjectInspector inspector;
final int PARTITIONS = 2;
final int BUCKETS = 3;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"), "combinationAcid", inspector, false, PARTITIONS);
// write the orc file to the mock file system
Path[] partDir = new Path[PARTITIONS];
String[] paths = conf.getStrings("mapred.input.dir");
for (int p = 0; p < PARTITIONS; ++p) {
partDir[p] = new Path(paths[p]);
}
// write a base file in partition 0
OrcRecordUpdater writer = new OrcRecordUpdater(partDir[0], new AcidOutputFormat.Options(conf).maximumWriteId(10).writingBase(true).bucket(0).inspector(inspector).finalDestination(partDir[0]));
for (int i = 0; i < 10; ++i) {
writer.insert(10, new MyRow(i, 2 * i));
}
writer.close(false);
// base file
Path base0 = new Path("mock:/combinationAcid/p=0/base_0000010/bucket_00000");
setBlocks(base0, conf, new MockBlock("host1", "host2"));
// write a second base-file bucket (bucket 1) in partition 0
writer = new OrcRecordUpdater(partDir[0], new AcidOutputFormat.Options(conf).maximumWriteId(10).writingBase(true).bucket(1).inspector(inspector).finalDestination(partDir[0]));
for (int i = 10; i < 20; ++i) {
writer.insert(10, new MyRow(i, 2 * i));
}
writer.close(false);
Path base1 = new Path("mock:/combinationAcid/p=0/base_0000010/bucket_00001");
setBlocks(base1, conf, new MockBlock("host1", "host2"));
// write three files in partition 1
for (int bucket = 0; bucket < BUCKETS; ++bucket) {
Path path = new Path(partDir[1], "00000" + bucket + "_0");
Writer orc = OrcFile.createWriter(path, OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
orc.addRow(new MyRow(1, 2));
orc.close();
setBlocks(path, conf, new MockBlock("host3", "host4"));
}
// call getSplits
conf.setInt(hive_metastoreConstants.BUCKET_COUNT, BUCKETS);
HiveInputFormat<?, ?> inputFormat = new CombineHiveInputFormat<WritableComparable, Writable>();
InputSplit[] splits = inputFormat.getSplits(conf, 1);
assertEquals(3, splits.length);
HiveInputFormat.HiveInputSplit split = (HiveInputFormat.HiveInputSplit) splits[0];
assertEquals("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", split.inputFormatClassName());
assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00000", split.getPath().toString());
assertEquals(0, split.getStart());
assertEquals(679, split.getLength());
split = (HiveInputFormat.HiveInputSplit) splits[1];
assertEquals("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", split.inputFormatClassName());
assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00001", split.getPath().toString());
assertEquals(0, split.getStart());
assertEquals(705, split.getLength());
CombineHiveInputFormat.CombineHiveInputSplit combineSplit = (CombineHiveInputFormat.CombineHiveInputSplit) splits[2];
assertEquals(BUCKETS, combineSplit.getNumPaths());
for (int bucket = 0; bucket < BUCKETS; ++bucket) {
assertEquals("mock:/combinationAcid/p=1/00000" + bucket + "_0", combineSplit.getPath(bucket).toString());
assertEquals(0, combineSplit.getOffset(bucket));
assertEquals(241, combineSplit.getLength(bucket));
}
String[] hosts = combineSplit.getLocations();
assertEquals(2, hosts.length);
}
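As a standalone illustration of the call this page aggregates, here is a minimal sketch; it is not taken from the test above, and the SimpleRow class and its field names are hypothetical stand-ins for MyRow. It shows how the reflection-based StructObjectInspector describes a plain Java object:

import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class ReflectionInspectorSketch {
  // hypothetical POJO standing in for MyRow from the tests above
  static class SimpleRow {
    int x;
    int y;
  }

  public static void main(String[] args) {
    // same factory call the tests use, cast to a struct inspector for a POJO
    StructObjectInspector inspector = (StructObjectInspector)
        ObjectInspectorFactory.getReflectionObjectInspector(
            SimpleRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    // list the struct fields discovered via reflection, with their Hive type names
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    for (StructField field : fields) {
      System.out.println(field.getFieldName() + " : "
          + field.getFieldObjectInspector().getTypeName());
    }
  }
}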
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.
From the class TestInputOutputFormat, method testVectorReaderNoFooterSerialize.
@Test
public void testVectorReaderNoFooterSerialize() throws Exception {
MockFileSystem fs = new MockFileSystem(conf);
MockPath mockPath = new MockPath(fs, "mock:///mocktable3");
conf.set("hive.orc.splits.include.file.footer", "false");
conf.set("mapred.input.dir", mockPath.toString());
conf.set("fs.defaultFS", "mock:///");
conf.set("fs.mock.impl", MockFileSystem.class.getName());
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
JobConf jobConf = createMockExecutionEnvironment(workDir, new Path("mock:///"), "mocktable3", inspector, true, 0);
Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
OrcInputFormat orcInputFormat = new OrcInputFormat();
InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
int readOpsBefore = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
for (InputSplit split : splits) {
assertTrue("OrcSplit is expected", split instanceof OrcSplit);
// ETL strategies will have start=3 (start of first stripe)
assertTrue(split.toString().contains("start=3"));
assertTrue(split.toString().contains("hasFooter=false"));
assertTrue(split.toString().contains("hasBase=true"));
assertTrue(split.toString().contains("deltas=0"));
if (split instanceof OrcSplit) {
assertFalse("No footer serialize test for vector reader, hasFooter is not expected in" + " orc splits.", ((OrcSplit) split).hasFooter());
}
orcInputFormat.getRecordReader(split, jobConf, Reporter.NULL);
}
int readOpsDelta = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: open to read footer - split 1 => mock:/mocktable3/0_0
// call-2: open to read data - split 1 => mock:/mocktable3/0_0
// call-3: open to read footer - split 2 => mock:/mocktable3/0_1
// call-4: open to read data - split 2 => mock:/mocktable3/0_1
assertEquals(4, readOpsDelta);
// revert back to local fs
conf.set("fs.defaultFS", "file:///");
}
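The read-op accounting in these tests repeats the same loop over FileSystem.getAllStatistics(); as a hedged sketch (the helper name is my own, and it assumes the test's existing Hadoop imports), it could be collapsed into one call site:

private static int mockFsReadOps() {
  int ops = -1;
  // pick up the read-op counter for the "mock" scheme; stays -1 if no stats were recorded
  for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
    if (statistics.getScheme().equalsIgnoreCase("mock")) {
      ops = statistics.getReadOps();
    }
  }
  return ops;
}

With such a helper, readOpsBefore = mockFsReadOps() and readOpsDelta = mockFsReadOps() - readOpsBefore express the same before/after bookkeeping.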
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.
From the class TestInputOutputFormat, method testSplitGenReadOpsLocalCacheChangeModificationTime.
@Test
public void testSplitGenReadOpsLocalCacheChangeModificationTime() throws Exception {
MockFileSystem fs = new MockFileSystem(conf);
// creates the static cache
MockPath mockPath = new MockPath(fs, "mock:///mocktbl2");
conf.set("hive.orc.cache.use.soft.references", "true");
conf.set("mapred.input.dir", mockPath.toString());
conf.set("fs.defaultFS", "mock:///");
conf.set("fs.mock.impl", MockFileSystem.class.getName());
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
int readOpsBefore = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
OrcInputFormat orcInputFormat = new OrcInputFormat();
InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
int readOpsDelta = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: listLocatedStatus - mock:/mocktbl2
// call-2: check side file for mock:/mocktbl2/0_0
// call-3: open - mock:/mocktbl2/0_0
// call-4: check side file for mock:/mocktbl2/0_1
// call-5: open - mock:/mocktbl2/0_1
assertEquals(5, readOpsDelta);
// change file modification time and look for cache misses
FileSystem fs1 = FileSystem.get(conf);
MockFile mockFile = ((MockFileSystem) fs1).findFile(new Path(mockPath + "/0_0"));
((MockFileSystem) fs1).touch(mockFile);
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
orcInputFormat = new OrcInputFormat();
splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: listLocatedStatus - mock:/mocktbl2
// call-2: check side file for mock:/mocktbl2/0_1
// call-3: open - mock:/mocktbl2/0_1
assertEquals(3, readOpsDelta);
// touch the next file
fs1 = FileSystem.get(conf);
mockFile = ((MockFileSystem) fs1).findFile(new Path(mockPath + "/0_1"));
((MockFileSystem) fs1).touch(mockFile);
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
orcInputFormat = new OrcInputFormat();
splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: listLocatedStatus - mock:/mocktbl2
// call-2: check side file for mock:/mocktbl2/0_0
// call-3: open - mock:/mocktbl2/0_0
assertEquals(3, readOpsDelta);
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
orcInputFormat = new OrcInputFormat();
splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: listLocatedStatus - mock:/mocktbl2
assertEquals(1, readOpsDelta);
// revert back to local fs
conf.set("fs.defaultFS", "file:///");
}
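This test and its neighbors build every ORC file with the same writer options and the same ten rows; a hypothetical helper (the name and placement are my own, assuming the test's MyRow class and ORC imports) would factor that out:

private static void writeTenRows(Path path, Configuration conf, ObjectInspector inspector)
    throws IOException {
  // identical options to the tests above: no block padding, 1 KB buffer, reflection inspector
  Writer writer = OrcFile.createWriter(path,
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
}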
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.
From the class TestInputOutputFormat, method testSplitGenReadOps.
@Test
public void testSplitGenReadOps() throws Exception {
MockFileSystem fs = new MockFileSystem(conf);
conf.set("mapred.input.dir", "mock:///mocktable");
conf.set("fs.defaultFS", "mock:///");
conf.set("fs.mock.impl", MockFileSystem.class.getName());
MockPath mockPath = new MockPath(fs, "mock:///mocktable");
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
int readOpsBefore = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
OrcInputFormat orcInputFormat = new OrcInputFormat();
InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
int readOpsDelta = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: listLocatedStatus - mock:/mocktable
// call-2: check existence of side file for mock:/mocktable/0_0
// call-3: open - mock:/mocktable/0_0
// call-4: check existence of side file for mock:/mocktable/0_1
// call-5: open - mock:/mocktable/0_1
assertEquals(5, readOpsDelta);
assertEquals(2, splits.length);
// revert back to local fs
conf.set("fs.defaultFS", "file:///");
}
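Each of these tests wires the job configuration to the in-memory mock filesystem with the same three keys; as a small sketch (the helper name is mine, and MockFileSystem is the test-local class used above):

private static void useMockFileSystem(Configuration conf, String inputDir) {
  // register the mock scheme, make it the default FS, and point the input dir at it
  conf.set("fs.mock.impl", MockFileSystem.class.getName());
  conf.set("fs.defaultFS", "mock:///");
  conf.set("mapred.input.dir", inputDir);
}

The tests then restore conf.set("fs.defaultFS", "file:///") at the end so later tests run against the local filesystem again.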
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.
From the class TestInputOutputFormat, method testACIDReaderFooterSerializeWithDeltas.
@Test
public void testACIDReaderFooterSerializeWithDeltas() throws Exception {
conf.set("fs.defaultFS", "mock:///");
conf.set("fs.mock.impl", MockFileSystem.class.getName());
// ensures that FS object is cached so that everyone uses the same instance
FileSystem fs = FileSystem.get(conf);
MockPath mockPath = new MockPath(fs, "mock:///mocktable8");
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
conf.set("hive.orc.splits.include.file.footer", "true");
conf.set("mapred.input.dir", mockPath.toString());
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).bucket(1).minimumWriteId(1).maximumWriteId(1).inspector(inspector).finalDestination(mockPath);
OrcOutputFormat of = new OrcOutputFormat();
RecordUpdater ru = of.getRecordUpdater(mockPath, options);
for (int i = 0; i < 10; ++i) {
ru.insert(options.getMinimumWriteId(), new MyRow(i, 2 * i));
}
// this deletes the side file
ru.close(false);
// set up props for read
conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
AcidUtils.setAcidOperationalProperties(conf, true, null);
OrcInputFormat orcInputFormat = new OrcInputFormat();
InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
int readOpsBefore = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
for (InputSplit split : splits) {
assertTrue("OrcSplit is expected", split instanceof OrcSplit);
// ETL strategies will have start=3 (start of first stripe)
assertTrue(split.toString().contains("start=3"));
assertTrue(split.toString().contains("hasFooter=true"));
assertTrue(split.toString().contains("hasBase=true"));
assertTrue("Footer serialize test for ACID reader, hasFooter is not expected in" + " orc splits.", ((OrcSplit) split).hasFooter());
orcInputFormat.getRecordReader(split, conf, Reporter.NULL);
}
int readOpsDelta = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: open to read data - split 1 => mock:/mocktable8/0_0
// call-2: listLocatedFileStatus(mock:/mocktable8)
// call-3: getFileStatus(mock:/mocktable8/delta_0000001_0000001_0000/_metadata_acid)
// call-4: getFileStatus(mock:/mocktable8/delta_0000001_0000001_0000/_metadata_acid)
// call-5: open(mock:/mocktable8/delta_0000001_0000001_0000/bucket_00001)
assertEquals(5, readOpsDelta);
// revert back to local fs
conf.set("fs.defaultFS", "file:///");
}
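A brief recap of how the writer options above line up with the paths in the read-op comments; this is my annotation rather than code from the test:

// Writer side (from the test): bucket(1), minimumWriteId(1), maximumWriteId(1)
//   => the record updater inserts into mock:/mocktable8/delta_0000001_0000001_0000/bucket_00001
// Reader side (from the comments): the base file 0_0 is opened once for data, mocktable8 is
//   listed once, the delta's _metadata_acid status is checked twice, and the delta bucket file
//   is opened once, which accounts for the five read operations asserted above.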