
Example 71 with InputSplit

Use of org.apache.hadoop.mapred.InputSplit in project hive by apache.

From the class TestInputOutputFormat, method testSplitGenReadOpsLocalCacheChangeModificationTime.

@Test
public void testSplitGenReadOpsLocalCacheChangeModificationTime() throws Exception {
    MockFileSystem fs = new MockFileSystem(conf);
    // creates the static cache
    MockPath mockPath = new MockPath(fs, "mock:///mocktbl2");
    conf.set("hive.orc.cache.use.soft.references", "true");
    conf.set("mapred.input.dir", mockPath.toString());
    conf.set("fs.defaultFS", "mock:///");
    conf.set("fs.mock.impl", MockFileSystem.class.getName());
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    int readOpsBefore = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsBefore = statistics.getReadOps();
        }
    }
    assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
    OrcInputFormat orcInputFormat = new OrcInputFormat();
    InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
    assertEquals(2, splits.length);
    int readOpsDelta = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsDelta = statistics.getReadOps() - readOpsBefore;
        }
    }
    // call-1: listLocatedStatus - mock:/mocktbl2
    // call-2: check side file for mock:/mocktbl2/0_0
    // call-3: open - mock:/mocktbl2/0_0
    // call-4: check side file for  mock:/mocktbl2/0_1
    // call-5: open - mock:/mocktbl2/0_1
    assertEquals(5, readOpsDelta);
    // change file modification time and look for cache misses
    FileSystem fs1 = FileSystem.get(conf);
    MockFile mockFile = ((MockFileSystem) fs1).findFile(new Path(mockPath + "/0_0"));
    ((MockFileSystem) fs1).touch(mockFile);
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsBefore = statistics.getReadOps();
        }
    }
    orcInputFormat = new OrcInputFormat();
    splits = orcInputFormat.getSplits(conf, 2);
    assertEquals(2, splits.length);
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsDelta = statistics.getReadOps() - readOpsBefore;
        }
    }
    // call-1: listLocatedStatus - mock:/mocktbl2
    // call-2: check side file for  mock:/mocktbl2/0_1
    // call-3: open - mock:/mocktbl2/0_1
    assertEquals(3, readOpsDelta);
    // touch the next file
    fs1 = FileSystem.get(conf);
    mockFile = ((MockFileSystem) fs1).findFile(new Path(mockPath + "/0_1"));
    ((MockFileSystem) fs1).touch(mockFile);
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsBefore = statistics.getReadOps();
        }
    }
    orcInputFormat = new OrcInputFormat();
    splits = orcInputFormat.getSplits(conf, 2);
    assertEquals(2, splits.length);
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsDelta = statistics.getReadOps() - readOpsBefore;
        }
    }
    // call-1: listLocatedStatus - mock:/mocktbl2
    // call-2: check side file for  mock:/mocktbl2/0_0
    // call-3: open - mock:/mocktbl2/0_0
    assertEquals(3, readOpsDelta);
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsBefore = statistics.getReadOps();
        }
    }
    orcInputFormat = new OrcInputFormat();
    splits = orcInputFormat.getSplits(conf, 2);
    assertEquals(2, splits.length);
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsDelta = statistics.getReadOps() - readOpsBefore;
        }
    }
    // call-1: listLocatedStatus - mock:/mocktbl2
    assertEquals(1, readOpsDelta);
    // revert back to local fs
    conf.set("fs.defaultFS", "file:///");
}
Also used: InputSplit (org.apache.hadoop.mapred.InputSplit), RecordWriter (org.apache.hadoop.mapred.RecordWriter), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)
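
The read-op bookkeeping above (capture a baseline, run getSplits(), diff the counters) is repeated several times in this test. The loop can be factored into a small helper; this is a minimal sketch using only the standard Hadoop FileSystem.Statistics API, and the helper name readOpsForScheme is hypothetical, not part of the Hive test class.

// Hypothetical helper: returns the cumulative read-op count for a filesystem scheme,
// or -1 if no statistics were registered for that scheme.
private static int readOpsForScheme(String scheme) {
    int readOps = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase(scheme)) {
            readOps = statistics.getReadOps();
        }
    }
    return readOps;
}

// Usage, mirroring the test: capture a baseline, generate splits, then diff.
int before = readOpsForScheme("mock");
InputSplit[] splits = new OrcInputFormat().getSplits(conf, 2);
int delta = readOpsForScheme("mock") - before;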

Example 72 with InputSplit

Use of org.apache.hadoop.mapred.InputSplit in project hive by apache.

From the class TestInputOutputFormat, method testSplitGenReadOps.

@Test
public void testSplitGenReadOps() throws Exception {
    MockFileSystem fs = new MockFileSystem(conf);
    conf.set("mapred.input.dir", "mock:///mocktable");
    conf.set("fs.defaultFS", "mock:///");
    conf.set("fs.mock.impl", MockFileSystem.class.getName());
    MockPath mockPath = new MockPath(fs, "mock:///mocktable");
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    int readOpsBefore = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsBefore = statistics.getReadOps();
        }
    }
    assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
    OrcInputFormat orcInputFormat = new OrcInputFormat();
    InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
    int readOpsDelta = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsDelta = statistics.getReadOps() - readOpsBefore;
        }
    }
    // call-1: listLocatedStatus - mock:/mocktable
    // call-2: check existence of side file for mock:/mocktable/0_0
    // call-3: open - mock:/mocktable/0_0
    // call-4: check existence of side file for mock:/mocktable/0_1
    // call-5: open - mock:/mocktable/0_1
    assertEquals(5, readOpsDelta);
    assertEquals(2, splits.length);
    // revert back to local fs
    conf.set("fs.defaultFS", "file:///");
}
Also used: InputSplit (org.apache.hadoop.mapred.InputSplit), RecordWriter (org.apache.hadoop.mapred.RecordWriter), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)
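
For readers unfamiliar with the older MapReduce API these examples exercise: the org.apache.hadoop.mapred contract is getSplits() on an InputFormat followed by getRecordReader() per split. The sketch below shows that flow with TextInputFormat instead of OrcInputFormat; the input path is illustrative, and the usual org.apache.hadoop.mapred, org.apache.hadoop.fs, and org.apache.hadoop.io imports are assumed.

// Classic mapred read loop: one RecordReader per InputSplit.
JobConf job = new JobConf();
FileInputFormat.setInputPaths(job, new Path("/tmp/input"));  // illustrative path
TextInputFormat format = new TextInputFormat();
format.configure(job);
InputSplit[] splits = format.getSplits(job, 2);
for (InputSplit split : splits) {
    RecordReader<LongWritable, Text> reader = format.getRecordReader(split, job, Reporter.NULL);
    LongWritable key = reader.createKey();
    Text value = reader.createValue();
    while (reader.next(key, value)) {
        // process one record per call to next()
    }
    reader.close();
}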

Example 73 with InputSplit

Use of org.apache.hadoop.mapred.InputSplit in project hive by apache.

From the class TestInputOutputFormat, method testACIDReaderFooterSerializeWithDeltas.

@Test
public void testACIDReaderFooterSerializeWithDeltas() throws Exception {
    conf.set("fs.defaultFS", "mock:///");
    conf.set("fs.mock.impl", MockFileSystem.class.getName());
    // ensures that FS object is cached so that everyone uses the same instance
    FileSystem fs = FileSystem.get(conf);
    MockPath mockPath = new MockPath(fs, "mock:///mocktable8");
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
    conf.set("hive.orc.splits.include.file.footer", "true");
    conf.set("mapred.input.dir", mockPath.toString());
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).bucket(1).minimumWriteId(1).maximumWriteId(1).inspector(inspector).finalDestination(mockPath);
    OrcOutputFormat of = new OrcOutputFormat();
    RecordUpdater ru = of.getRecordUpdater(mockPath, options);
    for (int i = 0; i < 10; ++i) {
        ru.insert(options.getMinimumWriteId(), new MyRow(i, 2 * i));
    }
    // this deletes the side file
    ru.close(false);
    // set up props for read
    conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
    AcidUtils.setAcidOperationalProperties(conf, true, null);
    OrcInputFormat orcInputFormat = new OrcInputFormat();
    InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
    assertEquals(2, splits.length);
    int readOpsBefore = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsBefore = statistics.getReadOps();
        }
    }
    assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
    for (InputSplit split : splits) {
        assertTrue("OrcSplit is expected", split instanceof OrcSplit);
        // ETL strategies will have start=3 (start of first stripe)
        assertTrue(split.toString().contains("start=3"));
        assertTrue(split.toString().contains("hasFooter=true"));
        assertTrue(split.toString().contains("hasBase=true"));
        assertTrue("Footer serialize test for ACID reader, hasFooter is not expected in" + " orc splits.", ((OrcSplit) split).hasFooter());
        orcInputFormat.getRecordReader(split, conf, Reporter.NULL);
    }
    int readOpsDelta = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsDelta = statistics.getReadOps() - readOpsBefore;
        }
    }
    // call-1: open to read data - split 1 => mock:/mocktable8/0_0
    // call-2: listLocatedFileStatus(mock:/mocktable8)
    // call-3: getFileStatus(mock:/mocktable8/delta_0000001_0000001_0000/_metadata_acid)
    // call-4: getFileStatus(mock:/mocktable8/delta_0000001_0000001_0000/_metadata_acid)
    // call-5: open(mock:/mocktable8/delta_0000001_0000001_0000/bucket_00001)
    assertEquals(5, readOpsDelta);
    // revert back to local fs
    conf.set("fs.defaultFS", "file:///");
}
Also used: AcidOutputFormat (org.apache.hadoop.hive.ql.io.AcidOutputFormat), RecordUpdater (org.apache.hadoop.hive.ql.io.RecordUpdater), InputSplit (org.apache.hadoop.mapred.InputSplit), RecordWriter (org.apache.hadoop.mapred.RecordWriter), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)
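
The test above turns on footer caching with hive.orc.splits.include.file.footer, so each OrcSplit carries the serialized ORC footer and the record reader does not need a separate footer read per file. A minimal sketch of inspecting that flag's effect on the generated splits follows; it assumes conf is a JobConf whose mapred.input.dir already points at an ORC directory, as in the test.

// Sketch: with footer serialization enabled, each OrcSplit reports hasFooter() == true.
conf.set("hive.orc.splits.include.file.footer", "true");
InputSplit[] footerSplits = new OrcInputFormat().getSplits(conf, 2);
for (InputSplit split : footerSplits) {
    OrcSplit orcSplit = (OrcSplit) split;
    System.out.println(orcSplit.getPath() + " hasFooter=" + orcSplit.hasFooter());
}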

Example 74 with InputSplit

Use of org.apache.hadoop.mapred.InputSplit in project hive by apache.

From the class TestOrcRawRecordMerger, method testRecordReaderNewBaseAndDelta.

/**
 * Test the RecordReader when there is a new base and a delta.
 * This test creates multiple stripes in both base and delta files which affects how many splits
 * are created on read.  With ORC-228 this could be done in E2E fashion with a query or
 * streaming ingest writing data.
 * @see #testRecordReaderOldBaseAndDelta()
 * @throws Exception
 */
@Test
public void testRecordReaderNewBaseAndDelta() throws Exception {
    final int BUCKET = 11;
    Configuration conf = new Configuration();
    OrcOutputFormat of = new OrcOutputFormat();
    FileSystem fs = FileSystem.getLocal(conf);
    Path root = new Path(tmpDir, "testRecordReaderNewBaseAndDelta").makeQualified(fs);
    fs.delete(root, true);
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    // write the base
    MemoryManager mgr = new MemoryManagerImpl(conf) {

        int rowsAddedSinceCheck = 0;

        @Override
        public synchronized void addedRow(int rows) throws IOException {
            rowsAddedSinceCheck += rows;
            if (rowsAddedSinceCheck >= 2) {
                notifyWriters();
                rowsAddedSinceCheck = 0;
            }
        }
    };
    // make 5 stripes with 2 rows each
    OrcRecordUpdater.OrcOptions options = (OrcRecordUpdater.OrcOptions) new OrcRecordUpdater.OrcOptions(conf).writingBase(true).minimumWriteId(0).maximumWriteId(0).bucket(BUCKET).inspector(inspector).filesystem(fs);
    final int BUCKET_PROPERTY = BucketCodec.V1.encode(options);
    options.orcOptions(OrcFile.writerOptions(conf).stripeSize(1).blockPadding(false).compress(CompressionKind.NONE).memory(mgr).batchSize(2));
    options.finalDestination(root);
    RecordUpdater ru = of.getRecordUpdater(root, options);
    String[] values = new String[] { "ignore.1", "0.1", "ignore.2", "ignore.3", "2.0", "2.1", "3.0", "ignore.4", "ignore.5", "ignore.6" };
    for (int i = 0; i < values.length; ++i) {
        ru.insert(0, new BigRow(i, i, values[i], i, i));
    }
    ru.close(false);
    // write a delta
    options.writingBase(false).minimumWriteId(1).maximumWriteId(1).recordIdColumn(5);
    ru = of.getRecordUpdater(root, options);
    values = new String[] { "0.0", null, null, "1.1", null, null, null, "ignore.7" };
    for (int i = 0; i < values.length; ++i) {
        if (values[i] != null) {
            ru.update(1, new BigRow(i, i, values[i], i, i, i, 0, BUCKET_PROPERTY));
        }
    }
    ru.delete(1, new BigRow(9, 0, BUCKET_PROPERTY));
    ru.close(false);
    // write a delta
    options.minimumWriteId(100).maximumWriteId(100);
    ru = of.getRecordUpdater(root, options);
    values = new String[] { null, null, "1.0", null, null, null, null, "3.1" };
    for (int i = 0; i < values.length - 1; ++i) {
        if (values[i] != null) {
            ru.update(100, new BigRow(i, i, values[i], i, i, i, 0, BUCKET_PROPERTY));
        }
    }
    // do this before the next update so that delete_delta is properly sorted
    ru.delete(100, new BigRow(8, 0, BUCKET_PROPERTY));
    // because row 8 was updated and thus has a different RecordIdentifier now
    ru.update(100, new BigRow(7, 7, values[values.length - 1], 7, 7, 2, 1, BUCKET_PROPERTY));
    ru.close(false);
    MyResult[] expected = new MyResult[10];
    int k = 0;
    expected[k++] = new MyResult(0, "0.0");
    expected[k++] = new MyResult(1, "0.1");
    expected[k++] = new MyResult(2, "1.0");
    expected[k++] = new MyResult(3, "1.1");
    expected[k++] = new MyResult(4, "2.0");
    expected[k++] = new MyResult(5, "2.1");
    expected[k++] = new MyResult(6, "3.0");
    expected[k] = new MyResult(7, "3.1");
    InputFormat inf = new OrcInputFormat();
    JobConf job = new JobConf();
    job.set("mapred.min.split.size", "1");
    job.set("mapred.max.split.size", "2");
    job.set("mapred.input.dir", root.toString());
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, BigRow.getColumnNamesProperty());
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, BigRow.getColumnTypesProperty());
    AcidUtils.setAcidOperationalProperties(job, true, null);
    job.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
    InputSplit[] splits = inf.getSplits(job, 5);
    // base has 10 rows, so 5 splits, 1 delta has 2 rows so 1 split, and 1 delta has 3 so 2 splits
    assertEquals(8, splits.length);
    org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rr;
    for (InputSplit split : splits) {
        rr = inf.getRecordReader(split, job, Reporter.NULL);
        NullWritable key = rr.createKey();
        OrcStruct value = rr.createValue();
        while (rr.next(key, value)) {
            MyResult mr = new MyResult(Integer.parseInt(value.getFieldValue(0).toString()), value.getFieldValue(2).toString());
            int i = 0;
            for (; i < expected.length; i++) {
                if (mr.equals(expected[i])) {
                    expected[i] = null;
                    break;
                }
            }
            if (i >= expected.length) {
                // not found
                assertTrue("Found unexpected row: " + mr, false);
            }
        }
    }
    for (MyResult mr : expected) {
        assertTrue("Expected " + mr + " not found in any InputSplit", mr == null);
    }
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), MemoryManagerImpl (org.apache.orc.impl.MemoryManagerImpl), FileSystem (org.apache.hadoop.fs.FileSystem), RecordUpdater (org.apache.hadoop.hive.ql.io.RecordUpdater), JobConf (org.apache.hadoop.mapred.JobConf), InputSplit (org.apache.hadoop.mapred.InputSplit), Path (org.apache.hadoop.fs.Path), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), MemoryManager (org.apache.orc.MemoryManager), NullWritable (org.apache.hadoop.io.NullWritable), InputFormat (org.apache.hadoop.mapred.InputFormat), Test (org.junit.Test)

Example 75 with InputSplit

Use of org.apache.hadoop.mapred.InputSplit in project hive by apache.

From the class TestOrcRawRecordMerger, method testRecordReaderIncompleteDelta.

/**
 * @param use130Format true means use delta_0001_0001_0000 format, else delta_0001_00001
 */
private void testRecordReaderIncompleteDelta(boolean use130Format) throws Exception {
    final int BUCKET = 1;
    Configuration conf = new Configuration();
    OrcOutputFormat of = new OrcOutputFormat();
    FileSystem fs = FileSystem.getLocal(conf).getRaw();
    Path root = new Path(tmpDir, "testRecordReaderIncompleteDelta").makeQualified(fs);
    fs.delete(root, true);
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    // write a base
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).writingBase(true).minimumWriteId(0).maximumWriteId(0).bucket(BUCKET).inspector(inspector).filesystem(fs).finalDestination(root);
    if (!use130Format) {
        options.statementId(-1);
    }
    RecordUpdater ru = of.getRecordUpdater(root, options);
    String[] values = new String[] { "1", "2", "3", "4", "5" };
    for (int i = 0; i < values.length; ++i) {
        ru.insert(0, new MyRow(values[i]));
    }
    ru.close(false);
    // write a delta
    options.writingBase(false).minimumWriteId(10).maximumWriteId(19);
    ru = of.getRecordUpdater(root, options);
    values = new String[] { "6", "7", "8" };
    for (int i = 0; i < values.length; ++i) {
        ru.insert(1, new MyRow(values[i]));
    }
    InputFormat inf = new OrcInputFormat();
    JobConf job = new JobConf();
    job.set("mapred.input.dir", root.toString());
    job.set("bucket_count", "2");
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
    AcidUtils.setAcidOperationalProperties(job, true, null);
    job.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
    // read the keys before the delta is flushed
    InputSplit[] splits = inf.getSplits(job, 1);
    // 1 split since we only have 1 bucket file in base/.  delta is not flushed (committed) yet, i.e. empty
    assertEquals(1, splits.length);
    org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rr = inf.getRecordReader(splits[0], job, Reporter.NULL);
    NullWritable key = rr.createKey();
    OrcStruct value = rr.createValue();
    System.out.println("Looking at split " + splits[0]);
    for (int i = 1; i < 6; ++i) {
        System.out.println("Checking row " + i);
        assertEquals(true, rr.next(key, value));
        assertEquals(Integer.toString(i), value.getFieldValue(0).toString());
    }
    assertEquals(false, rr.next(key, value));
    ru.flush();
    ru.flush();
    values = new String[] { "9", "10" };
    for (int i = 0; i < values.length; ++i) {
        ru.insert(3, new MyRow(values[i]));
    }
    ru.flush();
    splits = inf.getSplits(job, 1);
    assertEquals(2, splits.length);
    Path sideFile = new Path(root + "/" + (use130Format ? AcidUtils.deltaSubdir(10, 19, 0) : AcidUtils.deltaSubdir(10, 19)) + "/bucket_00001_flush_length");
    assertEquals(true, fs.exists(sideFile));
    assertEquals(32, fs.getFileStatus(sideFile).getLen());
    rr = inf.getRecordReader(splits[0], job, Reporter.NULL);
    for (int i = 1; i <= 5; ++i) {
        assertEquals(true, rr.next(key, value));
        assertEquals(Integer.toString(i), value.getFieldValue(0).toString());
    }
    assertEquals(false, rr.next(key, value));
    rr = inf.getRecordReader(splits[1], job, Reporter.NULL);
    for (int i = 6; i < 11; ++i) {
        assertEquals("i=" + i, true, rr.next(key, value));
        assertEquals(Integer.toString(i), value.getFieldValue(0).toString());
    }
    assertEquals(false, rr.next(key, value));
}
Also used: Path (org.apache.hadoop.fs.Path), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Configuration (org.apache.hadoop.conf.Configuration), NullWritable (org.apache.hadoop.io.NullWritable), AcidOutputFormat (org.apache.hadoop.hive.ql.io.AcidOutputFormat), InputFormat (org.apache.hadoop.mapred.InputFormat), FileSystem (org.apache.hadoop.fs.FileSystem), RecordUpdater (org.apache.hadoop.hive.ql.io.RecordUpdater), JobConf (org.apache.hadoop.mapred.JobConf), InputSplit (org.apache.hadoop.mapred.InputSplit)
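
Examples 73 through 75 all apply the same read-side ACID configuration before calling getSplits(): a transactional-table flag plus schema-evolution column names and types. The sketch below collects those settings in one place; the table path and column lists are illustrative, while the property names and calls are taken from the tests above.

// Read-side setup shared by the ACID examples above.
JobConf job = new JobConf();
job.set("mapred.input.dir", "/path/to/acid/table");                 // illustrative path
job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "id,value");          // illustrative schema
job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "int:string");  // illustrative types
job.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
AcidUtils.setAcidOperationalProperties(job, true, null);
InputSplit[] acidSplits = new OrcInputFormat().getSplits(job, 1);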

Aggregations

InputSplit (org.apache.hadoop.mapred.InputSplit): 161 usages
Path (org.apache.hadoop.fs.Path): 57 usages
JobConf (org.apache.hadoop.mapred.JobConf): 56 usages
Test (org.junit.Test): 49 usages
IOException (java.io.IOException): 47 usages
ArrayList (java.util.ArrayList): 29 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 27 usages
FileSplit (org.apache.hadoop.mapred.FileSplit): 24 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 21 usages
TextInputFormat (org.apache.hadoop.mapred.TextInputFormat): 21 usages
InputFormat (org.apache.hadoop.mapred.InputFormat): 19 usages
RecordWriter (org.apache.hadoop.mapred.RecordWriter): 19 usages
NullWritable (org.apache.hadoop.io.NullWritable): 18 usages
Text (org.apache.hadoop.io.Text): 18 usages
Configuration (org.apache.hadoop.conf.Configuration): 14 usages
LongWritable (org.apache.hadoop.io.LongWritable): 11 usages
FileInputFormat (org.apache.hadoop.mapred.FileInputFormat): 10 usages
Properties (java.util.Properties): 9 usages
TaskLocationHint (org.apache.tez.dag.api.TaskLocationHint): 9 usages
HashMap (java.util.HashMap): 8 usages