Search in sources :

Example 1 with Context

use of org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.Context in project hive by apache.

From the class TestInputOutputFormat, method testSplitStrategySelection.

@Test
public void testSplitStrategySelection() throws Exception {
    // Force a tiny max split size so the file count/size combination alone
    // drives the choice between the ETL and BI split strategies.
    conf.set("mapreduce.input.fileinputformat.split.maxsize", "500");
    conf.set(HiveConf.ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_MEMORY_SIZE.varname, "10Mb");
    final int[] counts = { 1, 10, 100, 256 };
    final int[] sizes = { 100, 1000 };
    final int[] numSplits = { 1, 9, 10, 11, 99, 111 };
    // Expected strategy for each (count, size, numSplits) combination, in
    // iteration order of the nested loops below.
    final String[] strategyResults = new String[] { "ETLSplitStrategy", /* 1 files x 100 size for 1 splits */
    "ETLSplitStrategy", /* 1 files x 100 size for 9 splits */
    "ETLSplitStrategy", /* 1 files x 100 size for 10 splits */
    "ETLSplitStrategy", /* 1 files x 100 size for 11 splits */
    "ETLSplitStrategy", /* 1 files x 100 size for 99 splits */
    "ETLSplitStrategy", /* 1 files x 100 size for 111 splits */
    "ETLSplitStrategy", /* 1 files x 1000 size for 1 splits */
    "ETLSplitStrategy", /* 1 files x 1000 size for 9 splits */
    "ETLSplitStrategy", /* 1 files x 1000 size for 10 splits */
    "ETLSplitStrategy", /* 1 files x 1000 size for 11 splits */
    "ETLSplitStrategy", /* 1 files x 1000 size for 99 splits */
    "ETLSplitStrategy", /* 1 files x 1000 size for 111 splits */
    "BISplitStrategy", /* 10 files x 100 size for 1 splits */
    "BISplitStrategy", /* 10 files x 100 size for 9 splits */
    "ETLSplitStrategy", /* 10 files x 100 size for 10 splits */
    "ETLSplitStrategy", /* 10 files x 100 size for 11 splits */
    "ETLSplitStrategy", /* 10 files x 100 size for 99 splits */
    "ETLSplitStrategy", /* 10 files x 100 size for 111 splits */
    "ETLSplitStrategy", /* 10 files x 1000 size for 1 splits */
    "ETLSplitStrategy", /* 10 files x 1000 size for 9 splits */
    "ETLSplitStrategy", /* 10 files x 1000 size for 10 splits */
    "ETLSplitStrategy", /* 10 files x 1000 size for 11 splits */
    "ETLSplitStrategy", /* 10 files x 1000 size for 99 splits */
    "ETLSplitStrategy", /* 10 files x 1000 size for 111 splits */
    "BISplitStrategy", /* 100 files x 100 size for 1 splits */
    "BISplitStrategy", /* 100 files x 100 size for 9 splits */
    "BISplitStrategy", /* 100 files x 100 size for 10 splits */
    "BISplitStrategy", /* 100 files x 100 size for 11 splits */
    "BISplitStrategy", /* 100 files x 100 size for 99 splits */
    "ETLSplitStrategy", /* 100 files x 100 size for 111 splits */
    "ETLSplitStrategy", /* 100 files x 1000 size for 1 splits */
    "ETLSplitStrategy", /* 100 files x 1000 size for 9 splits */
    "ETLSplitStrategy", /* 100 files x 1000 size for 10 splits */
    "ETLSplitStrategy", /* 100 files x 1000 size for 11 splits */
    "ETLSplitStrategy", /* 100 files x 1000 size for 99 splits */
    "ETLSplitStrategy", /* 100 files x 1000 size for 111 splits */
    "BISplitStrategy", /* 256 files x 100 size for 1 splits */
    "BISplitStrategy", /* 256 files x 100 size for 9 splits */
    "BISplitStrategy", /* 256 files x 100 size for 10 splits */
    "BISplitStrategy", /* 256 files x 100 size for 11 splits */
    "BISplitStrategy", /* 256 files x 100 size for 99 splits */
    "BISplitStrategy", /* 256 files x 100 size for 111 splits */
    "ETLSplitStrategy", /* 256 files x 1000 size for 1 splits */
    "ETLSplitStrategy", /* 256 files x 1000 size for 9 splits */
    "ETLSplitStrategy", /* 256 files x 1000 size for 10 splits */
    "ETLSplitStrategy", /* 256 files x 1000 size for 11 splits */
    "ETLSplitStrategy", /* 256 files x 1000 size for 99 splits */
    "ETLSplitStrategy" };
    // The same expectations must hold with the stripe-details cache enabled...
    checkSplitStrategySelection(counts, sizes, numSplits, strategyResults);
    // ...and with the cache sized to zero (use the fully-qualified
    // HiveConf.ConfVars form consistently with the first set() above).
    conf.set(HiveConf.ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_MEMORY_SIZE.varname, "0");
    checkSplitStrategySelection(counts, sizes, numSplits, strategyResults);
}

/**
 * Verifies that every (file count, file size, requested split count)
 * combination selects the expected split strategy. {@code expected} is
 * indexed in iteration order: counts outermost, then sizes, then numSplits.
 */
private void checkSplitStrategySelection(int[] counts, int[] sizes, int[] numSplits, String[] expected) throws Exception {
    int k = 0;
    for (int c : counts) {
        for (int s : sizes) {
            final FileSystem fs = generateMockFiles(c, s);
            for (int n : numSplits) {
                final OrcInputFormat.Context context = new OrcInputFormat.Context(conf, n);
                OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a/b"), false, null);
                List<SplitStrategy<?>> splitStrategies = createSplitStrategies(context, gen);
                assertEquals(1, splitStrategies.size());
                // Parameterized local (no raw SplitStrategy type); assertEquals
                // rather than assertTrue(..., equals(...)) so a mismatch reports
                // both the expected and the actual strategy name.
                final SplitStrategy<?> splitStrategy = splitStrategies.get(0);
                assertEquals(String.format("Split strategy for %d files x %d size for %d splits", c, s, n), expected[k++], splitStrategy.getClass().getSimpleName());
            }
        }
    }
}
Also used : Context(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.Context) FileSystem(org.apache.hadoop.fs.FileSystem) SplitStrategy(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.SplitStrategy) Test(org.junit.Test)

Example 2 with Context

use of org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.Context in project hive by apache.

From the class TestInputOutputFormat, method testSplitGenerator.

@Test
public void testSplitGenerator() throws Exception {
    // One mock ORC file with ten stripes spread over 5 blocks across the cluster.
    long[] stripeSizes = new long[] { 197, 300, 600, 200, 200, 100, 100, 100, 100, 100 };
    MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/a/file", 500, createMockOrcFile(stripeSizes), new MockBlock("host1-1", "host1-2", "host1-3"), new MockBlock("host2-1", "host0", "host2-3"), new MockBlock("host0", "host3-2", "host3-3"), new MockBlock("host4-1", "host4-2", "host4-3"), new MockBlock("host5-1", "host5-2", "host5-3")));
    HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 300);
    HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 200);
    OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
    OrcInputFormat.SplitGenerator generator = new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs, fs.getFileStatus(new Path("/a/file")), null, null, true, new ArrayList<AcidInputFormat.DeltaMetaData>(), true, null, null), null, true);
    List<OrcSplit> splits = generator.call();
    // Expected (start, length) pairs for each generated split under min=200/max=300.
    long[][] expected = { { 3, 497 }, { 500, 600 }, { 1100, 400 }, { 1500, 300 }, { 1800, 200 } };
    for (int i = 0; i < expected.length; ++i) {
        assertEquals(expected[i][0], splits.get(i).getStart());
        assertEquals(expected[i][1], splits.get(i).getLength());
    }
    // With min = max = 0 every stripe is emitted as its own split.
    HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 0);
    HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 0);
    context = new OrcInputFormat.Context(conf);
    generator = new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs, fs.getFileStatus(new Path("/a/file")), null, null, true, new ArrayList<AcidInputFormat.DeltaMetaData>(), true, null, null), null, true);
    splits = generator.call();
    for (int i = 0; i < stripeSizes.length; ++i) {
        assertEquals("checking stripe " + i + " size", stripeSizes[i], splits.get(i).getLength());
    }
}
Also used : Context(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.Context) Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) AcidInputFormat(org.apache.hadoop.hive.ql.io.AcidInputFormat) Test(org.junit.Test)

Example 3 with Context

use of org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.Context in project hive by apache.

From the class TestInputOutputFormat, method testACIDSplitStrategy.

@Test
public void testACIDSplitStrategy() throws Exception {
    conf.set("bucket_count", "2");
    OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
    // Two delta directories with two bucket files each.
    MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/a/delta_000_001/part-00", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/delta_000_001/part-01", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/delta_001_002/part-02", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/delta_001_002/part-03", 1000, new byte[1], new MockBlock("host1")));
    OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a"), false, null);
    List<OrcInputFormat.SplitStrategy<?>> splitStrategies = createSplitStrategies(context, gen);
    // assertTrue reads better than assertEquals(true, ...) for boolean checks.
    assertTrue(splitStrategies.get(0) instanceof OrcInputFormat.ACIDSplitStrategy);
    List<OrcSplit> splits = ((OrcInputFormat.ACIDSplitStrategy) splitStrategies.get(0)).getSplits();
    // Check the split count up front so a count mismatch fails with a clear
    // message instead of (or after) a per-split estimate failure.
    assertEquals(2, splits.size());
    ColumnarSplitSizeEstimator splitSizeEstimator = new ColumnarSplitSizeEstimator();
    for (OrcSplit split : splits) {
        // The estimator reports Integer.MAX_VALUE for each of these ACID splits.
        assertEquals(Integer.MAX_VALUE, splitSizeEstimator.getEstimatedSize(split));
    }
}
Also used : Context(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.Context) SplitStrategy(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.SplitStrategy) ColumnarSplitSizeEstimator(org.apache.hadoop.hive.ql.exec.tez.ColumnarSplitSizeEstimator) Test(org.junit.Test)

Example 4 with Context

use of org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.Context in project hive by apache.

From the class TestInputOutputFormat, method testProjectedColumnSize.

@Test
public void testProjectedColumnSize() throws Exception {
    // Mock ORC file with five stripes spread over 5 blocks across the cluster.
    long[] stripeSizes = new long[] { 200, 200, 200, 200, 100 };
    MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/a/file", 500, createMockOrcFile(stripeSizes), new MockBlock("host1-1", "host1-2", "host1-3"), new MockBlock("host2-1", "host0", "host2-3"), new MockBlock("host0", "host3-2", "host3-3"), new MockBlock("host4-1", "host4-2", "host4-3"), new MockBlock("host5-1", "host5-2", "host5-3")));
    HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 300);
    HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 200);
    // Project only column 0 so splits report a reduced uncompressed size.
    conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
    OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
    OrcInputFormat.SplitGenerator splitter = new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs, fs.getFileStatus(new Path("/a/file")), null, null, true, new ArrayList<AcidInputFormat.DeltaMetaData>(), true, null, null), null, true);
    List<OrcSplit> results = splitter.call();
    OrcSplit result = results.get(0);
    // min=200/max=300: five stripes coalesce into 3 splits of two, two, and one
    // stripe. The projected sizes (167468, 41867) presumably come from the
    // column statistics createMockOrcFile writes for column 0 — confirm against
    // that helper if these expectations ever need updating.
    assertEquals(3, results.size());
    assertEquals(3, result.getStart());
    assertEquals(400, result.getLength());
    assertEquals(167468, result.getProjectedColumnsUncompressedSize());
    result = results.get(1);
    assertEquals(403, result.getStart());
    assertEquals(400, result.getLength());
    assertEquals(167468, result.getProjectedColumnsUncompressedSize());
    result = results.get(2);
    assertEquals(803, result.getStart());
    assertEquals(100, result.getLength());
    assertEquals(41867, result.getProjectedColumnsUncompressedSize());
    // test min = 0, max = 0 generates each stripe
    HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 0);
    HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 0);
    context = new OrcInputFormat.Context(conf);
    splitter = new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs, fs.getFileStatus(new Path("/a/file")), null, null, true, new ArrayList<AcidInputFormat.DeltaMetaData>(), true, null, null), null, true);
    results = splitter.call();
    assertEquals(5, results.size());
    for (int i = 0; i < stripeSizes.length; ++i) {
        assertEquals("checking stripe " + i + " size", stripeSizes[i], results.get(i).getLength());
        // The last stripe (100 bytes) projects to half the size of a 200-byte stripe.
        if (i == stripeSizes.length - 1) {
            assertEquals(41867, results.get(i).getProjectedColumnsUncompressedSize());
        } else {
            assertEquals(83734, results.get(i).getProjectedColumnsUncompressedSize());
        }
    }
    // single split
    HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 1000);
    HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 100000);
    context = new OrcInputFormat.Context(conf);
    splitter = new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs, fs.getFileStatus(new Path("/a/file")), null, null, true, new ArrayList<AcidInputFormat.DeltaMetaData>(), true, null, null), null, true);
    results = splitter.call();
    assertEquals(1, results.size());
    result = results.get(0);
    assertEquals(3, result.getStart());
    assertEquals(900, result.getLength());
    assertEquals(376804, result.getProjectedColumnsUncompressedSize());
}
Also used : Context(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.Context) Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) AcidInputFormat(org.apache.hadoop.hive.ql.io.AcidInputFormat) Test(org.junit.Test)

Example 5 with Context

use of org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.Context in project hive by apache.

From the class TestInputOutputFormat, method testACIDSplitStrategyForSplitUpdate.

@Test
public void testACIDSplitStrategyForSplitUpdate() throws Exception {
    // Transactional table with split-update (delete deltas) enabled.
    conf.set("bucket_count", "2");
    conf.set(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "true");
    conf.set(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES, "default");
    OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
    // Case 1: Test with just originals => Single split strategy with two splits.
    MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/a/b/000000_0", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/b/000000_1", 1000, new byte[1], new MockBlock("host1")));
    OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a"), false, null);
    List<OrcInputFormat.SplitStrategy<?>> splitStrategies = createSplitStrategies(context, gen);
    assertEquals(1, splitStrategies.size());
    // assertTrue instead of assertEquals(true, ...) throughout — same check,
    // clearer intent and failure output.
    assertTrue(splitStrategies.get(0) instanceof OrcInputFormat.ACIDSplitStrategy);
    List<OrcSplit> splits = ((OrcInputFormat.ACIDSplitStrategy) splitStrategies.get(0)).getSplits();
    assertEquals(2, splits.size());
    assertEquals("mock:/a/b/000000_0", splits.get(0).getPath().toUri().toString());
    assertEquals("mock:/a/b/000000_1", splits.get(1).getPath().toUri().toString());
    assertTrue(splits.get(0).isOriginal());
    assertTrue(splits.get(1).isOriginal());
    // Case 2: Test with originals and base => Single split strategy with two splits on compacted
    // base since the presence of a base will make the originals obsolete.
    fs = new MockFileSystem(conf, new MockFile("mock:/a/b/000000_0", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/b/000000_1", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/base_0000001/bucket_00000", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/base_0000001/bucket_00001", 1000, new byte[1], new MockBlock("host1")));
    gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a"), false, null);
    splitStrategies = createSplitStrategies(context, gen);
    assertEquals(1, splitStrategies.size());
    assertTrue(splitStrategies.get(0) instanceof OrcInputFormat.ACIDSplitStrategy);
    splits = ((OrcInputFormat.ACIDSplitStrategy) splitStrategies.get(0)).getSplits();
    assertEquals(2, splits.size());
    assertEquals("mock:/a/base_0000001/bucket_00000", splits.get(0).getPath().toUri().toString());
    assertEquals("mock:/a/base_0000001/bucket_00001", splits.get(1).getPath().toUri().toString());
    assertFalse(splits.get(0).isOriginal());
    assertFalse(splits.get(1).isOriginal());
    // Case 3: Test with originals and deltas => Two split strategies with two splits for each.
    fs = new MockFileSystem(conf, new MockFile("mock:/a/b/000000_0", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/b/000000_1", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/delta_0000001_0000001_0000/bucket_00000", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/delta_0000001_0000001_0000/bucket_00001", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/delete_delta_0000001_0000001_0000/bucket_00000", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/delete_delta_0000001_0000001_0000/bucket_00001", 1000, new byte[1], new MockBlock("host1")));
    gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a"), false, null);
    splitStrategies = createSplitStrategies(context, gen);
    assertEquals(2, splitStrategies.size());
    assertTrue(splitStrategies.get(0) instanceof OrcInputFormat.ACIDSplitStrategy);
    splits = ((OrcInputFormat.ACIDSplitStrategy) splitStrategies.get(0)).getSplits();
    assertEquals(2, splits.size());
    assertEquals("mock:/a/b/000000_0", splits.get(0).getPath().toUri().toString());
    assertEquals("mock:/a/b/000000_1", splits.get(1).getPath().toUri().toString());
    assertTrue(splits.get(0).isOriginal());
    assertTrue(splits.get(1).isOriginal());
    assertTrue(splitStrategies.get(1) instanceof OrcInputFormat.ACIDSplitStrategy);
    splits = ((OrcInputFormat.ACIDSplitStrategy) splitStrategies.get(1)).getSplits();
    assertEquals(2, splits.size());
    assertEquals("mock:/a/delta_0000001_0000001_0000/bucket_00000", splits.get(0).getPath().toUri().toString());
    assertEquals("mock:/a/delta_0000001_0000001_0000/bucket_00001", splits.get(1).getPath().toUri().toString());
    assertFalse(splits.get(0).isOriginal());
    assertFalse(splits.get(1).isOriginal());
    // Case 4: Test with originals and deltas but now with only one bucket covered, i.e. we will
    // have originals & insert_deltas for only one bucket, but the delete_deltas will be for two
    // buckets => Two strategies with one split for each.
    // When split-update is enabled, we do not need to account for buckets that aren't covered.
    // The reason why we are able to do so is because the valid user data has already been considered
    // as base for the covered buckets. Hence, the uncovered buckets do not have any relevant
    // data and we can just ignore them.
    fs = new MockFileSystem(conf, new MockFile("mock:/a/b/000000_0", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/delta_0000001_0000001_0000/bucket_00000", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/delete_delta_0000001_0000001_0000/bucket_00000", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/delete_delta_0000001_0000001_0000/bucket_00001", 1000, new byte[1], new MockBlock("host1")));
    gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a"), false, null);
    splitStrategies = createSplitStrategies(context, gen);
    assertEquals(2, splitStrategies.size());
    assertTrue(splitStrategies.get(0) instanceof OrcInputFormat.ACIDSplitStrategy);
    splits = ((OrcInputFormat.ACIDSplitStrategy) splitStrategies.get(0)).getSplits();
    assertEquals(1, splits.size());
    assertEquals("mock:/a/b/000000_0", splits.get(0).getPath().toUri().toString());
    assertTrue(splits.get(0).isOriginal());
    assertTrue(splitStrategies.get(1) instanceof OrcInputFormat.ACIDSplitStrategy);
    splits = ((OrcInputFormat.ACIDSplitStrategy) splitStrategies.get(1)).getSplits();
    assertEquals(1, splits.size());
    assertEquals("mock:/a/delta_0000001_0000001_0000/bucket_00000", splits.get(0).getPath().toUri().toString());
    assertFalse(splits.get(0).isOriginal());
    // Case 5: Test with originals, compacted_base, insert_deltas, delete_deltas (exhaustive test)
    // This should just generate one strategy with splits for base and insert_deltas.
    fs = new MockFileSystem(conf, new MockFile("mock:/a/b/000000_0", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/b/000000_1", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/base_0000001/bucket_00000", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/base_0000001/bucket_00001", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/delta_0000002_0000002_0000/bucket_00000", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/delta_0000002_0000002_0000/bucket_00001", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/delete_delta_0000002_0000002_0000/bucket_00000", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/delete_delta_0000002_0000002_0000/bucket_00001", 1000, new byte[1], new MockBlock("host1")));
    gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a"), false, null);
    splitStrategies = createSplitStrategies(context, gen);
    assertEquals(1, splitStrategies.size());
    assertTrue(splitStrategies.get(0) instanceof OrcInputFormat.ACIDSplitStrategy);
    splits = ((OrcInputFormat.ACIDSplitStrategy) splitStrategies.get(0)).getSplits();
    assertEquals(4, splits.size());
    assertEquals("mock:/a/base_0000001/bucket_00000", splits.get(0).getPath().toUri().toString());
    assertEquals("mock:/a/base_0000001/bucket_00001", splits.get(1).getPath().toUri().toString());
    assertEquals("mock:/a/delta_0000002_0000002_0000/bucket_00000", splits.get(2).getPath().toUri().toString());
    assertEquals("mock:/a/delta_0000002_0000002_0000/bucket_00001", splits.get(3).getPath().toUri().toString());
    assertFalse(splits.get(0).isOriginal());
    assertFalse(splits.get(1).isOriginal());
    assertFalse(splits.get(2).isOriginal());
    assertFalse(splits.get(3).isOriginal());
}
Also used : Context(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.Context) SplitStrategy(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.SplitStrategy) Test(org.junit.Test)

Aggregations

Context (org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.Context)10 Test (org.junit.Test)10 SplitStrategy (org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.SplitStrategy)6 ArrayList (java.util.ArrayList)3 Path (org.apache.hadoop.fs.Path)3 AcidInputFormat (org.apache.hadoop.hive.ql.io.AcidInputFormat)3 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 URISyntaxException (java.net.URISyntaxException)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 ColumnarSplitSizeEstimator (org.apache.hadoop.hive.ql.exec.tez.ColumnarSplitSizeEstimator)1 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)1 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)1