
Example 1 with ValidCompactorWriteIdList

Use of org.apache.hadoop.hive.common.ValidCompactorWriteIdList in project hive by apache.

From the class TxnUtils, method createValidCompactWriteIdList.

/**
 * Transform a {@link org.apache.hadoop.hive.metastore.api.TableValidWriteIds} to a
 * {@link org.apache.hadoop.hive.common.ValidCompactorWriteIdList}. This assumes that the caller intends to
 * compact the files, and thus treats only open transactions/write ids as invalid. Additionally, any
 * writeId at or above minOpenWriteId (the lowest open writeId) is also invalid, matching the capping
 * of the high watermark below. This avoids creating something like delta_17_120 where writeId 80,
 * for example, is still open.
 * @param tableValidWriteIds table write id list from the metastore
 * @return a valid write id list for the compactor
 */
public static ValidCompactorWriteIdList createValidCompactWriteIdList(TableValidWriteIds tableValidWriteIds) {
    String fullTableName = tableValidWriteIds.getFullTableName();
    long highWater = tableValidWriteIds.getWriteIdHighWaterMark();
    long minOpenWriteId = Long.MAX_VALUE;
    List<Long> invalids = tableValidWriteIds.getInvalidWriteIds();
    BitSet abortedBits = BitSet.valueOf(tableValidWriteIds.getAbortedBits());
    long[] exceptions = new long[invalids.size()];
    int exceptionCount = 0;
    int bitIdx = 0;
    for (long writeId : invalids) {
        // abortedBits is positional over the invalids list, so the bit index must
        // advance on every entry, not only on aborted ones.
        if (abortedBits.get(bitIdx++)) {
            // Only aborted ids are kept as exceptions; open ids are folded into
            // minOpenWriteId, which caps the high watermark below.
            exceptions[exceptionCount++] = writeId;
        } else {
            minOpenWriteId = Math.min(minOpenWriteId, writeId);
        }
    }
    if (exceptionCount < exceptions.length) {
        exceptions = Arrays.copyOf(exceptions, exceptionCount);
    }
    highWater = minOpenWriteId == Long.MAX_VALUE ? highWater : minOpenWriteId - 1;
    BitSet bitSet = new BitSet(exceptions.length);
    // for ValidCompactorWriteIdList, everything in exceptions is aborted
    bitSet.set(0, exceptions.length);
    if (minOpenWriteId == Long.MAX_VALUE) {
        return new ValidCompactorWriteIdList(fullTableName, exceptions, bitSet, highWater);
    } else {
        return new ValidCompactorWriteIdList(fullTableName, exceptions, bitSet, highWater, minOpenWriteId);
    }
}
Also used: ValidCompactorWriteIdList (org.apache.hadoop.hive.common.ValidCompactorWriteIdList), BitSet (java.util.BitSet)
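
To make the capping behavior concrete, here is a minimal, hypothetical sketch of driving this method. It assumes the thrift-generated TableValidWriteIds setters (setFullTableName, setWriteIdHighWaterMark, setInvalidWriteIds, setAbortedBits) and the TxnUtils location org.apache.hadoop.hive.metastore.txn.TxnUtils, neither of which is confirmed by the snippet above.

import java.util.Arrays;
import java.util.BitSet;
import org.apache.hadoop.hive.common.ValidCompactorWriteIdList;
import org.apache.hadoop.hive.metastore.api.TableValidWriteIds;
import org.apache.hadoop.hive.metastore.txn.TxnUtils;

public class CompactWriteIdListSketch {
    public static void main(String[] args) {
        // Hypothetical table state: high watermark 120, writeId 80 open, writeId 90 aborted.
        TableValidWriteIds tableIds = new TableValidWriteIds();
        tableIds.setFullTableName("default.tbl");
        tableIds.setWriteIdHighWaterMark(120L);
        tableIds.setInvalidWriteIds(Arrays.asList(80L, 90L));
        BitSet aborted = new BitSet(2);
        aborted.set(1); // bit 1 -> writeId 90 aborted; bit 0 (writeId 80) stays open
        tableIds.setAbortedBits(aborted.toByteArray());

        ValidCompactorWriteIdList compactIds = TxnUtils.createValidCompactWriteIdList(tableIds);
        // The open writeId 80 caps the compactor's high watermark at 79, so a
        // compaction cannot produce a delta such as delta_17_120 that spans it.
        System.out.println(compactIds.getHighWatermark()); // expected: 79
    }
}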

Example 2 with ValidCompactorWriteIdList

Use of org.apache.hadoop.hive.common.ValidCompactorWriteIdList in project hive by apache.

From the class TestAcidUtils, method deltasWithOpenTxnsNotInCompact.

@Test
public void deltasWithOpenTxnsNotInCompact() throws Exception {
    Configuration conf = new Configuration();
    MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/tbl/part1/delta_1_1/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_2_5/bucket_0", 500, new byte[0]));
    Path part = new MockPath(fs, "mock:/tbl/part1");
    AcidUtils.Directory dir = AcidUtils.getAcidState(part, conf, new ValidCompactorWriteIdList("tbl:4:" + Long.MAX_VALUE));
    List<AcidUtils.ParsedDelta> delts = dir.getCurrentDirectories();
    assertEquals(1, delts.size());
    assertEquals("mock:/tbl/part1/delta_1_1", delts.get(0).getPath().toString());
}
Also used: Path (org.apache.hadoop.fs.Path), MockPath (org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat.MockPath), ValidCompactorWriteIdList (org.apache.hadoop.hive.common.ValidCompactorWriteIdList), Configuration (org.apache.hadoop.conf.Configuration), MockFile (org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat.MockFile), MockFileSystem (org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat.MockFileSystem), Test (org.junit.Test)
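
The constructor argument here is the compact string form of a write id list. Read as "<tableName>:<highWatermark>:<minOpenWriteId>..." (an assumption inferred from the values used in this test, not spelled out in the snippet), "tbl:4:" + Long.MAX_VALUE means table tbl, high watermark 4, and no open write id, with Long.MAX_VALUE acting as the sentinel. That is why delta_2_5, which reaches writeId 5, is filtered out while delta_1_1 survives. A minimal sketch under that assumption:

ValidCompactorWriteIdList writeIds = new ValidCompactorWriteIdList("tbl:4:" + Long.MAX_VALUE);
// delta_2_5 covers writeId 5 > 4, so it falls outside the compactable range.
System.out.println(writeIds.getHighWatermark()); // expected: 4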

Example 3 with ValidCompactorWriteIdList

Use of org.apache.hadoop.hive.common.ValidCompactorWriteIdList in project hive by apache.

From the class TestAcidUtils, method deltasAndDeleteDeltasWithOpenTxnsNotInCompact.

@Test
public void deltasAndDeleteDeltasWithOpenTxnsNotInCompact() throws Exception {
    // This test checks that the appropriate delta and delete_delta directories are
    // included when a minor compaction specifies a valid open txn range.
    Configuration conf = new Configuration();
    conf.setInt(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname, AcidUtils.AcidOperationalProperties.getDefault().toInt());
    MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/tbl/part1/delta_1_1/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delete_delta_2_2/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_2_5/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delete_delta_2_5/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_2_5/bucket_0" + AcidUtils.DELTA_SIDE_FILE_SUFFIX, 500, new byte[0]), new MockFile("mock:/tbl/part1/delete_delta_7_7/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_6_10/bucket_0", 500, new byte[0]));
    Path part = new MockPath(fs, "mock:/tbl/part1");
    AcidUtils.Directory dir = AcidUtils.getAcidState(part, conf, new ValidCompactorWriteIdList("tbl:4:" + Long.MAX_VALUE + ":"));
    List<AcidUtils.ParsedDelta> delts = dir.getCurrentDirectories();
    assertEquals(2, delts.size());
    assertEquals("mock:/tbl/part1/delta_1_1", delts.get(0).getPath().toString());
    assertEquals("mock:/tbl/part1/delete_delta_2_2", delts.get(1).getPath().toString());
}
Also used: Path (org.apache.hadoop.fs.Path), MockPath (org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat.MockPath), ValidCompactorWriteIdList (org.apache.hadoop.hive.common.ValidCompactorWriteIdList), Configuration (org.apache.hadoop.conf.Configuration), MockFile (org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat.MockFile), MockFileSystem (org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat.MockFileSystem), Test (org.junit.Test)
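
Which directories survive follows from the writeId ranges encoded in the directory names. The sketch below uses a hypothetical parseDeltaRange helper (not part of AcidUtils) and assumes the selection rule is simply that a directory's whole range must sit at or below the high watermark of 4; under that assumption it reproduces the two assertions above:

// Hypothetical helper (not part of AcidUtils): pull the writeId range out of a
// delta or delete_delta directory name such as "delete_delta_2_5".
static long[] parseDeltaRange(String dirName) {
    String prefix = dirName.startsWith("delete_delta_") ? "delete_delta_" : "delta_";
    String[] parts = dirName.substring(prefix.length()).split("_");
    return new long[] { Long.parseLong(parts[0]), Long.parseLong(parts[1]) };
}

static void demo() {
    long highWatermark = 4; // from "tbl:4:..."
    for (String d : new String[] { "delta_1_1", "delete_delta_2_2", "delta_2_5",
            "delete_delta_2_5", "delete_delta_7_7", "delta_6_10" }) {
        long[] range = parseDeltaRange(d);
        // Only delta_1_1 and delete_delta_2_2 end at or below the watermark.
        System.out.println(d + " -> included: " + (range[1] <= highWatermark));
    }
}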

Example 4 with ValidCompactorWriteIdList

Use of org.apache.hadoop.hive.common.ValidCompactorWriteIdList in project hive by apache.

From the class TestAcidUtils, method deltasWithOpenTxnsNotInCompact2.

@Test
public void deltasWithOpenTxnsNotInCompact2() throws Exception {
    Configuration conf = new Configuration();
    MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/tbl/part1/delta_1_1/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_2_5/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_2_5/bucket_0" + AcidUtils.DELTA_SIDE_FILE_SUFFIX, 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_6_10/bucket_0", 500, new byte[0]));
    Path part = new MockPath(fs, "mock:/tbl/part1");
    AcidUtils.Directory dir = AcidUtils.getAcidState(part, conf, new ValidCompactorWriteIdList("tbl:3:" + Long.MAX_VALUE));
    List<AcidUtils.ParsedDelta> delts = dir.getCurrentDirectories();
    assertEquals(1, delts.size());
    assertEquals("mock:/tbl/part1/delta_1_1", delts.get(0).getPath().toString());
}
Also used: Path (org.apache.hadoop.fs.Path), MockPath (org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat.MockPath), ValidCompactorWriteIdList (org.apache.hadoop.hive.common.ValidCompactorWriteIdList), Configuration (org.apache.hadoop.conf.Configuration), MockFile (org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat.MockFile), MockFileSystem (org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat.MockFileSystem), Test (org.junit.Test)
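
This test differs from Example 2 mainly in the side file attached to delta_2_5 via AcidUtils.DELTA_SIDE_FILE_SUFFIX, which marks a delta that a streaming writer may still be appending to. A minimal sketch of spotting one, reusing the test's fs and paths; the interpretation of the side file is an assumption, not stated in the snippet:

Path bucket = new Path("mock:/tbl/part1/delta_2_5/bucket_0");
Path sideFile = new Path(bucket.getParent(),
        bucket.getName() + AcidUtils.DELTA_SIDE_FILE_SUFFIX);
boolean stillBeingWritten = fs.exists(sideFile); // true for the layout mocked above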

Aggregations

ValidCompactorWriteIdList (org.apache.hadoop.hive.common.ValidCompactorWriteIdList): 4 uses
Configuration (org.apache.hadoop.conf.Configuration): 3 uses
Path (org.apache.hadoop.fs.Path): 3 uses
MockFile (org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat.MockFile): 3 uses
MockFileSystem (org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat.MockFileSystem): 3 uses
MockPath (org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat.MockPath): 3 uses
Test (org.junit.Test): 3 uses
BitSet (java.util.BitSet): 1 use