Search in sources :

Example 21 with BitSet

use of java.util.BitSet in project hadoop by apache.

the class TestCombineTextInputFormat method testFormat.

/**
 * Populates the work directory through {@code createFiles(length, numFiles, random)} and then,
 * three times over, requests a random number of splits from {@link CombineTextInputFormat}.
 * Each round must yield exactly one {@code CombineFileSplit}; every record read from it is
 * parsed as an integer, must not have been seen before in that round, and the round must end
 * with {@code length} distinct values.
 */
@Test(timeout = 10000)
public void testFormat() throws Exception {
    final int length = 10000;
    final int numFiles = 10;
    JobConf job = new JobConf(defaultConf);

    // Draw a seed from a fresh generator, log it, then re-seed the generator with it.
    Random random = new Random();
    long seed = random.nextLong();
    LOG.info("seed = " + seed);
    random.setSeed(seed);

    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, workDir);
    createFiles(length, numFiles, random);

    // The format under test, plus the key/value holders reused for every record.
    CombineTextInputFormat format = new CombineTextInputFormat();
    LongWritable key = new LongWritable();
    Text value = new Text();

    for (int round = 0; round < 3; round++) {
        int requested = random.nextInt(length / 20) + 1;
        LOG.info("splitting: requesting = " + requested);
        InputSplit[] splits = format.getSplits(job, requested);
        LOG.info("splitting: got =        " + splits.length);

        // The input is comfortably smaller than the block size, so whatever was
        // requested, a single combined split is expected.
        assertEquals("We got more than one splits!", 1, splits.length);
        InputSplit onlySplit = splits[0];
        assertEquals("It should be CombineFileSplit", CombineFileSplit.class, onlySplit.getClass());

        // One bit per value read; a bit that is already set means a duplicate.
        BitSet seenValues = new BitSet(length);
        LOG.debug("split= " + onlySplit);
        RecordReader<LongWritable, Text> reader = format.getRecordReader(onlySplit, job, voidReporter);
        try {
            int records = 0;
            while (reader.next(key, value)) {
                int parsed = Integer.parseInt(value.toString());
                LOG.debug("read " + parsed);
                boolean duplicate = seenValues.get(parsed);
                if (duplicate) {
                    LOG.warn("conflict with " + parsed + " at position " + reader.getPos());
                }
                assertFalse("Key in multiple partitions.", duplicate);
                seenValues.set(parsed);
                records++;
            }
            LOG.info("splits=" + onlySplit + " count=" + records);
        } finally {
            reader.close();
        }
        assertEquals("Some keys in no partition.", length, seenValues.cardinality());
    }
}
Also used : CombineTextInputFormat(org.apache.hadoop.mapred.lib.CombineTextInputFormat) Random(java.util.Random) BitSet(java.util.BitSet) Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable) Test(org.junit.Test)

Example 22 with BitSet

use of java.util.BitSet in project hadoop by apache.

the class TestCombineSequenceFileInputFormat method testFormat.

/**
 * Populates the work directory through {@code createFiles(length, numFiles, random)} and then,
 * three times over, requests a random number of splits from
 * {@code CombineSequenceFileInputFormat}. Each round must yield exactly one
 * {@code CombineFileSplit}; every key read from it must be new for that round, and the round
 * must end with {@code length} distinct keys.
 */
@Test(timeout = 10000)
public void testFormat() throws Exception {
    final int length = 10000;
    final int numFiles = 10;
    JobConf job = new JobConf(conf);

    // Draw a seed from a fresh generator, log it, then re-seed the generator with it.
    Random random = new Random();
    long seed = random.nextLong();
    LOG.info("seed = " + seed);
    random.setSeed(seed);

    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, workDir);
    // create the input files
    createFiles(length, numFiles, random);

    // The format under test, plus the key/value holders reused for every record.
    InputFormat<IntWritable, BytesWritable> format = new CombineSequenceFileInputFormat<IntWritable, BytesWritable>();
    IntWritable key = new IntWritable();
    BytesWritable value = new BytesWritable();

    for (int round = 0; round < 3; round++) {
        int requested = random.nextInt(length / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
        LOG.info("splitting: requesting = " + requested);
        InputSplit[] splits = format.getSplits(job, requested);
        LOG.info("splitting: got =        " + splits.length);

        // The input is comfortably smaller than the block size, so whatever was
        // requested, a single combined split is expected.
        assertEquals("We got more than one splits!", 1, splits.length);
        InputSplit onlySplit = splits[0];
        assertEquals("It should be CombineFileSplit", CombineFileSplit.class, onlySplit.getClass());

        // One bit per key read; a bit that is already set means a duplicate.
        BitSet seenKeys = new BitSet(length);
        RecordReader<IntWritable, BytesWritable> reader = format.getRecordReader(onlySplit, job, Reporter.NULL);
        try {
            while (reader.next(key, value)) {
                int current = key.get();
                assertFalse("Key in multiple partitions.", seenKeys.get(current));
                seenKeys.set(current);
            }
        } finally {
            reader.close();
        }
        assertEquals("Some keys in no partition.", length, seenKeys.cardinality());
    }
}
Also used : CombineSequenceFileInputFormat(org.apache.hadoop.mapred.lib.CombineSequenceFileInputFormat) BitSet(java.util.BitSet) BytesWritable(org.apache.hadoop.io.BytesWritable) Random(java.util.Random) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Example 23 with BitSet

use of java.util.BitSet in project hive by apache.

the class TestWorker method compactNoBaseLotsOfDeltas.

/**
 * Sets COMPACTOR_MAX_NUM_DELTA to 2, registers six delta files (and no base) on one
 * partition, requests a compaction of the given {@code type}, runs the worker, and then
 * checks that the partition location lists exactly the nine expected entries.
 *
 * @param type the compaction to request; only MINOR and MAJOR are handled, anything else
 *             makes the verification loop throw {@link IllegalStateException}
 */
private void compactNoBaseLotsOfDeltas(CompactionType type) throws Exception {
    conf.setIntVar(HiveConf.ConfVars.COMPACTOR_MAX_NUM_DELTA, 2);
    Table table = newTable("default", "mapwb", true);
    Partition partition = newPartition(table, "today");

    //    addBaseFile(table, partition, 20L, 20);
    addDeltaFile(table, partition, 21L, 21L, 2);
    addDeltaFile(table, partition, 23L, 23L, 2);
    // make it look like the streaming API use case
    addDeltaFile(table, partition, 25L, 29L, 2);
    addDeltaFile(table, partition, 31L, 32L, 3);
    // make it look like 31-32 has been compacted, but not cleaned
    addDeltaFile(table, partition, 31L, 33L, 5);
    addDeltaFile(table, partition, 35L, 35L, 1);

    /*
     * Because COMPACTOR_MAX_NUM_DELTA=2 the expectation is:
     *  - files 1 and 2 are minor compacted by one job into delta_21_23;
     *  - files 3 and 5 are minor compacted by a second job into delta_25_33
     *    (file 4 is obsolete and skipped);
     *  - finally the 'requested' compaction runs: either a minor compaction that combines
     *    delta_21_23, delta_25_33 and delta_35_35 into delta_21_35, or a major compaction
     *    that creates base_35.
     */
    burnThroughTransactions(35);
    CompactionRequest request = new CompactionRequest("default", "mapwb", type);
    request.setPartitionname("ds=today");
    txnHandler.compact(request);
    startWorker();

    ShowCompactResponse response = txnHandler.showCompact(new ShowCompactRequest());
    List<ShowCompactResponseElement> compacts = response.getCompacts();
    Assert.assertEquals(1, compacts.size());
    Assert.assertEquals("ready for cleaning", compacts.get(0).getState());

    FileSystem fs = FileSystem.get(conf);
    FileStatus[] stat = fs.listStatus(new Path(partition.getSd().getLocation()));
    Assert.assertEquals(9, stat.length);

    // Tick off one bit per expected directory name found in the listing.
    BitSet matchesFound = new BitSet(9);
    for (FileStatus entry : stat) {
        String name = entry.getPath().getName();
        if (name.equals(makeDeltaDirName(21, 21))) {
            matchesFound.set(0);
        } else if (name.equals(makeDeltaDirName(23, 23))) {
            matchesFound.set(1);
        } else if (name.equals(makeDeltaDirNameCompacted(25, 29))) {
            matchesFound.set(2);
        } else if (name.equals(makeDeltaDirNameCompacted(31, 32))) {
            matchesFound.set(3);
        } else if (name.equals(makeDeltaDirNameCompacted(31, 33))) {
            matchesFound.set(4);
        } else if (name.equals(makeDeltaDirName(35, 35))) {
            matchesFound.set(5);
        } else if (name.equals(makeDeltaDirNameCompacted(21, 23))) {
            matchesFound.set(6);
        } else if (name.equals(makeDeltaDirNameCompacted(25, 33))) {
            matchesFound.set(7);
        }
        // Bit 8 is the final product of the requested compaction; it is deliberately the
        // same bit for both compaction types.
        switch (type) {
            case MINOR:
                if (name.equals(makeDeltaDirNameCompacted(21, 35))) {
                    matchesFound.set(8);
                }
                break;
            case MAJOR:
                if (name.equals(AcidUtils.baseDir(35))) {
                    matchesFound.set(8);
                }
                break;
            default:
                throw new IllegalStateException();
        }
    }

    // Collect every index below stat.length whose bit was never set and fail with the list.
    StringBuilder missing = null;
    for (int idx = matchesFound.nextClearBit(0); idx < stat.length; idx = matchesFound.nextClearBit(idx + 1)) {
        if (missing == null) {
            missing = new StringBuilder("Some files are missing at index: ");
        }
        missing.append(idx).append(",");
    }
    if (missing != null) {
        Assert.fail(missing.toString());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) FileStatus(org.apache.hadoop.fs.FileStatus) BitSet(java.util.BitSet) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) FileSystem(org.apache.hadoop.fs.FileSystem) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) ShowCompactResponseElement(org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement)

Example 24 with BitSet

use of java.util.BitSet in project hive by apache.

the class ColumnBuffer method extractSubset.

/**
 * Splits off the leading values of this ColumnBuffer into a new buffer.
 *
 * <p>The returned buffer holds the values at indexes {@code [0, end)} together with their
 * null flags. This buffer is modified as well: afterwards it keeps only the values and null
 * flags from index {@code end} up to the previous {@code size}, and {@code size} is updated
 * to the number of values that remain. DOUBLE and FLOAT columns share the same backing
 * array.
 *
 * @param end index after the last value to include
 * @return a new ColumnBuffer of the same type containing the first {@code end} values
 * @throws IllegalStateException if the column type is none of the handled types
 */
public ColumnBuffer extractSubset(int end) {
    final BitSet headNulls = nulls.get(0, end);
    final ColumnBuffer head;
    if (type == Type.BOOLEAN_TYPE) {
        head = new ColumnBuffer(type, headNulls, Arrays.copyOfRange(boolVars, 0, end));
        boolVars = Arrays.copyOfRange(boolVars, end, size);
        nulls = nulls.get(end, size);
        size = boolVars.length;
    } else if (type == Type.TINYINT_TYPE) {
        head = new ColumnBuffer(type, headNulls, Arrays.copyOfRange(byteVars, 0, end));
        byteVars = Arrays.copyOfRange(byteVars, end, size);
        nulls = nulls.get(end, size);
        size = byteVars.length;
    } else if (type == Type.SMALLINT_TYPE) {
        head = new ColumnBuffer(type, headNulls, Arrays.copyOfRange(shortVars, 0, end));
        shortVars = Arrays.copyOfRange(shortVars, end, size);
        nulls = nulls.get(end, size);
        size = shortVars.length;
    } else if (type == Type.INT_TYPE) {
        head = new ColumnBuffer(type, headNulls, Arrays.copyOfRange(intVars, 0, end));
        intVars = Arrays.copyOfRange(intVars, end, size);
        nulls = nulls.get(end, size);
        size = intVars.length;
    } else if (type == Type.BIGINT_TYPE) {
        head = new ColumnBuffer(type, headNulls, Arrays.copyOfRange(longVars, 0, end));
        longVars = Arrays.copyOfRange(longVars, end, size);
        nulls = nulls.get(end, size);
        size = longVars.length;
    } else if (type == Type.DOUBLE_TYPE || type == Type.FLOAT_TYPE) {
        head = new ColumnBuffer(type, headNulls, Arrays.copyOfRange(doubleVars, 0, end));
        doubleVars = Arrays.copyOfRange(doubleVars, end, size);
        nulls = nulls.get(end, size);
        size = doubleVars.length;
    } else if (type == Type.BINARY_TYPE) {
        // List-backed columns are split with subList rather than array copies.
        head = new ColumnBuffer(type, headNulls, binaryVars.subList(0, end));
        binaryVars = binaryVars.subList(end, binaryVars.size());
        nulls = nulls.get(end, size);
        size = binaryVars.size();
    } else if (type == Type.STRING_TYPE) {
        head = new ColumnBuffer(type, headNulls, stringVars.subList(0, end));
        stringVars = stringVars.subList(end, stringVars.size());
        nulls = nulls.get(end, size);
        size = stringVars.size();
    } else {
        throw new IllegalStateException("invalid union object");
    }
    return head;
}
Also used : BitSet(java.util.BitSet)

Example 25 with BitSet

use of java.util.BitSet in project hive by apache.

the class HiveAggregateJoinTransposeRule method populateEquivalence.

/**
 * Records {@code i1} in the bit set stored under key {@code i0}, creating and registering
 * an empty bit set first when {@code i0} has none yet.
 *
 * @param equivalence map from an index to the set of indexes recorded for it
 * @param i0 key whose bit set is updated
 * @param i1 bit to set in that bit set
 */
private static void populateEquivalence(Map<Integer, BitSet> equivalence, int i0, int i1) {
    BitSet existing = equivalence.get(i0);
    if (existing != null) {
        existing.set(i1);
        return;
    }
    BitSet created = new BitSet();
    equivalence.put(i0, created);
    created.set(i1);
}
Also used : ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) BitSet(java.util.BitSet)

Aggregations

BitSet (java.util.BitSet): 754, ArrayList (java.util.ArrayList): 82, Test (org.junit.Test): 54, List (java.util.List): 29, HashMap (java.util.HashMap): 27, Map (java.util.Map): 26, HashSet (java.util.HashSet): 24, RegisterSpecList (com.android.dx.rop.code.RegisterSpecList): 22, RegisterSpecList (com.taobao.android.dx.rop.code.RegisterSpecList): 22, Random (java.util.Random): 21, Cursor (android.database.Cursor): 20, MatrixCursor (android.database.MatrixCursor): 20, MergeCursor (android.database.MergeCursor): 20, DirectoryResult (com.android.documentsui.DirectoryResult): 20, LinkedList (java.util.LinkedList): 16, IOException (java.io.IOException): 14, Iterator (java.util.Iterator): 13, BlockNode (jadx.core.dex.nodes.BlockNode): 12, BitSetGroup (org.apache.carbondata.core.util.BitSetGroup): 12, Directory (org.apache.lucene.store.Directory): 12