Use of java.util.BitSet in project hadoop by apache.
The class TestCombineTextInputFormat, method testFormat.
@Test(timeout = 10000)
public void testFormat() throws Exception {
  JobConf job = new JobConf(defaultConf);
  Random random = new Random();
  long seed = random.nextLong();
  LOG.info("seed = " + seed);
  random.setSeed(seed);
  localFs.delete(workDir, true);
  FileInputFormat.setInputPaths(job, workDir);
  final int length = 10000;
  final int numFiles = 10;
  createFiles(length, numFiles, random);
  // create a combined split for the files
  CombineTextInputFormat format = new CombineTextInputFormat();
  LongWritable key = new LongWritable();
  Text value = new Text();
  for (int i = 0; i < 3; i++) {
    int numSplits = random.nextInt(length / 20) + 1;
    LOG.info("splitting: requesting = " + numSplits);
    InputSplit[] splits = format.getSplits(job, numSplits);
    LOG.info("splitting: got = " + splits.length);
    // we should have a single split as the length is comfortably smaller than
    // the block size
    assertEquals("We got more than one splits!", 1, splits.length);
    InputSplit split = splits[0];
    assertEquals("It should be CombineFileSplit", CombineFileSplit.class, split.getClass());
    // check the split
    BitSet bits = new BitSet(length);
    LOG.debug("split= " + split);
    RecordReader<LongWritable, Text> reader = format.getRecordReader(split, job, voidReporter);
    try {
      int count = 0;
      while (reader.next(key, value)) {
        int v = Integer.parseInt(value.toString());
        LOG.debug("read " + v);
        if (bits.get(v)) {
          LOG.warn("conflict with " + v + " at position " + reader.getPos());
        }
        assertFalse("Key in multiple partitions.", bits.get(v));
        bits.set(v);
        count++;
      }
      LOG.info("splits=" + split + " count=" + count);
    } finally {
      reader.close();
    }
    assertEquals("Some keys in no partition.", length, bits.cardinality());
  }
}
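The test relies on a common BitSet verification idiom: each record's value is used as a bit index, so a value read twice trips the duplicate assertion, while a value never read leaves cardinality() below length. A stripped-down sketch of the same invariant (the class and method names below are illustrative, not part of the Hadoop test):

import java.util.BitSet;

public class ExactlyOnceCheck {

  // true iff values contains every int in [0, length) exactly once
  static boolean coversExactlyOnce(int[] values, int length) {
    BitSet seen = new BitSet(length);
    for (int v : values) {
      if (seen.get(v)) {
        return false;                       // duplicate: the same key was read twice
      }
      seen.set(v);
    }
    return seen.cardinality() == length;    // false if any key was never read
  }

  public static void main(String[] args) {
    System.out.println(coversExactlyOnce(new int[] { 0, 1, 2 }, 3));  // true
    System.out.println(coversExactlyOnce(new int[] { 0, 0, 2 }, 3));  // false: 0 duplicated, 1 missing
  }
}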
Use of java.util.BitSet in project hadoop by apache.
The class TestCombineSequenceFileInputFormat, method testFormat.
@Test(timeout = 10000)
public void testFormat() throws Exception {
  JobConf job = new JobConf(conf);
  Reporter reporter = Reporter.NULL;
  Random random = new Random();
  long seed = random.nextLong();
  LOG.info("seed = " + seed);
  random.setSeed(seed);
  localFs.delete(workDir, true);
  FileInputFormat.setInputPaths(job, workDir);
  final int length = 10000;
  final int numFiles = 10;
  // create files with various lengths
  createFiles(length, numFiles, random);
  // create a combine split for the files
  InputFormat<IntWritable, BytesWritable> format = new CombineSequenceFileInputFormat<IntWritable, BytesWritable>();
  IntWritable key = new IntWritable();
  BytesWritable value = new BytesWritable();
  for (int i = 0; i < 3; i++) {
    int numSplits = random.nextInt(length / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
    LOG.info("splitting: requesting = " + numSplits);
    InputSplit[] splits = format.getSplits(job, numSplits);
    LOG.info("splitting: got = " + splits.length);
    // we should have a single split as the length is comfortably smaller than
    // the block size
    assertEquals("We got more than one splits!", 1, splits.length);
    InputSplit split = splits[0];
    assertEquals("It should be CombineFileSplit", CombineFileSplit.class, split.getClass());
    // check each split
    BitSet bits = new BitSet(length);
    RecordReader<IntWritable, BytesWritable> reader = format.getRecordReader(split, job, reporter);
    try {
      while (reader.next(key, value)) {
        assertFalse("Key in multiple partitions.", bits.get(key.get()));
        bits.set(key.get());
      }
    } finally {
      reader.close();
    }
    assertEquals("Some keys in no partition.", length, bits.cardinality());
  }
}
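This variant asserts only the cardinality, so a failure does not say which keys were dropped. BitSet.nextClearBit can enumerate them; a small helper along these lines (reportMissing is a hypothetical name, not part of the Hadoop test) would sharpen the failure message:

import java.util.BitSet;

class MissingKeys {

  // lists every index in [0, length) whose bit is still clear, i.e. every key never read
  static String reportMissing(BitSet bits, int length) {
    StringBuilder missing = new StringBuilder();
    for (int i = bits.nextClearBit(0); i < length; i = bits.nextClearBit(i + 1)) {
      missing.append(i).append(' ');
    }
    return missing.toString().trim();
  }
}

The final assertion could then read, for example, assertEquals("Keys never read: " + MissingKeys.reportMissing(bits, length), length, bits.cardinality()).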
Use of java.util.BitSet in project hive by apache.
The class TestWorker, method compactNoBaseLotsOfDeltas.
private void compactNoBaseLotsOfDeltas(CompactionType type) throws Exception {
  conf.setIntVar(HiveConf.ConfVars.COMPACTOR_MAX_NUM_DELTA, 2);
  Table t = newTable("default", "mapwb", true);
  Partition p = newPartition(t, "today");
  // addBaseFile(t, p, 20L, 20);
  addDeltaFile(t, p, 21L, 21L, 2);
  addDeltaFile(t, p, 23L, 23L, 2);
  // make it look like a streaming API use case
  addDeltaFile(t, p, 25L, 29L, 2);
  addDeltaFile(t, p, 31L, 32L, 3);
  // make it look like 31-32 has been compacted, but not cleaned
  addDeltaFile(t, p, 31L, 33L, 5);
  addDeltaFile(t, p, 35L, 35L, 1);
  /* Since COMPACTOR_MAX_NUM_DELTA=2, we expect files 1,2 to be minor compacted by one job
   * to produce delta_21_23, and files 3,5 to be minor compacted by a 2nd job (file 4 is
   * obsolete and skipped) to make delta_25_33, and then the 'requested' minor compaction
   * to combine delta_21_23, delta_25_33 and delta_35_35 into delta_21_35, or the major
   * compaction to create base_35. */
  burnThroughTransactions(35);
  CompactionRequest rqst = new CompactionRequest("default", "mapwb", type);
  rqst.setPartitionname("ds=today");
  txnHandler.compact(rqst);
  startWorker();
  ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
  List<ShowCompactResponseElement> compacts = rsp.getCompacts();
  Assert.assertEquals(1, compacts.size());
  Assert.assertEquals("ready for cleaning", compacts.get(0).getState());
  FileSystem fs = FileSystem.get(conf);
  FileStatus[] stat = fs.listStatus(new Path(p.getSd().getLocation()));
  Assert.assertEquals(9, stat.length);
  // Find the new delta files and make sure they have the right contents
  BitSet matchesFound = new BitSet(9);
  for (int i = 0; i < stat.length; i++) {
    if (stat[i].getPath().getName().equals(makeDeltaDirName(21, 21))) {
      matchesFound.set(0);
    } else if (stat[i].getPath().getName().equals(makeDeltaDirName(23, 23))) {
      matchesFound.set(1);
    } else if (stat[i].getPath().getName().equals(makeDeltaDirNameCompacted(25, 29))) {
      matchesFound.set(2);
    } else if (stat[i].getPath().getName().equals(makeDeltaDirNameCompacted(31, 32))) {
      matchesFound.set(3);
    } else if (stat[i].getPath().getName().equals(makeDeltaDirNameCompacted(31, 33))) {
      matchesFound.set(4);
    } else if (stat[i].getPath().getName().equals(makeDeltaDirName(35, 35))) {
      matchesFound.set(5);
    } else if (stat[i].getPath().getName().equals(makeDeltaDirNameCompacted(21, 23))) {
      matchesFound.set(6);
    } else if (stat[i].getPath().getName().equals(makeDeltaDirNameCompacted(25, 33))) {
      matchesFound.set(7);
    }
    switch (type) {
      // yes, both MINOR and MAJOR do set(8)
      case MINOR:
        if (stat[i].getPath().getName().equals(makeDeltaDirNameCompacted(21, 35))) {
          matchesFound.set(8);
        }
        break;
      case MAJOR:
        if (stat[i].getPath().getName().equals(AcidUtils.baseDir(35))) {
          matchesFound.set(8);
        }
        break;
      default:
        throw new IllegalStateException();
    }
  }
  StringBuilder sb = null;
  for (int i = 0; i < stat.length; i++) {
    if (!matchesFound.get(i)) {
      if (sb == null) {
        sb = new StringBuilder("Some files are missing at index: ");
      }
      sb.append(i).append(",");
    }
  }
  if (sb != null) {
    Assert.assertTrue(sb.toString(), false);
  }
}
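The long if/else chain maps each expected directory name to a fixed bit index in matchesFound. A map-driven variant keeps the same BitSet bookkeeping with less repetition; this is only a sketch built on the test's own helpers (makeDeltaDirName, makeDeltaDirNameCompacted, AcidUtils.baseDir), with the same index assignments as the original, and it assumes java.util.HashMap and java.util.Map are imported:

Map<String, Integer> expectedDirs = new HashMap<>();
expectedDirs.put(makeDeltaDirName(21, 21), 0);
expectedDirs.put(makeDeltaDirName(23, 23), 1);
expectedDirs.put(makeDeltaDirNameCompacted(25, 29), 2);
expectedDirs.put(makeDeltaDirNameCompacted(31, 32), 3);
expectedDirs.put(makeDeltaDirNameCompacted(31, 33), 4);
expectedDirs.put(makeDeltaDirName(35, 35), 5);
expectedDirs.put(makeDeltaDirNameCompacted(21, 23), 6);
expectedDirs.put(makeDeltaDirNameCompacted(25, 33), 7);
// both compaction types mark bit 8, just for different directory names
expectedDirs.put(type == CompactionType.MINOR ? makeDeltaDirNameCompacted(21, 35) : AcidUtils.baseDir(35), 8);
BitSet matchesFound = new BitSet(9);
for (FileStatus s : stat) {
  Integer idx = expectedDirs.get(s.getPath().getName());
  if (idx != null) {
    matchesFound.set(idx);
  }
}
// any index still clear in matchesFound corresponds to a missing directory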
Use of java.util.BitSet in project hive by apache.
The class ColumnBuffer, method extractSubset.
/**
 * Get a subset of this ColumnBuffer, starting from the 1st value.
 *
 * @param end index after the last value to include
 */
public ColumnBuffer extractSubset(int end) {
  BitSet subNulls = nulls.get(0, end);
  if (type == Type.BOOLEAN_TYPE) {
    ColumnBuffer subset = new ColumnBuffer(type, subNulls, Arrays.copyOfRange(boolVars, 0, end));
    boolVars = Arrays.copyOfRange(boolVars, end, size);
    nulls = nulls.get(end, size);
    size = boolVars.length;
    return subset;
  }
  if (type == Type.TINYINT_TYPE) {
    ColumnBuffer subset = new ColumnBuffer(type, subNulls, Arrays.copyOfRange(byteVars, 0, end));
    byteVars = Arrays.copyOfRange(byteVars, end, size);
    nulls = nulls.get(end, size);
    size = byteVars.length;
    return subset;
  }
  if (type == Type.SMALLINT_TYPE) {
    ColumnBuffer subset = new ColumnBuffer(type, subNulls, Arrays.copyOfRange(shortVars, 0, end));
    shortVars = Arrays.copyOfRange(shortVars, end, size);
    nulls = nulls.get(end, size);
    size = shortVars.length;
    return subset;
  }
  if (type == Type.INT_TYPE) {
    ColumnBuffer subset = new ColumnBuffer(type, subNulls, Arrays.copyOfRange(intVars, 0, end));
    intVars = Arrays.copyOfRange(intVars, end, size);
    nulls = nulls.get(end, size);
    size = intVars.length;
    return subset;
  }
  if (type == Type.BIGINT_TYPE) {
    ColumnBuffer subset = new ColumnBuffer(type, subNulls, Arrays.copyOfRange(longVars, 0, end));
    longVars = Arrays.copyOfRange(longVars, end, size);
    nulls = nulls.get(end, size);
    size = longVars.length;
    return subset;
  }
  if (type == Type.DOUBLE_TYPE || type == Type.FLOAT_TYPE) {
    ColumnBuffer subset = new ColumnBuffer(type, subNulls, Arrays.copyOfRange(doubleVars, 0, end));
    doubleVars = Arrays.copyOfRange(doubleVars, end, size);
    nulls = nulls.get(end, size);
    size = doubleVars.length;
    return subset;
  }
  if (type == Type.BINARY_TYPE) {
    ColumnBuffer subset = new ColumnBuffer(type, subNulls, binaryVars.subList(0, end));
    binaryVars = binaryVars.subList(end, binaryVars.size());
    nulls = nulls.get(end, size);
    size = binaryVars.size();
    return subset;
  }
  if (type == Type.STRING_TYPE) {
    ColumnBuffer subset = new ColumnBuffer(type, subNulls, stringVars.subList(0, end));
    stringVars = stringVars.subList(end, stringVars.size());
    nulls = nulls.get(end, size);
    size = stringVars.size();
    return subset;
  }
  throw new IllegalStateException("invalid union object");
}
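Every branch performs the same two-sided split of the null-tracking BitSet: nulls.get(0, end) copies the null flags for the extracted subset, and nulls.get(end, size) re-bases the remaining flags at index 0 so they stay aligned with the truncated value arrays. A tiny standalone demonstration of the BitSet.get(from, to) semantics this depends on (not Hive code):

import java.util.BitSet;

public class BitSetSliceDemo {

  public static void main(String[] args) {
    BitSet nulls = new BitSet();
    nulls.set(1);
    nulls.set(4);
    // get(from, to) returns a new BitSet holding bits [from, to), re-based so that
    // bit (from + i) of the original becomes bit i of the result
    BitSet head = nulls.get(0, 3);  // {1}
    BitSet tail = nulls.get(3, 6);  // original bit 4 becomes bit 1, so {1}
    System.out.println(head + " " + tail);  // prints {1} {1}
  }
}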
Use of java.util.BitSet in project hive by apache.
The class HiveAggregateJoinTransposeRule, method populateEquivalence.
private static void populateEquivalence(Map<Integer, BitSet> equivalence, int i0, int i1) {
  BitSet bitSet = equivalence.get(i0);
  if (bitSet == null) {
    bitSet = new BitSet();
    equivalence.put(i0, bitSet);
  }
  bitSet.set(i1);
}
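The null check followed by put is the classic lazy-initialization idiom for a Map of BitSets; since Java 8 the same logic collapses into computeIfAbsent. A sketch of the equivalent (not the rule's actual code):

private static void populateEquivalence(Map<Integer, BitSet> equivalence, int i0, int i1) {
  // create the BitSet for i0 on first use, then record that position i0 is equivalent to i1
  equivalence.computeIfAbsent(i0, k -> new BitSet()).set(i1);
}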