Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.
The class TestComparators, method configure().
@Before
public void configure() throws Exception {
  Path testdir = new Path(TEST_DIR.getAbsolutePath());
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = FileSystem.get(conf);
  fs.delete(testdir, true);
  conf.setInputFormat(SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setOutputKeyClass(IntWritable.class);
  conf.setOutputValueClass(Text.class);
  conf.setMapOutputValueClass(IntWritable.class);
  // set up two map jobs, so we can test merge phase in Reduce also
  conf.setNumMapTasks(2);
  conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // set up input data in 2 files
  Path inFile = new Path(inDir, "part0");
  SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, inFile, IntWritable.class, IntWritable.class);
  writer.append(new IntWritable(11), new IntWritable(999));
  writer.append(new IntWritable(23), new IntWritable(456));
  writer.append(new IntWritable(10), new IntWritable(780));
  writer.close();
  inFile = new Path(inDir, "part1");
  writer = SequenceFile.createWriter(fs, conf, inFile, IntWritable.class, IntWritable.class);
  writer.append(new IntWritable(45), new IntWritable(100));
  writer.append(new IntWritable(18), new IntWritable(200));
  writer.append(new IntWritable(27), new IntWritable(300));
  writer.close();
  jc = new JobClient(conf);
}
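For reference, the IntWritable pairs written above can be read back with SequenceFile.Reader. A minimal standalone sketch, assuming a local FileSystem and one of the part files written by configure(); the class name ReadPartsSketch is illustrative and not part of the test:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;

public class ReadPartsSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    // Path to one of the part files written above, e.g. <TEST_DIR>/in/part0.
    Path part = new Path(args[0]);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, part, conf);
    try {
      IntWritable key = new IntWritable();
      IntWritable value = new IntWritable();
      while (reader.next(key, value)) {
        // For part0 this prints "11 -> 999", "23 -> 456", "10 -> 780".
        System.out.println(key.get() + " -> " + value.get());
      }
    } finally {
      reader.close();
    }
  }
}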
Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.
The class TestCombineSequenceFileInputFormat, method testFormat().
@Test(timeout = 10000)
public void testFormat() throws Exception {
  JobConf job = new JobConf(conf);
  Reporter reporter = Reporter.NULL;
  Random random = new Random();
  long seed = random.nextLong();
  LOG.info("seed = " + seed);
  random.setSeed(seed);
  localFs.delete(workDir, true);
  FileInputFormat.setInputPaths(job, workDir);
  final int length = 10000;
  final int numFiles = 10;
  // create files with various lengths
  createFiles(length, numFiles, random);
  // create a combine split for the files
  InputFormat<IntWritable, BytesWritable> format = new CombineSequenceFileInputFormat<IntWritable, BytesWritable>();
  IntWritable key = new IntWritable();
  BytesWritable value = new BytesWritable();
  for (int i = 0; i < 3; i++) {
    int numSplits = random.nextInt(length / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
    LOG.info("splitting: requesting = " + numSplits);
    InputSplit[] splits = format.getSplits(job, numSplits);
    LOG.info("splitting: got = " + splits.length);
    // we should have a single split as the length is comfortably smaller than
    // the block size
    assertEquals("We got more than one splits!", 1, splits.length);
    InputSplit split = splits[0];
    assertEquals("It should be CombineFileSplit", CombineFileSplit.class, split.getClass());
    // check each split
    BitSet bits = new BitSet(length);
    RecordReader<IntWritable, BytesWritable> reader = format.getRecordReader(split, job, reporter);
    try {
      while (reader.next(key, value)) {
        assertFalse("Key in multiple partitions.", bits.get(key.get()));
        bits.set(key.get());
      }
    } finally {
      reader.close();
    }
    assertEquals("Some keys in no partition.", length, bits.cardinality());
  }
}
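The createFiles helper referenced above is not shown on this page. A rough sketch of what such a helper could look like, assuming it spreads IntWritable keys 0..length-1 over numFiles SequenceFiles of IntWritable/BytesWritable pairs under workDir (it reuses the FileSystem, Path, and Writable imports already used in the snippet); the signature and body below are an illustration, not the actual Hadoop test code:

// Illustrative createFiles-style helper (not the real test utility): writes
// `length` IntWritable keys across `numFiles` SequenceFiles under workDir,
// pairing every key with a small random BytesWritable payload.
private static void createFilesSketch(int length, int numFiles, Random random,
    FileSystem fs, Path workDir, Configuration conf) throws IOException {
  int next = 0;
  for (int i = 0; i < numFiles; i++) {
    Path file = new Path(workDir, "test_" + i + ".seq");
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file,
        IntWritable.class, BytesWritable.class);
    try {
      // Give each file roughly an equal share of the remaining keys.
      int share = (length - next) / (numFiles - i);
      for (int j = 0; j < share; j++) {
        byte[] payload = new byte[random.nextInt(10) + 1];
        random.nextBytes(payload);
        writer.append(new IntWritable(next++), new BytesWritable(payload));
      }
    } finally {
      writer.close();
    }
  }
}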
Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.
The class TestMapReduce, method launch().
private static void launch() throws Exception {
  //
  // Generate distribution of ints. This is the answer key.
  //
  Configuration conf = new Configuration();
  int countsToGo = counts;
  int[] dist = new int[range];
  for (int i = 0; i < range; i++) {
    double avgInts = (1.0 * countsToGo) / (range - i);
    dist[i] = (int) Math.max(0, Math.round(avgInts + (Math.sqrt(avgInts) * r.nextGaussian())));
    countsToGo -= dist[i];
  }
  if (countsToGo > 0) {
    dist[dist.length - 1] += countsToGo;
  }
  //
  // Write the answer key to a file.
  //
  Path testdir = new Path(TEST_DIR.getAbsolutePath());
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  Path randomIns = new Path(testdir, "genins");
  if (!fs.mkdirs(randomIns)) {
    throw new IOException("Mkdirs failed to create " + randomIns.toString());
  }
  Path answerkey = new Path(randomIns, "answer.key");
  SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, answerkey, IntWritable.class, IntWritable.class, SequenceFile.CompressionType.NONE);
  try {
    for (int i = 0; i < range; i++) {
      out.append(new IntWritable(i), new IntWritable(dist[i]));
    }
  } finally {
    out.close();
  }
  printFiles(randomIns, conf);
  //
  // Now we need to generate the random numbers according to
  // the above distribution.
  //
  // We create a lot of map tasks, each of which takes at least
  // one "line" of the distribution. (That is, a certain number
  // X is to be generated Y number of times.)
  //
  // A map task emits Y key/val pairs. The val is X. The key
  // is a randomly-generated number.
  //
  // The reduce task gets its input sorted by key, that is, sorted
  // in random order. It then emits a single line of text for each
  // of the given values. It does not emit the key.
  //
  // Because there's just one reduce task, we emit a single big
  // file of random numbers.
  //
  Path randomOuts = new Path(testdir, "genouts");
  fs.delete(randomOuts, true);
  Job genJob = Job.getInstance(conf);
  FileInputFormat.setInputPaths(genJob, randomIns);
  genJob.setInputFormatClass(SequenceFileInputFormat.class);
  genJob.setMapperClass(RandomGenMapper.class);
  FileOutputFormat.setOutputPath(genJob, randomOuts);
  genJob.setOutputKeyClass(IntWritable.class);
  genJob.setOutputValueClass(IntWritable.class);
  genJob.setReducerClass(RandomGenReducer.class);
  genJob.setNumReduceTasks(1);
  genJob.waitForCompletion(true);
  printFiles(randomOuts, conf);
  //
  // Next, we read the big file in and regenerate the
  // original map. It's split into a number of parts.
  // (That number is 'intermediateReduces'.)
  //
  // We have many map tasks, each of which reads at least one
  // of the output numbers. For each number read in, the
  // map task emits a key/value pair where the key is the
  // number and the value is "1".
  //
  // We have a single reduce task, which receives its input
  // sorted by the key emitted above. For each key, there will
  // be a certain number of "1" values. The reduce task sums
  // these values to compute how many times the given key was
  // emitted.
  //
  // The reduce task then emits a key/val pair where the key
  // is the number in question, and the value is the number of
  // times the key was emitted. This is the same format as the
  // original answer key (except that numbers emitted zero times
  // will not appear in the regenerated key.) The answer set
  // is split into a number of pieces. A final MapReduce job
  // will merge them.
  //
  // There's not really a need to go to 10 reduces here
  // instead of 1. But we want to test what happens when
  // you have multiple reduces at once.
  //
  int intermediateReduces = 10;
  Path intermediateOuts = new Path(testdir, "intermediateouts");
  fs.delete(intermediateOuts, true);
  Job checkJob = Job.getInstance(conf);
  FileInputFormat.setInputPaths(checkJob, randomOuts);
  checkJob.setMapperClass(RandomCheckMapper.class);
  FileOutputFormat.setOutputPath(checkJob, intermediateOuts);
  checkJob.setOutputKeyClass(IntWritable.class);
  checkJob.setOutputValueClass(IntWritable.class);
  checkJob.setOutputFormatClass(MapFileOutputFormat.class);
  checkJob.setReducerClass(RandomCheckReducer.class);
  checkJob.setNumReduceTasks(intermediateReduces);
  checkJob.waitForCompletion(true);
  printFiles(intermediateOuts, conf);
  //
  // OK, now we take the output from the last job and
  // merge it down to a single file. The map() and reduce()
  // functions don't really do anything except reemit tuples.
  // But by having a single reduce task here, we end up merging
  // all the files.
  //
  Path finalOuts = new Path(testdir, "finalouts");
  fs.delete(finalOuts, true);
  Job mergeJob = Job.getInstance(conf);
  FileInputFormat.setInputPaths(mergeJob, intermediateOuts);
  mergeJob.setInputFormatClass(SequenceFileInputFormat.class);
  mergeJob.setMapperClass(MergeMapper.class);
  FileOutputFormat.setOutputPath(mergeJob, finalOuts);
  mergeJob.setOutputKeyClass(IntWritable.class);
  mergeJob.setOutputValueClass(IntWritable.class);
  mergeJob.setOutputFormatClass(SequenceFileOutputFormat.class);
  mergeJob.setReducerClass(MergeReducer.class);
  mergeJob.setNumReduceTasks(1);
  mergeJob.waitForCompletion(true);
  printFiles(finalOuts, conf);
  //
  // Finally, we compare the reconstructed answer key with the
  // original one. Remember, we need to ignore zero-count items
  // in the original key.
  //
  boolean success = true;
  Path recomputedkey = new Path(finalOuts, "part-r-00000");
  SequenceFile.Reader in = new SequenceFile.Reader(fs, recomputedkey, conf);
  int totalseen = 0;
  try {
    IntWritable key = new IntWritable();
    IntWritable val = new IntWritable();
    for (int i = 0; i < range; i++) {
      if (dist[i] == 0) {
        continue;
      }
      if (!in.next(key, val)) {
        System.err.println("Cannot read entry " + i);
        success = false;
        break;
      } else {
        if (!((key.get() == i) && (val.get() == dist[i]))) {
          System.err.println("Mismatch! Pos=" + key.get() + ", i=" + i + ", val=" + val.get() + ", dist[i]=" + dist[i]);
          success = false;
        }
        totalseen += val.get();
      }
    }
    if (success) {
      if (in.next(key, val)) {
        System.err.println("Unnecessary lines in recomputed key!");
        success = false;
      }
    }
  } finally {
    in.close();
  }
  int originalTotal = 0;
  for (int i = 0; i < dist.length; i++) {
    originalTotal += dist[i];
  }
  System.out.println("Original sum: " + originalTotal);
  System.out.println("Recomputed sum: " + totalseen);
  //
  // Write to "results" whether the test succeeded or not.
  //
  Path resultFile = new Path(testdir, "results");
  BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fs.create(resultFile)));
  try {
    bw.write("Success=" + success + "\n");
    System.out.println("Success=" + success);
  } finally {
    bw.close();
  }
  assertTrue("testMapRed failed", success);
  fs.delete(testdir, true);
}
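The comment blocks inside launch() describe a three-job pipeline. A minimal sketch of the first (generator) stage they describe, written against the new org.apache.hadoop.mapreduce API used by the jobs above; the class names here are illustrative, and the real RandomGenMapper/RandomGenReducer in the Hadoop test may differ in detail:

import java.io.IOException;
import java.util.Random;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

// Illustrative generator stage: for each answer-key entry (x, y), emit x a total
// of y times under random keys, so the shuffle delivers the values in random order.
public class RandomGenSketch {

  public static class GenMapper
      extends Mapper<IntWritable, IntWritable, IntWritable, IntWritable> {
    private final Random random = new Random();

    @Override
    protected void map(IntWritable key, IntWritable value, Context context)
        throws IOException, InterruptedException {
      int x = key.get();    // the number to generate
      int y = value.get();  // how many times to generate it
      for (int i = 0; i < y; i++) {
        context.write(new IntWritable(random.nextInt()), new IntWritable(x));
      }
    }
  }

  public static class GenReducer
      extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
    @Override
    protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
      // Drop the random key and re-emit each generated number once; with the
      // default TextOutputFormat a null value prints one number per line.
      for (IntWritable x : values) {
        context.write(new IntWritable(x.get()), null);
      }
    }
  }
}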
Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.
The class TestPipeApplication, method testPipesPartitioner().
/**
 * Test PipesPartitioner: set data into the partitioner's cache and get it back.
 */
@Test
public void testPipesPartitioner() {
  PipesPartitioner<IntWritable, Text> partitioner = new PipesPartitioner<IntWritable, Text>();
  JobConf configuration = new JobConf();
  Submitter.getJavaPartitioner(configuration);
  partitioner.configure(new JobConf());
  IntWritable iw = new IntWritable(4);
  // the cache is empty
  assertEquals(0, partitioner.getPartition(iw, new Text("test"), 2));
  // set data into the cache
  PipesPartitioner.setNextPartition(3);
  // get data from the cache
  assertEquals(3, partitioner.getPartition(iw, new Text("test"), 2));
}
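For comparison, the getPartition contract exercised above comes from the old-API org.apache.hadoop.mapred.Partitioner interface. A minimal sketch of a plain partitioner over IntWritable keys, unrelated to the pipes-specific cache behaviour; the class name is illustrative:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Partitioner;

// Illustrative modulo partitioner: maps each IntWritable key to one of
// numPartitions buckets, masking the sign bit so negative keys stay valid.
public class IntModPartitionerSketch implements Partitioner<IntWritable, Text> {
  @Override
  public void configure(JobConf job) {
    // Nothing to configure in this sketch.
  }

  @Override
  public int getPartition(IntWritable key, Text value, int numPartitions) {
    return (key.get() & Integer.MAX_VALUE) % numPartitions;
  }
}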
Use of org.apache.hadoop.io.IntWritable in project hive by apache.
The class GenericUDFTrunc, method evaluateNumber().
private Object evaluateNumber(DeferredObject[] arguments) throws HiveException, UDFArgumentTypeException {
  if (arguments[0] == null) {
    return null;
  }
  Object input = arguments[0].get();
  if (input == null) {
    return null;
  }
  if (arguments.length == 2 && arguments[1] != null && arguments[1].get() != null && !inputSacleConst) {
    Object scaleObj = null;
    switch (inputScaleOI.getPrimitiveCategory()) {
      case BYTE:
        scaleObj = byteConverter.convert(arguments[1].get());
        scale = ((ByteWritable) scaleObj).get();
        break;
      case SHORT:
        scaleObj = shortConverter.convert(arguments[1].get());
        scale = ((ShortWritable) scaleObj).get();
        break;
      case INT:
        scaleObj = intConverter.convert(arguments[1].get());
        scale = ((IntWritable) scaleObj).get();
        break;
      case LONG:
        scaleObj = longConverter.convert(arguments[1].get());
        long l = ((LongWritable) scaleObj).get();
        if (l < Integer.MIN_VALUE || l > Integer.MAX_VALUE) {
          throw new UDFArgumentException(getFuncName().toUpperCase() + " scale argument out of allowed range");
        }
        scale = (int) l;
      default:
        break;
    }
  }
  switch (inputType1) {
    case VOID:
      return null;
    case DECIMAL:
      HiveDecimalWritable decimalWritable = (HiveDecimalWritable) inputOI.getPrimitiveWritableObject(input);
      HiveDecimal dec = trunc(decimalWritable.getHiveDecimal(), scale);
      if (dec == null) {
        return null;
      }
      return new HiveDecimalWritable(dec);
    case BYTE:
      ByteWritable byteWritable = (ByteWritable) inputOI.getPrimitiveWritableObject(input);
      if (scale >= 0) {
        return byteWritable;
      } else {
        return new ByteWritable((byte) trunc(byteWritable.get(), scale));
      }
    case SHORT:
      ShortWritable shortWritable = (ShortWritable) inputOI.getPrimitiveWritableObject(input);
      if (scale >= 0) {
        return shortWritable;
      } else {
        return new ShortWritable((short) trunc(shortWritable.get(), scale));
      }
    case INT:
      IntWritable intWritable = (IntWritable) inputOI.getPrimitiveWritableObject(input);
      if (scale >= 0) {
        return intWritable;
      } else {
        return new IntWritable((int) trunc(intWritable.get(), scale));
      }
    case LONG:
      LongWritable longWritable = (LongWritable) inputOI.getPrimitiveWritableObject(input);
      if (scale >= 0) {
        return longWritable;
      } else {
        return new LongWritable(trunc(longWritable.get(), scale));
      }
    case FLOAT:
      float f = ((FloatWritable) inputOI.getPrimitiveWritableObject(input)).get();
      return new FloatWritable((float) trunc(f, scale));
    case DOUBLE:
      return trunc(((DoubleWritable) inputOI.getPrimitiveWritableObject(input)), scale);
    default:
      throw new UDFArgumentTypeException(0, "Only numeric or string group data types are allowed for TRUNC function. Got " + inputType1.name());
  }
}
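In the integral branches above, trunc() only changes the value when the scale is negative. A small worked sketch of that arithmetic; the helper below is illustrative and not Hive's internal trunc():

// Illustrative: truncate an integral value toward zero at a negative scale,
// e.g. truncIntegral(1234, -2) == 1200 and truncIntegral(-1789, -2) == -1700.
static long truncIntegral(long value, int scale) {
  if (scale >= 0) {
    // Mirrors the cases above: a non-negative scale leaves integral inputs unchanged.
    return value;
  }
  long pow = (long) Math.pow(10, -scale); // 10^|scale|
  return (value / pow) * pow;             // integer division drops the low digits
}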