
Example 26 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache: class TestComparators, method configure().

@Before
public void configure() throws Exception {
    Path testdir = new Path(TEST_DIR.getAbsolutePath());
    Path inDir = new Path(testdir, "in");
    Path outDir = new Path(testdir, "out");
    FileSystem fs = FileSystem.get(conf);
    fs.delete(testdir, true);
    conf.setInputFormat(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(conf, inDir);
    FileOutputFormat.setOutputPath(conf, outDir);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    // set up two map tasks, so we can also test the merge phase in the reducer
    conf.setNumMapTasks(2);
    conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    if (!fs.mkdirs(testdir)) {
        throw new IOException("Mkdirs failed to create " + testdir.toString());
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Mkdirs failed to create " + inDir.toString());
    }
    // set up input data in 2 files 
    Path inFile = new Path(inDir, "part0");
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, inFile, IntWritable.class, IntWritable.class);
    writer.append(new IntWritable(11), new IntWritable(999));
    writer.append(new IntWritable(23), new IntWritable(456));
    writer.append(new IntWritable(10), new IntWritable(780));
    writer.close();
    inFile = new Path(inDir, "part1");
    writer = SequenceFile.createWriter(fs, conf, inFile, IntWritable.class, IntWritable.class);
    writer.append(new IntWritable(45), new IntWritable(100));
    writer.append(new IntWritable(18), new IntWritable(200));
    writer.append(new IntWritable(27), new IntWritable(300));
    writer.close();
    jc = new JobClient(conf);
}
Also used: Path (org.apache.hadoop.fs.Path), SequenceFile (org.apache.hadoop.io.SequenceFile), FileSystem (org.apache.hadoop.fs.FileSystem), IOException (java.io.IOException), IntWritable (org.apache.hadoop.io.IntWritable), Before (org.junit.Before)
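For context, here is a minimal sketch (not part of the original test) of reading back one of the IntWritable/IntWritable input files that configure() writes. It assumes the same fs, conf, and inDir as above and uses the same SequenceFile.Reader constructor that appears elsewhere in these examples.

// Hypothetical reader sketch: iterate the IntWritable key/value pairs written above.
Path part0 = new Path(inDir, "part0");
SequenceFile.Reader reader = new SequenceFile.Reader(fs, part0, conf);
try {
    IntWritable key = new IntWritable();
    IntWritable value = new IntWritable();
    while (reader.next(key, value)) {
        System.out.println(key.get() + " -> " + value.get());
    }
} finally {
    reader.close();
}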

Example 27 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache: class TestCombineSequenceFileInputFormat, method testFormat().

@Test(timeout = 10000)
public void testFormat() throws Exception {
    JobConf job = new JobConf(conf);
    Reporter reporter = Reporter.NULL;
    Random random = new Random();
    long seed = random.nextLong();
    LOG.info("seed = " + seed);
    random.setSeed(seed);
    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, workDir);
    final int length = 10000;
    final int numFiles = 10;
    // create files with various lengths
    createFiles(length, numFiles, random);
    // create a combine split for the files
    InputFormat<IntWritable, BytesWritable> format = new CombineSequenceFileInputFormat<IntWritable, BytesWritable>();
    IntWritable key = new IntWritable();
    BytesWritable value = new BytesWritable();
    for (int i = 0; i < 3; i++) {
        int numSplits = random.nextInt(length / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
        LOG.info("splitting: requesting = " + numSplits);
        InputSplit[] splits = format.getSplits(job, numSplits);
        LOG.info("splitting: got =        " + splits.length);
        // we should have a single split as the length is comfortably smaller than
        // the block size
        assertEquals("We got more than one splits!", 1, splits.length);
        InputSplit split = splits[0];
        assertEquals("It should be CombineFileSplit", CombineFileSplit.class, split.getClass());
        // check each split
        BitSet bits = new BitSet(length);
        RecordReader<IntWritable, BytesWritable> reader = format.getRecordReader(split, job, reporter);
        try {
            while (reader.next(key, value)) {
                assertFalse("Key in multiple partitions.", bits.get(key.get()));
                bits.set(key.get());
            }
        } finally {
            reader.close();
        }
        assertEquals("Some keys in no partition.", length, bits.cardinality());
    }
}
Also used: CombineSequenceFileInputFormat (org.apache.hadoop.mapred.lib.CombineSequenceFileInputFormat), BitSet (java.util.BitSet), BytesWritable (org.apache.hadoop.io.BytesWritable), Random (java.util.Random), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
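The createFiles helper invoked above is not included in this excerpt. A plausible sketch of such a helper, spreading IntWritable keys with BytesWritable values across several SequenceFiles, is shown below; it assumes the workDir, localFs, and conf fields referenced in the test, and the exact implementation in the Hadoop test may differ.

// Hypothetical sketch of a createFiles helper: spreads `length` records across
// `numFiles` SequenceFiles, each keyed by a unique IntWritable in [0, length).
private static void createFiles(int length, int numFiles, Random random) throws IOException {
    for (int i = 0; i < numFiles; i++) {
        Path file = new Path(workDir, "test_" + i + ".seq");
        SequenceFile.Writer writer = SequenceFile.createWriter(localFs, conf, file,
                IntWritable.class, BytesWritable.class);
        try {
            for (int j = i; j < length; j += numFiles) {
                byte[] data = new byte[random.nextInt(10) + 1];
                random.nextBytes(data);
                writer.append(new IntWritable(j), new BytesWritable(data));
            }
        } finally {
            writer.close();
        }
    }
}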

Example 28 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache: class TestMapReduce, method launch().

private static void launch() throws Exception {
    //
    // Generate distribution of ints.  This is the answer key.
    //
    Configuration conf = new Configuration();
    int countsToGo = counts;
    int[] dist = new int[range];
    for (int i = 0; i < range; i++) {
        double avgInts = (1.0 * countsToGo) / (range - i);
        dist[i] = (int) Math.max(0, Math.round(avgInts + (Math.sqrt(avgInts) * r.nextGaussian())));
        countsToGo -= dist[i];
    }
    if (countsToGo > 0) {
        dist[dist.length - 1] += countsToGo;
    }
    //
    // Write the answer key to a file.  
    //
    Path testdir = new Path(TEST_DIR.getAbsolutePath());
    if (!fs.mkdirs(testdir)) {
        throw new IOException("Mkdirs failed to create " + testdir.toString());
    }
    Path randomIns = new Path(testdir, "genins");
    if (!fs.mkdirs(randomIns)) {
        throw new IOException("Mkdirs failed to create " + randomIns.toString());
    }
    Path answerkey = new Path(randomIns, "answer.key");
    SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, answerkey, IntWritable.class, IntWritable.class, SequenceFile.CompressionType.NONE);
    try {
        for (int i = 0; i < range; i++) {
            out.append(new IntWritable(i), new IntWritable(dist[i]));
        }
    } finally {
        out.close();
    }
    printFiles(randomIns, conf);
    //
    // Now we need to generate the random numbers according to
    // the above distribution.
    //
    // We create a lot of map tasks, each of which takes at least
    // one "line" of the distribution.  (That is, a certain number
    // X is to be generated Y number of times.)
    //
    // A map task emits Y key/val pairs.  The val is X.  The key
    // is a randomly-generated number.
    //
    // The reduce task gets its input sorted by key.  That is, sorted
    // in random order.  It then emits a single line of text for each
    // of the given values.  It does not emit the key.
    //
    // Because there's just one reduce task, we emit a single big
    // file of random numbers.
    //
    Path randomOuts = new Path(testdir, "genouts");
    fs.delete(randomOuts, true);
    Job genJob = Job.getInstance(conf);
    FileInputFormat.setInputPaths(genJob, randomIns);
    genJob.setInputFormatClass(SequenceFileInputFormat.class);
    genJob.setMapperClass(RandomGenMapper.class);
    FileOutputFormat.setOutputPath(genJob, randomOuts);
    genJob.setOutputKeyClass(IntWritable.class);
    genJob.setOutputValueClass(IntWritable.class);
    genJob.setReducerClass(RandomGenReducer.class);
    genJob.setNumReduceTasks(1);
    genJob.waitForCompletion(true);
    printFiles(randomOuts, conf);
    //
    // Next, we read the big file in and regenerate the 
    // original map.  It's split into a number of parts.
    // (That number is 'intermediateReduces'.)
    //
    // We have many map tasks, each of which read at least one
    // of the output numbers.  For each number read in, the
    // map task emits a key/value pair where the key is the
    // number and the value is "1".
    //
    // We have a single reduce task, which receives its input
    // sorted by the key emitted above.  For each key, there will
    // be a certain number of "1" values.  The reduce task sums
    // these values to compute how many times the given key was
    // emitted.
    //
    // The reduce task then emits a key/val pair where the key
    // is the number in question, and the value is the number of
    // times the key was emitted.  This is the same format as the
    // original answer key (except that numbers emitted zero times
    // will not appear in the regenerated key.)  The answer set
    // is split into a number of pieces.  A final MapReduce job
    // will merge them.
    //
    // There's not really a need to go to 10 reduces here 
    // instead of 1.  But we want to test what happens when
    // you have multiple reduces at once.
    //
    int intermediateReduces = 10;
    Path intermediateOuts = new Path(testdir, "intermediateouts");
    fs.delete(intermediateOuts, true);
    Job checkJob = Job.getInstance(conf);
    FileInputFormat.setInputPaths(checkJob, randomOuts);
    checkJob.setMapperClass(RandomCheckMapper.class);
    FileOutputFormat.setOutputPath(checkJob, intermediateOuts);
    checkJob.setOutputKeyClass(IntWritable.class);
    checkJob.setOutputValueClass(IntWritable.class);
    checkJob.setOutputFormatClass(MapFileOutputFormat.class);
    checkJob.setReducerClass(RandomCheckReducer.class);
    checkJob.setNumReduceTasks(intermediateReduces);
    checkJob.waitForCompletion(true);
    printFiles(intermediateOuts, conf);
    //
    // OK, now we take the output from the last job and
    // merge it down to a single file.  The map() and reduce()
    // functions don't really do anything except reemit tuples.
    // But by having a single reduce task here, we end up merging
    // all the files.
    //
    Path finalOuts = new Path(testdir, "finalouts");
    fs.delete(finalOuts, true);
    Job mergeJob = Job.getInstance(conf);
    FileInputFormat.setInputPaths(mergeJob, intermediateOuts);
    mergeJob.setInputFormatClass(SequenceFileInputFormat.class);
    mergeJob.setMapperClass(MergeMapper.class);
    FileOutputFormat.setOutputPath(mergeJob, finalOuts);
    mergeJob.setOutputKeyClass(IntWritable.class);
    mergeJob.setOutputValueClass(IntWritable.class);
    mergeJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    mergeJob.setReducerClass(MergeReducer.class);
    mergeJob.setNumReduceTasks(1);
    mergeJob.waitForCompletion(true);
    printFiles(finalOuts, conf);
    //
    // Finally, we compare the reconstructed answer key with the
    // original one.  Remember, we need to ignore zero-count items
    // in the original key.
    //
    boolean success = true;
    Path recomputedkey = new Path(finalOuts, "part-r-00000");
    SequenceFile.Reader in = new SequenceFile.Reader(fs, recomputedkey, conf);
    int totalseen = 0;
    try {
        IntWritable key = new IntWritable();
        IntWritable val = new IntWritable();
        for (int i = 0; i < range; i++) {
            if (dist[i] == 0) {
                continue;
            }
            if (!in.next(key, val)) {
                System.err.println("Cannot read entry " + i);
                success = false;
                break;
            } else {
                if (!((key.get() == i) && (val.get() == dist[i]))) {
                    System.err.println("Mismatch!  Pos=" + key.get() + ", i=" + i + ", val=" + val.get() + ", dist[i]=" + dist[i]);
                    success = false;
                }
                totalseen += val.get();
            }
        }
        if (success) {
            if (in.next(key, val)) {
                System.err.println("Unnecessary lines in recomputed key!");
                success = false;
            }
        }
    } finally {
        in.close();
    }
    int originalTotal = 0;
    for (int i = 0; i < dist.length; i++) {
        originalTotal += dist[i];
    }
    System.out.println("Original sum: " + originalTotal);
    System.out.println("Recomputed sum: " + totalseen);
    //
    // Write to "results" whether the test succeeded or not.
    //
    Path resultFile = new Path(testdir, "results");
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fs.create(resultFile)));
    try {
        bw.write("Success=" + success + "\n");
        System.out.println("Success=" + success);
    } finally {
        bw.close();
    }
    assertTrue("testMapRed failed", success);
    fs.delete(testdir, true);
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), InputStreamReader (java.io.InputStreamReader), BufferedReader (java.io.BufferedReader), IOException (java.io.IOException), BufferedWriter (java.io.BufferedWriter), SequenceFile (org.apache.hadoop.io.SequenceFile), OutputStreamWriter (java.io.OutputStreamWriter), IntWritable (org.apache.hadoop.io.IntWritable)
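The RandomGenMapper, RandomCheckMapper, MergeMapper, and related classes referenced above are not shown in this excerpt. Based on the comment that the merge step simply reemits tuples, an identity-style sketch of the merge classes (extending org.apache.hadoop.mapreduce.Mapper and Reducer, consistent with the Job API used above) might look like the following; the actual classes in TestMapReduce may differ.

// Hypothetical identity-style sketch: both classes pass IntWritable pairs through unchanged.
public static class MergeMapper
        extends Mapper<IntWritable, IntWritable, IntWritable, IntWritable> {
    @Override
    protected void map(IntWritable key, IntWritable value, Context context)
            throws IOException, InterruptedException {
        context.write(key, value);
    }
}

public static class MergeReducer
        extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
    @Override
    protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        for (IntWritable value : values) {
            context.write(key, value);
        }
    }
}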

Example 29 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache: class TestPipeApplication, method testPipesPartitioner().

/**
   * Test PipesPartitioner: set and get partition data from PipesPartitioner.
   */
@Test
public void testPipesPartitioner() {
    PipesPartitioner<IntWritable, Text> partitioner = new PipesPartitioner<IntWritable, Text>();
    JobConf configuration = new JobConf();
    Submitter.getJavaPartitioner(configuration);
    partitioner.configure(new JobConf());
    IntWritable iw = new IntWritable(4);
    // the cache is empty
    assertEquals(0, partitioner.getPartition(iw, new Text("test"), 2));
    // set data into cache
    PipesPartitioner.setNextPartition(3);
    // get data from cache
    assertEquals(3, partitioner.getPartition(iw, new Text("test"), 2));
}
Also used: Text (org.apache.hadoop.io.Text), JobConf (org.apache.hadoop.mapred.JobConf), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
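For comparison, a minimal sketch (not part of the test) of how Hadoop's stock HashPartitioner (org.apache.hadoop.mapred.lib.HashPartitioner) would place the same IntWritable key when there is no cached override; the resulting partition follows from IntWritable.hashCode() returning the wrapped value.

// Hypothetical comparison: HashPartitioner derives the partition from the key's
// hash code rather than from a value cached via setNextPartition().
HashPartitioner<IntWritable, Text> hashPartitioner = new HashPartitioner<IntWritable, Text>();
hashPartitioner.configure(new JobConf());
IntWritable key = new IntWritable(4);
// IntWritable(4).hashCode() == 4, so with two reducers this yields partition 0.
int partition = hashPartitioner.getPartition(key, new Text("test"), 2);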

Example 30 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hive by apache: class GenericUDFTrunc, method evaluateNumber().

private Object evaluateNumber(DeferredObject[] arguments) throws HiveException, UDFArgumentTypeException {
    if (arguments[0] == null) {
        return null;
    }
    Object input = arguments[0].get();
    if (input == null) {
        return null;
    }
    if (arguments.length == 2 && arguments[1] != null && arguments[1].get() != null && !inputSacleConst) {
        Object scaleObj = null;
        switch(inputScaleOI.getPrimitiveCategory()) {
            case BYTE:
                scaleObj = byteConverter.convert(arguments[1].get());
                scale = ((ByteWritable) scaleObj).get();
                break;
            case SHORT:
                scaleObj = shortConverter.convert(arguments[1].get());
                scale = ((ShortWritable) scaleObj).get();
                break;
            case INT:
                scaleObj = intConverter.convert(arguments[1].get());
                scale = ((IntWritable) scaleObj).get();
                break;
            case LONG:
                scaleObj = longConverter.convert(arguments[1].get());
                long l = ((LongWritable) scaleObj).get();
                if (l < Integer.MIN_VALUE || l > Integer.MAX_VALUE) {
                    throw new UDFArgumentException(getFuncName().toUpperCase() + " scale argument out of allowed range");
                }
                scale = (int) l;
                break;
            default:
                break;
        }
    }
    switch(inputType1) {
        case VOID:
            return null;
        case DECIMAL:
            HiveDecimalWritable decimalWritable = (HiveDecimalWritable) inputOI.getPrimitiveWritableObject(input);
            HiveDecimal dec = trunc(decimalWritable.getHiveDecimal(), scale);
            if (dec == null) {
                return null;
            }
            return new HiveDecimalWritable(dec);
        case BYTE:
            ByteWritable byteWritable = (ByteWritable) inputOI.getPrimitiveWritableObject(input);
            if (scale >= 0) {
                return byteWritable;
            } else {
                return new ByteWritable((byte) trunc(byteWritable.get(), scale));
            }
        case SHORT:
            ShortWritable shortWritable = (ShortWritable) inputOI.getPrimitiveWritableObject(input);
            if (scale >= 0) {
                return shortWritable;
            } else {
                return new ShortWritable((short) trunc(shortWritable.get(), scale));
            }
        case INT:
            IntWritable intWritable = (IntWritable) inputOI.getPrimitiveWritableObject(input);
            if (scale >= 0) {
                return intWritable;
            } else {
                return new IntWritable((int) trunc(intWritable.get(), scale));
            }
        case LONG:
            LongWritable longWritable = (LongWritable) inputOI.getPrimitiveWritableObject(input);
            if (scale >= 0) {
                return longWritable;
            } else {
                return new LongWritable(trunc(longWritable.get(), scale));
            }
        case FLOAT:
            float f = ((FloatWritable) inputOI.getPrimitiveWritableObject(input)).get();
            return new FloatWritable((float) trunc(f, scale));
        case DOUBLE:
            return trunc(((DoubleWritable) inputOI.getPrimitiveWritableObject(input)), scale);
        default:
            throw new UDFArgumentTypeException(0, "Only numeric or string group data types are allowed for TRUNC function. Got " + inputType1.name());
    }
}
Also used: HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable), UDFArgumentTypeException (org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException), DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable), ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable), UDFArgumentException (org.apache.hadoop.hive.ql.exec.UDFArgumentException), FloatWritable (org.apache.hadoop.io.FloatWritable), HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal), LongWritable (org.apache.hadoop.io.LongWritable), ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable), IntWritable (org.apache.hadoop.io.IntWritable)
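The trunc(...) helpers called above are not included in this excerpt. A minimal standalone sketch of the negative-scale integer truncation that the INT branch performs is shown below; the method name and helper are illustrative, not the UDF's actual implementation.

// Hypothetical sketch of integer truncation at a negative scale, e.g. TRUNC(1234, -2) -> 1200.
static long truncToScale(long value, int scale) {
    if (scale >= 0) {
        // Integer values are returned unchanged for a non-negative scale,
        // matching the early returns in the BYTE/SHORT/INT/LONG branches above.
        return value;
    }
    long factor = (long) Math.pow(10, -scale);
    return (value / factor) * factor;  // drop the last |scale| digits
}

// Wrapping the result the way evaluateNumber does for INT input:
IntWritable truncated = new IntWritable((int) truncToScale(1234, -2));  // holds 1200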

Aggregations

IntWritable (org.apache.hadoop.io.IntWritable): 312
Test (org.junit.Test): 116
Text (org.apache.hadoop.io.Text): 102
LongWritable (org.apache.hadoop.io.LongWritable): 70
Path (org.apache.hadoop.fs.Path): 64
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 55
FloatWritable (org.apache.hadoop.io.FloatWritable): 48
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 47
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 43
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 42
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 40
SequenceFile (org.apache.hadoop.io.SequenceFile): 39
BytesWritable (org.apache.hadoop.io.BytesWritable): 37
Writable (org.apache.hadoop.io.Writable): 35
ArrayList (java.util.ArrayList): 34
Configuration (org.apache.hadoop.conf.Configuration): 33
Random (java.util.Random): 29
TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable): 29
IOException (java.io.IOException): 28
DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject): 28