Search in sources:

Example 21 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

From the class TestJoinDatamerge, method writeSimpleSrc.

private static Path[] writeSimpleSrc(Path testdir, Configuration conf, int srcs) throws IOException {
    SequenceFile.Writer[] out = null;
    Path[] src = new Path[srcs];
    try {
        out = createWriters(testdir, conf, srcs, src);
        final int capacity = srcs * 2 + 1;
        IntWritable key = new IntWritable();
        IntWritable val = new IntWritable();
        for (int k = 0; k < capacity; ++k) {
            for (int i = 0; i < srcs; ++i) {
                // every srcs-th key is shared by all sources; the rest are unique per source
                key.set(k % srcs == 0 ? k * srcs : k * srcs + i);
                val.set(10 * k + i);
                out[i].append(key, val);
                if (i == k) {
                    // add duplicate key
                    out[i].append(key, val);
                }
            }
        }
    } finally {
        if (out != null) {
            for (int i = 0; i < srcs; ++i) {
                if (out[i] != null)
                    out[i].close();
            }
        }
    }
    return src;
}
Also used: Path (org.apache.hadoop.fs.Path), IntWritable (org.apache.hadoop.io.IntWritable)
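
writeSimpleSrc fills one SequenceFile per source with IntWritable key/value pairs, sharing every srcs-th key across all sources and appending a few deliberate duplicates. A minimal sketch (not part of the Hadoop test) of reading one of those files back with the same SequenceFile API; it additionally assumes org.apache.hadoop.fs.FileSystem, and the method name is hypothetical:

private static void dumpSrc(FileSystem fs, Path src, Configuration conf) throws IOException {
    // Iterate the (IntWritable, IntWritable) records written by writeSimpleSrc.
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, src, conf);
    try {
        IntWritable key = new IntWritable();
        IntWritable val = new IntWritable();
        while (reader.next(key, val)) {
            System.out.println(key.get() + "\t" + val.get());
        }
    } finally {
        reader.close();
    }
}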

Example 22 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

From the class TestJoinDatamerge, method testNestedJoin.

@Test
public void testNestedJoin() throws Exception {
    // outer(inner(S1,...,Sn),outer(S1,...Sn))
    final int SOURCES = 3;
    final int ITEMS = (SOURCES + 1) * (SOURCES + 1);
    Configuration conf = new Configuration();
    Path base = cluster.getFileSystem().makeQualified(new Path("/nested"));
    int[][] source = new int[SOURCES][];
    for (int i = 0; i < SOURCES; ++i) {
        source[i] = new int[ITEMS];
        for (int j = 0; j < ITEMS; ++j) {
            source[i][j] = (i + 2) * (j + 1);
        }
    }
    Path[] src = new Path[SOURCES];
    SequenceFile.Writer[] out = createWriters(base, conf, SOURCES, src);
    IntWritable k = new IntWritable();
    for (int i = 0; i < SOURCES; ++i) {
        IntWritable v = new IntWritable();
        v.set(i);
        for (int j = 0; j < ITEMS; ++j) {
            k.set(source[i][j]);
            out[i].append(k, v);
        }
        out[i].close();
    }
    out = null;
    StringBuilder sb = new StringBuilder();
    sb.append("outer(inner(");
    for (int i = 0; i < SOURCES; ++i) {
        sb.append(CompositeInputFormat.compose(SequenceFileInputFormat.class, src[i].toString()));
        if (i + 1 != SOURCES)
            sb.append(",");
    }
    sb.append("),outer(");
    sb.append(CompositeInputFormat.compose(MapReduceTestUtil.Fake_IF.class, "foobar"));
    sb.append(",");
    for (int i = 0; i < SOURCES; ++i) {
        sb.append(CompositeInputFormat.compose(SequenceFileInputFormat.class, src[i].toString()));
        sb.append(",");
    }
    sb.append(CompositeInputFormat.compose(MapReduceTestUtil.Fake_IF.class, "raboof") + "))");
    conf.set(CompositeInputFormat.JOIN_EXPR, sb.toString());
    MapReduceTestUtil.Fake_IF.setKeyClass(conf, IntWritable.class);
    MapReduceTestUtil.Fake_IF.setValClass(conf, IntWritable.class);
    Job job = Job.getInstance(conf);
    Path outf = new Path(base, "out");
    FileOutputFormat.setOutputPath(job, outf);
    job.setInputFormatClass(CompositeInputFormat.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(TupleWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.waitForCompletion(true);
    assertTrue("Job failed", job.isSuccessful());
    FileStatus[] outlist = cluster.getFileSystem().listStatus(outf, new Utils.OutputFileUtils.OutputFilesFilter());
    assertEquals(1, outlist.length);
    assertTrue(0 < outlist[0].getLen());
    SequenceFile.Reader r = new SequenceFile.Reader(cluster.getFileSystem(), outlist[0].getPath(), conf);
    TupleWritable v = new TupleWritable();
    while (r.next(k, v)) {
        // v.get(1) is the outer(...) sub-tuple; positions 0 and SOURCES + 1 belong to
        // the two Fake_IF inputs, which contribute no records
        assertFalse(((TupleWritable) v.get(1)).has(0));
        assertFalse(((TupleWritable) v.get(1)).has(SOURCES + 1));
        boolean chk = true;
        int ki = k.get();
        for (int i = 2; i < SOURCES + 2; ++i) {
            // source i - 2 wrote the multiples of i up to i * ITEMS, tagged with its own index
            if ((ki % i) == 0 && ki <= i * ITEMS) {
                assertEquals(i - 2, ((IntWritable) ((TupleWritable) v.get(1)).get((i - 1))).get());
            } else
                chk = false;
        }
        if (chk) {
            // present in all sources; chk inner
            assertTrue(v.has(0));
            for (int i = 0; i < SOURCES; ++i) assertTrue(((TupleWritable) v.get(0)).has(i));
        } else {
            // should not be present in inner join
            assertFalse(v.has(0));
        }
    }
    r.close();
    base.getFileSystem(conf).delete(base, true);
}
Also used: Path (org.apache.hadoop.fs.Path), FileStatus (org.apache.hadoop.fs.FileStatus), Configuration (org.apache.hadoop.conf.Configuration), SequenceFileInputFormat (org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat), SequenceFile (org.apache.hadoop.io.SequenceFile), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
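
The core of the test is the join expression: CompositeInputFormat.compose(...) turns an input format plus one or more paths into an expression fragment, the fragments are nested inside inner(...) and outer(...), and the result is stored under CompositeInputFormat.JOIN_EXPR. A minimal sketch of wiring up a plain two-source inner join the same way (the method name and paths are hypothetical):

private static Job createInnerJoinJob(Configuration conf, Path left, Path right) throws IOException {
    // Compose inner(tbl(SequenceFileInputFormat, left), tbl(SequenceFileInputFormat, right))
    // and hand it to CompositeInputFormat via the join expression property.
    conf.set(CompositeInputFormat.JOIN_EXPR,
        CompositeInputFormat.compose("inner", SequenceFileInputFormat.class,
            left.toString(), right.toString()));
    Job job = Job.getInstance(conf);
    job.setInputFormatClass(CompositeInputFormat.class);
    // With the default identity Mapper, each map input value is a TupleWritable
    // holding one entry per joined source, as in the test above.
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(TupleWritable.class);
    return job;
}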

Example 23 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

From the class TestInputSampler, method testSplitSampler.

/**
   * Verify SplitSampler contract, that an equal number of records are taken
   * from the first splits.
   */
@Test
// IntWritable comparator not typesafe
@SuppressWarnings("unchecked")
public void testSplitSampler() throws Exception {
    final int TOT_SPLITS = 15;
    final int NUM_SPLITS = 5;
    final int STEP_SAMPLE = 5;
    final int NUM_SAMPLES = NUM_SPLITS * STEP_SAMPLE;
    InputSampler.Sampler<IntWritable, NullWritable> sampler = new InputSampler.SplitSampler<IntWritable, NullWritable>(NUM_SAMPLES, NUM_SPLITS);
    int[] inits = new int[TOT_SPLITS];
    for (int i = 0; i < TOT_SPLITS; ++i) {
        inits[i] = i * STEP_SAMPLE;
    }
    Job ignored = Job.getInstance();
    Object[] samples = sampler.getSample(new TestInputSamplerIF(100000, TOT_SPLITS, inits), ignored);
    assertEquals(NUM_SAMPLES, samples.length);
    Arrays.sort(samples, new IntWritable.Comparator());
    // the first NUM_SPLITS splits each contribute their first STEP_SAMPLE records,
    // so the sorted samples are exactly 0 .. NUM_SAMPLES - 1
    for (int i = 0; i < NUM_SAMPLES; ++i) {
        assertEquals(i, ((IntWritable) samples[i]).get());
    }
}
Also used: Job (org.apache.hadoop.mapreduce.Job), NullWritable (org.apache.hadoop.io.NullWritable), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
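
SplitSampler(NUM_SAMPLES, NUM_SPLITS) caps the total number of samples and the number of splits it reads, taking the leading records of each sampled split. Outside of tests, a sampler like this is usually passed to InputSampler.writePartitionFile to produce split points for TotalOrderPartitioner; a minimal sketch of that pairing, assuming a job whose map output keys are IntWritable (the method name and sample sizes are illustrative):

private static void configureTotalOrder(Job job, Path partitionFile) throws Exception {
    // Sample up to 1000 keys from at most 10 splits of the job's input.
    InputSampler.Sampler<IntWritable, NullWritable> sampler =
        new InputSampler.SplitSampler<IntWritable, NullWritable>(1000, 10);
    // Tell TotalOrderPartitioner where its partition file lives, then write it.
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFile);
    InputSampler.writePartitionFile(job, sampler);
    job.setPartitionerClass(TotalOrderPartitioner.class);
}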

Example 24 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

From the class TestInputSampler, method testIntervalSampler.

/**
   * Verify IntervalSampler contract, that samples are taken at regular
   * intervals from the given splits.
   */
@Test
// IntWritable comparator not typesafe
@SuppressWarnings("unchecked")
public void testIntervalSampler() throws Exception {
    final int TOT_SPLITS = 16;
    final int PER_SPLIT_SAMPLE = 4;
    final int NUM_SAMPLES = TOT_SPLITS * PER_SPLIT_SAMPLE;
    final double FREQ = 1.0 / TOT_SPLITS;
    InputSampler.Sampler<IntWritable, NullWritable> sampler = new InputSampler.IntervalSampler<IntWritable, NullWritable>(FREQ, NUM_SAMPLES);
    int[] inits = new int[TOT_SPLITS];
    for (int i = 0; i < TOT_SPLITS; ++i) {
        inits[i] = i;
    }
    Job ignored = Job.getInstance();
    Object[] samples = sampler.getSample(new TestInputSamplerIF(NUM_SAMPLES, TOT_SPLITS, inits), ignored);
    assertEquals(NUM_SAMPLES, samples.length);
    Arrays.sort(samples, new IntWritable.Comparator());
    for (int i = 0; i < NUM_SAMPLES; ++i) {
        assertEquals(i, ((IntWritable) samples[i]).get());
    }
}
Also used: NullWritable (org.apache.hadoop.io.NullWritable), Job (org.apache.hadoop.mapreduce.Job), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
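
The Arrays.sort call in both sampler tests uses IntWritable.Comparator, the WritableComparator that Hadoop registers for IntWritable; it is not typesafe with respect to the Object[] returned by getSample, hence the @SuppressWarnings("unchecked") above. A minimal sketch of the two equivalent ways to obtain and use that comparator (the method name is illustrative; assumes org.apache.hadoop.io.WritableComparator is imported):

private static void compareIntWritables() {
    IntWritable a = new IntWritable(1);
    IntWritable b = new IntWritable(2);
    // Instantiate the nested comparator class directly, as the tests do.
    int direct = new IntWritable.Comparator().compare(a, b);
    // Or look it up through the WritableComparator registry.
    int viaRegistry = WritableComparator.get(IntWritable.class).compare(a, b);
    // Both are negative here because 1 < 2.
    System.out.println(direct < 0 && viaRegistry < 0);
}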

Example 25 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

From the class ExternalMapperReducer, method reduce.

public void reduce(WritableComparable key, Iterator<Writable> values, OutputCollector<WritableComparable, IntWritable> output, Reporter reporter) throws IOException {
    int count = 0;
    while (values.hasNext()) {
        count++;
        values.next();
    }
    output.collect(key, new IntWritable(count));
}
Also used: IntWritable (org.apache.hadoop.io.IntWritable)
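
This reducer uses the older org.apache.hadoop.mapred API: values arrive as an Iterator and results go to an OutputCollector. For comparison, a minimal sketch of the same count-the-values logic written against the newer org.apache.hadoop.mapreduce API (the class name is hypothetical; assumes the usual IOException, Writable, WritableComparable and Reducer imports):

public static class CountingReducer
        extends Reducer<WritableComparable, Writable, WritableComparable, IntWritable> {
    @Override
    protected void reduce(WritableComparable key, Iterable<Writable> values, Context context)
            throws IOException, InterruptedException {
        // Count how many values share this key; the values themselves are ignored.
        int count = 0;
        for (Writable ignored : values) {
            count++;
        }
        context.write(key, new IntWritable(count));
    }
}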

Aggregations

IntWritable (org.apache.hadoop.io.IntWritable): 312
Test (org.junit.Test): 116
Text (org.apache.hadoop.io.Text): 102
LongWritable (org.apache.hadoop.io.LongWritable): 70
Path (org.apache.hadoop.fs.Path): 64
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 55
FloatWritable (org.apache.hadoop.io.FloatWritable): 48
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 47
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 43
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 42
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 40
SequenceFile (org.apache.hadoop.io.SequenceFile): 39
BytesWritable (org.apache.hadoop.io.BytesWritable): 37
Writable (org.apache.hadoop.io.Writable): 35
ArrayList (java.util.ArrayList): 34
Configuration (org.apache.hadoop.conf.Configuration): 33
Random (java.util.Random): 29
TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable): 29
IOException (java.io.IOException): 28
DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject): 28