
Example 56 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

The class TestJoinDatamerge, method testNestedJoin.

@Test
public void testNestedJoin() throws Exception {
    // Join shape: outer(inner(S1,...,Sn),outer(S1,...,Sn))
    final int SOURCES = 3;
    final int ITEMS = (SOURCES + 1) * (SOURCES + 1);
    Configuration conf = new Configuration();
    Path base = cluster.getFileSystem().makeQualified(new Path("/nested"));
    int[][] source = new int[SOURCES][];
    for (int i = 0; i < SOURCES; ++i) {
        source[i] = new int[ITEMS];
        for (int j = 0; j < ITEMS; ++j) {
            source[i][j] = (i + 2) * (j + 1);
        }
    }
    Path[] src = new Path[SOURCES];
    SequenceFile.Writer[] out = createWriters(base, conf, SOURCES, src);
    IntWritable k = new IntWritable();
    for (int i = 0; i < SOURCES; ++i) {
        IntWritable v = new IntWritable();
        v.set(i);
        for (int j = 0; j < ITEMS; ++j) {
            k.set(source[i][j]);
            out[i].append(k, v);
        }
        out[i].close();
    }
    out = null;
    StringBuilder sb = new StringBuilder();
    sb.append("outer(inner(");
    for (int i = 0; i < SOURCES; ++i) {
        sb.append(CompositeInputFormat.compose(SequenceFileInputFormat.class, src[i].toString()));
        if (i + 1 != SOURCES) {
            sb.append(",");
        }
    }
    sb.append("),outer(");
    sb.append(CompositeInputFormat.compose(MapReduceTestUtil.Fake_IF.class, "foobar"));
    sb.append(",");
    for (int i = 0; i < SOURCES; ++i) {
        sb.append(CompositeInputFormat.compose(SequenceFileInputFormat.class, src[i].toString()));
        sb.append(",");
    }
    sb.append(CompositeInputFormat.compose(MapReduceTestUtil.Fake_IF.class, "raboof"));
    sb.append("))");
    conf.set(CompositeInputFormat.JOIN_EXPR, sb.toString());
    MapReduceTestUtil.Fake_IF.setKeyClass(conf, IntWritable.class);
    MapReduceTestUtil.Fake_IF.setValClass(conf, IntWritable.class);
    Job job = Job.getInstance(conf);
    Path outf = new Path(base, "out");
    FileOutputFormat.setOutputPath(job, outf);
    job.setInputFormatClass(CompositeInputFormat.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(TupleWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.waitForCompletion(true);
    assertTrue("Job failed", job.isSuccessful());
    FileStatus[] outlist = cluster.getFileSystem().listStatus(outf, new Utils.OutputFileUtils.OutputFilesFilter());
    assertEquals(1, outlist.length);
    assertTrue(0 < outlist[0].getLen());
    SequenceFile.Reader r = new SequenceFile.Reader(cluster.getFileSystem(), outlist[0].getPath(), conf);
    TupleWritable v = new TupleWritable();
    while (r.next(k, v)) {
        assertFalse(((TupleWritable) v.get(1)).has(0));
        assertFalse(((TupleWritable) v.get(1)).has(SOURCES + 1));
        boolean chk = true;
        int ki = k.get();
        for (int i = 2; i < SOURCES + 2; ++i) {
            if ((ki % i) == 0 && ki <= i * ITEMS) {
                assertEquals(i - 2, ((IntWritable) ((TupleWritable) v.get(1)).get((i - 1))).get());
            } else {
                chk = false;
            }
        }
        if (chk) {
            // present in all sources; chk inner
            assertTrue(v.has(0));
            for (int i = 0; i < SOURCES; ++i) {
                assertTrue(((TupleWritable) v.get(0)).has(i));
            }
        } else {
            // should not be present in inner join
            assertFalse(v.has(0));
        }
    }
    r.close();
    base.getFileSystem(conf).delete(base, true);
}
Also used: Path (org.apache.hadoop.fs.Path), FileStatus (org.apache.hadoop.fs.FileStatus), Configuration (org.apache.hadoop.conf.Configuration), SequenceFileInputFormat (org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat), SequenceFile (org.apache.hadoop.io.SequenceFile), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
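
The join expression assembled above is just a string in the grammar that CompositeInputFormat parses at job-submission time. As a minimal sketch of the same mechanism outside the test harness, assuming two hypothetical pre-sorted SequenceFile inputs under /data, a plain two-source inner join can be composed like this (compose() wraps each path in a tbl(...) leaf for you):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.join.CompositeInputFormat;
import org.apache.hadoop.mapreduce.lib.join.TupleWritable;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class InnerJoinSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Yields "inner(tbl(<SequenceFileInputFormat>,'/data/a'),tbl(<...>,'/data/b'))".
        String expr = CompositeInputFormat.compose("inner",
                SequenceFileInputFormat.class, "/data/a", "/data/b");
        conf.set(CompositeInputFormat.JOIN_EXPR, expr);
        Job job = Job.getInstance(conf, "inner-join-sketch");
        job.setInputFormatClass(CompositeInputFormat.class);
        // An identity map is enough: the composite record reader already
        // delivers one TupleWritable per joined key.
        job.setMapperClass(Mapper.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(TupleWritable.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path("/data/out"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

As with any CompositeInputFormat map-side join, every source must be sorted on the join key and partitioned identically; the test's generated inputs satisfy this because each writer appends keys in increasing order.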

Example 57 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

The class TestInputSampler, method testSplitSampler.

/**
   * Verify the SplitSampler contract: an equal number of records is taken
   * from each of the first splits.
   */
@Test
// IntWritable comparator not typesafe
@SuppressWarnings("unchecked")
public void testSplitSampler() throws Exception {
    final int TOT_SPLITS = 15;
    final int NUM_SPLITS = 5;
    final int STEP_SAMPLE = 5;
    final int NUM_SAMPLES = NUM_SPLITS * STEP_SAMPLE;
    InputSampler.Sampler<IntWritable, NullWritable> sampler = new InputSampler.SplitSampler<IntWritable, NullWritable>(NUM_SAMPLES, NUM_SPLITS);
    int[] inits = new int[TOT_SPLITS];
    for (int i = 0; i < TOT_SPLITS; ++i) {
        inits[i] = i * STEP_SAMPLE;
    }
    Job ignored = Job.getInstance();
    Object[] samples = sampler.getSample(new TestInputSamplerIF(100000, TOT_SPLITS, inits), ignored);
    assertEquals(NUM_SAMPLES, samples.length);
    Arrays.sort(samples, new IntWritable.Comparator());
    for (int i = 0; i < NUM_SAMPLES; ++i) {
        assertEquals(i, ((IntWritable) samples[i]).get());
    }
}
Also used: Job (org.apache.hadoop.mapreduce.Job), NullWritable (org.apache.hadoop.io.NullWritable), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
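
Outside of tests, this sampler is usually not driven by hand; it feeds InputSampler.writePartitionFile(), which turns the sorted samples into split points for TotalOrderPartitioner. A minimal sketch under that assumption (the partition-file path and sample counts are illustrative, and the job's input format, paths, and key type are assumed to be configured already):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class SamplerSketch {
    static void writePartitions(Job job) throws Exception {
        // One cut point is written per reduce-task boundary.
        job.setNumReduceTasks(4);
        job.setPartitionerClass(TotalOrderPartitioner.class);
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(),
                new Path("/tmp/_partitions"));  // hypothetical location
        // Take at most 10000 samples, reading from at most 10 splits.
        InputSampler.Sampler<IntWritable, NullWritable> sampler =
                new InputSampler.SplitSampler<>(10000, 10);
        InputSampler.writePartitionFile(job, sampler);
    }
}

SplitSampler is cheap because it reads only the leading records of the first splits, but for the same reason it can produce skewed partitions when the input is already sorted.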

Example 58 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

The class TestInputSampler, method testIntervalSampler.

/**
   * Verify the IntervalSampler contract: samples are taken at regular
   * intervals from the given splits.
   */
@Test
// IntWritable comparator not typesafe
@SuppressWarnings("unchecked")
public void testIntervalSampler() throws Exception {
    final int TOT_SPLITS = 16;
    final int PER_SPLIT_SAMPLE = 4;
    final int NUM_SAMPLES = TOT_SPLITS * PER_SPLIT_SAMPLE;
    final double FREQ = 1.0 / TOT_SPLITS;
    InputSampler.Sampler<IntWritable, NullWritable> sampler = new InputSampler.IntervalSampler<IntWritable, NullWritable>(FREQ, NUM_SAMPLES);
    int[] inits = new int[TOT_SPLITS];
    for (int i = 0; i < TOT_SPLITS; ++i) {
        inits[i] = i;
    }
    Job ignored = Job.getInstance();
    Object[] samples = sampler.getSample(new TestInputSamplerIF(NUM_SAMPLES, TOT_SPLITS, inits), ignored);
    assertEquals(NUM_SAMPLES, samples.length);
    Arrays.sort(samples, new IntWritable.Comparator());
    for (int i = 0; i < NUM_SAMPLES; ++i) {
        assertEquals(i, ((IntWritable) samples[i]).get());
    }
}
Also used: NullWritable (org.apache.hadoop.io.NullWritable), Job (org.apache.hadoop.mapreduce.Job), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
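
Unlike SplitSampler, IntervalSampler scans its splits end to end and keeps records at a fixed frequency, so it costs more I/O but tracks the real key distribution more closely. A one-line sketch with illustrative numbers, keeping roughly one record in a hundred from at most 20 splits:

InputSampler.Sampler<IntWritable, NullWritable> sampler =
        new InputSampler.IntervalSampler<IntWritable, NullWritable>(0.01, 20);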

Example 59 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

The class ExternalMapperReducer, method reduce.

public void reduce(WritableComparable key, Iterator<Writable> values, OutputCollector<WritableComparable, IntWritable> output, Reporter reporter) throws IOException {
    // Count how many values arrived for this key, then emit the total.
    int count = 0;
    while (values.hasNext()) {
        count++;
        values.next();
    }
    output.collect(key, new IntWritable(count));
}
Also used: IntWritable (org.apache.hadoop.io.IntWritable)
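
For comparison, the same counting logic ported to the newer org.apache.hadoop.mapreduce API might look like the sketch below (class name ours):

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Reducer;

public class CountingReducer
        extends Reducer<WritableComparable<?>, Writable, WritableComparable<?>, IntWritable> {
    private final IntWritable count = new IntWritable();

    @Override
    protected void reduce(WritableComparable<?> key, Iterable<Writable> values,
                          Context context) throws IOException, InterruptedException {
        int n = 0;
        for (Writable ignored : values) {
            n++;  // count occurrences of this key
        }
        count.set(n);
        context.write(key, count);
    }
}

Reusing one IntWritable instance across calls, rather than allocating a new one per key as the old-API example does, is the usual Writable idiom for hot paths.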

Example 60 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

The class TestComparators, method configure.

@Before
public void configure() throws Exception {
    Path testdir = new Path(TEST_DIR.getAbsolutePath());
    Path inDir = new Path(testdir, "in");
    Path outDir = new Path(testdir, "out");
    FileSystem fs = FileSystem.get(conf);
    fs.delete(testdir, true);
    conf.setInputFormat(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(conf, inDir);
    FileOutputFormat.setOutputPath(conf, outDir);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    // Use two map tasks so the merge phase on the reduce side is exercised too.
    conf.setNumMapTasks(2);
    conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    if (!fs.mkdirs(testdir)) {
        throw new IOException("Mkdirs failed to create " + testdir.toString());
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Mkdirs failed to create " + inDir.toString());
    }
    // Set up input data in two files.
    Path inFile = new Path(inDir, "part0");
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, inFile, IntWritable.class, IntWritable.class);
    writer.append(new IntWritable(11), new IntWritable(999));
    writer.append(new IntWritable(23), new IntWritable(456));
    writer.append(new IntWritable(10), new IntWritable(780));
    writer.close();
    inFile = new Path(inDir, "part1");
    writer = SequenceFile.createWriter(fs, conf, inFile, IntWritable.class, IntWritable.class);
    writer.append(new IntWritable(45), new IntWritable(100));
    writer.append(new IntWritable(18), new IntWritable(200));
    writer.append(new IntWritable(27), new IntWritable(300));
    writer.close();
    jc = new JobClient(conf);
}
Also used: Path (org.apache.hadoop.fs.Path), SequenceFile (org.apache.hadoop.io.SequenceFile), FileSystem (org.apache.hadoop.fs.FileSystem), IOException (java.io.IOException), IntWritable (org.apache.hadoop.io.IntWritable), Before (org.junit.Before)
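
Tests built on this fixture typically install a custom key comparator on the JobConf configured above. A sketch of the decreasing-order kind such tests exercise, inverting IntWritable's raw byte comparison:

import org.apache.hadoop.io.IntWritable;

public class DecreasingIntComparator extends IntWritable.Comparator {
    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        // Negate the ascending comparison to sort keys in decreasing order.
        return -super.compare(b1, s1, l1, b2, s2, l2);
    }
}

// Installed on the old-API JobConf from configure():
//   conf.setOutputKeyComparatorClass(DecreasingIntComparator.class);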

Aggregations

IntWritable (org.apache.hadoop.io.IntWritable): 338
Test (org.junit.Test): 120
Text (org.apache.hadoop.io.Text): 115
LongWritable (org.apache.hadoop.io.LongWritable): 79
Path (org.apache.hadoop.fs.Path): 66
FloatWritable (org.apache.hadoop.io.FloatWritable): 58
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 56
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 56
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 51
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 50
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 47
BytesWritable (org.apache.hadoop.io.BytesWritable): 45
SequenceFile (org.apache.hadoop.io.SequenceFile): 41
ArrayList (java.util.ArrayList): 40
Writable (org.apache.hadoop.io.Writable): 39
TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable): 37
Configuration (org.apache.hadoop.conf.Configuration): 35
IOException (java.io.IOException): 30
DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject): 29
Random (java.util.Random): 28