Example 46 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project goldenorb by jzachr.

The class SampleIntMessageTest, method startServer().

/**
 * Starts an RPC server for IntMessage/IntWritable pairs and connects a
 * client proxy to it before each test runs.
 */
@SuppressWarnings("unchecked")
@Before
public void startServer() throws IOException {
    server = new RPCServer<IntMessage, IntWritable>(SERVER_PORT);
    server.start();
    Configuration conf = new Configuration();
    InetSocketAddress addr = new InetSocketAddress("localhost", SERVER_PORT);
    if (client == null) {
        client = (RPCProtocol<IntMessage, IntWritable>) RPC.waitForProxy(
                RPCProtocol.class, RPCProtocol.versionID, addr, conf);
    }
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), InetSocketAddress (java.net.InetSocketAddress), IntMessage (org.goldenorb.types.message.IntMessage), IntWritable (org.apache.hadoop.io.IntWritable), Before (org.junit.Before)
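
The RPC round-trip above works because IntWritable implements Hadoop's Writable contract (write/readFields). A minimal, self-contained sketch of that same serialization without any RPC, using only stable org.apache.hadoop.io classes:

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;

public class IntWritableRoundTrip {
    public static void main(String[] args) throws Exception {
        IntWritable original = new IntWritable(42);
        // Serialize: Writable.write(DataOutput) emits the raw int.
        DataOutputBuffer out = new DataOutputBuffer();
        original.write(out);
        // Deserialize into a fresh instance via Writable.readFields(DataInput).
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        IntWritable copy = new IntWritable();
        copy.readFields(in);
        System.out.println(copy.get()); // prints 42
    }
}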

Example 47 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

The class TestCombineSequenceFileInputFormat, method testFormat().

@Test(timeout = 10000)
public void testFormat() throws IOException, InterruptedException {
    Job job = Job.getInstance(conf);
    Random random = new Random();
    long seed = random.nextLong();
    random.setSeed(seed);
    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, workDir);
    final int length = 10000;
    final int numFiles = 10;
    // create files with a variety of lengths
    createFiles(length, numFiles, random, job);
    TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    // create a combine split for the files
    InputFormat<IntWritable, BytesWritable> format = new CombineSequenceFileInputFormat<IntWritable, BytesWritable>();
    for (int i = 0; i < 3; i++) {
        int numSplits = random.nextInt(length / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
        LOG.info("splitting: requesting = " + numSplits);
        List<InputSplit> splits = format.getSplits(job);
        LOG.info("splitting: got =        " + splits.size());
        // we should have a single split as the length is comfortably smaller than
        // the block size
        assertEquals("We got more than one splits!", 1, splits.size());
        InputSplit split = splits.get(0);
        assertEquals("It should be CombineFileSplit", CombineFileSplit.class, split.getClass());
        // check the split
        BitSet bits = new BitSet(length);
        RecordReader<IntWritable, BytesWritable> reader = format.createRecordReader(split, context);
        MapContext<IntWritable, BytesWritable, IntWritable, BytesWritable> mcontext =
                new MapContextImpl<IntWritable, BytesWritable, IntWritable, BytesWritable>(
                        job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
                        MapReduceTestUtil.createDummyReporter(), split);
        reader.initialize(split, mcontext);
        assertEquals("reader class is CombineFileRecordReader.", CombineFileRecordReader.class, reader.getClass());
        try {
            while (reader.nextKeyValue()) {
                IntWritable key = reader.getCurrentKey();
                BytesWritable value = reader.getCurrentValue();
                assertNotNull("Value should not be null.", value);
                final int k = key.get();
                LOG.debug("read " + k);
                assertFalse("Key in multiple partitions.", bits.get(k));
                bits.set(k);
            }
        } finally {
            reader.close();
        }
        assertEquals("Some keys in no partition.", length, bits.cardinality());
    }
}
Also used: MapContextImpl (org.apache.hadoop.mapreduce.task.MapContextImpl), BitSet (java.util.BitSet), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), BytesWritable (org.apache.hadoop.io.BytesWritable), Random (java.util.Random), Job (org.apache.hadoop.mapreduce.Job), InputSplit (org.apache.hadoop.mapreduce.InputSplit), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
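
Note that numSplits in the loop above is computed and logged but never handed to the input format, so CombineSequenceFileInputFormat packs the whole input into the single split the assertion expects. A test that actually wanted several combine splits could cap the split size, as Example 49 below does with FileInputFormat.setMaxInputSplitSize; a hedged sketch of that variation (a fragment reusing the test's job, localFs, workDir, and format):

// Cap each combine split at roughly 1/numSplits of the total input size,
// so format.getSplits(job) can return more than one CombineFileSplit.
long totalLen = 0;
for (FileStatus stat : localFs.listStatus(workDir)) {
    totalLen += stat.getLen();
}
FileInputFormat.setMaxInputSplitSize(job, Math.max(1L, totalLen / numSplits));
List<InputSplit> splits = format.getSplits(job);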

Example 48 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

The class TestCombineSequenceFileInputFormat, method createFiles().

private static void createFiles(int length, int numFiles, Random random, Job job) throws IOException {
    Range[] ranges = createRanges(length, numFiles, random);
    for (int i = 0; i < numFiles; i++) {
        Path file = new Path(workDir, "test_" + i + ".seq");
        // create a file with length entries
        @SuppressWarnings("deprecation")
        SequenceFile.Writer writer = SequenceFile.createWriter(
                localFs, job.getConfiguration(), file, IntWritable.class, BytesWritable.class);
        Range range = ranges[i];
        try {
            for (int j = range.start; j < range.end; j++) {
                IntWritable key = new IntWritable(j);
                byte[] data = new byte[random.nextInt(10)];
                random.nextBytes(data);
                BytesWritable value = new BytesWritable(data);
                writer.append(key, value);
            }
        } finally {
            writer.close();
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), SequenceFile (org.apache.hadoop.io.SequenceFile), BytesWritable (org.apache.hadoop.io.BytesWritable), IntWritable (org.apache.hadoop.io.IntWritable)
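
The createRanges helper is not part of this excerpt. Given how its result is consumed (each file i writes keys from ranges[i].start up to ranges[i].end, and the read-side test expects every key in [0, length) exactly once), a plausible reconstruction follows; the cut-point logic and the Range(start, end) constructor are assumptions, not the original code:

// Hypothetical reconstruction: split [0, length) into numFiles contiguous,
// non-overlapping ranges so every key is written exactly once overall.
private static Range[] createRanges(int length, int numFiles, Random random) {
    Range[] ranges = new Range[numFiles];
    int start = 0;
    for (int i = 0; i < numFiles; i++) {
        // The last range must end at length; earlier ones end at a random
        // point within the slice left for the remaining files.
        int end = (i == numFiles - 1)
                ? length
                : start + random.nextInt(Math.max(1, (length - start) / (numFiles - i))) + 1;
        ranges[i] = new Range(start, end); // Range(start, end) is assumed
        start = end;
    }
    return ranges;
}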

Example 49 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

The class TestMRSequenceFileAsTextInputFormat, method testFormat().

@Test
public void testFormat() throws Exception {
    Job job = Job.getInstance(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "test.seq");
    int seed = new Random().nextInt();
    Random random = new Random(seed);
    fs.delete(dir, true);
    FileInputFormat.setInputPaths(job, dir);
    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {
        // create a file with length entries
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, LongWritable.class);
        try {
            for (int i = 0; i < length; i++) {
                IntWritable key = new IntWritable(i);
                LongWritable value = new LongWritable(10 * i);
                writer.append(key, value);
            }
        } finally {
            writer.close();
        }
        TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
        // try splitting the file in a variety of sizes
        InputFormat<Text, Text> format = new SequenceFileAsTextInputFormat();
        for (int i = 0; i < 3; i++) {
            // check each split
            BitSet bits = new BitSet(length);
            int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
            FileInputFormat.setMaxInputSplitSize(job, fs.getFileStatus(file).getLen() / numSplits);
            for (InputSplit split : format.getSplits(job)) {
                RecordReader<Text, Text> reader = format.createRecordReader(split, context);
            MapContext<Text, Text, Text, Text> mcontext =
                    new MapContextImpl<Text, Text, Text, Text>(
                            job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
                            MapReduceTestUtil.createDummyReporter(), split);
                reader.initialize(split, mcontext);
                Class<?> readerClass = reader.getClass();
                assertEquals("reader class is SequenceFileAsTextRecordReader.", SequenceFileAsTextRecordReader.class, readerClass);
                Text key;
                try {
                    int count = 0;
                    while (reader.nextKeyValue()) {
                        key = reader.getCurrentKey();
                        int keyInt = Integer.parseInt(key.toString());
                        assertFalse("Key in multiple partitions.", bits.get(keyInt));
                        bits.set(keyInt);
                        count++;
                    }
                } finally {
                    reader.close();
                }
            }
            assertEquals("Some keys in no partition.", length, bits.cardinality());
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), MapContextImpl (org.apache.hadoop.mapreduce.task.MapContextImpl), BitSet (java.util.BitSet), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), Text (org.apache.hadoop.io.Text), Random (java.util.Random), SequenceFile (org.apache.hadoop.io.SequenceFile), FileSystem (org.apache.hadoop.fs.FileSystem), LongWritable (org.apache.hadoop.io.LongWritable), Job (org.apache.hadoop.mapreduce.Job), InputSplit (org.apache.hadoop.mapreduce.InputSplit), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
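
SequenceFileAsTextInputFormat is a thin view: on disk the keys and values remain IntWritable and LongWritable, and the record reader simply calls toString() on each and wraps the result in Text. A short sketch reading the same file directly, assuming the fs, file, and conf from the test above, shows what the wrapper converts:

// Read the raw IntWritable/LongWritable pairs that the as-text view
// exposes as Text; key.toString() is exactly what the Text key contains.
SequenceFile.Reader raw = new SequenceFile.Reader(fs, file, conf);
try {
    IntWritable key = new IntWritable();
    LongWritable value = new LongWritable();
    while (raw.next(key, value)) {
        Text textKey = new Text(key.toString());
        Text textValue = new Text(value.toString());
        // In this test's data, value.get() is always 10 * key.get().
    }
} finally {
    raw.close();
}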

Example 50 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

The class TestMRMultipleOutputs, method _testMultipleOutputs().

protected void _testMultipleOutputs(boolean withCounters) throws Exception {
    String input = "a\nb\nc\nd\ne\nc\nd\ne";
    Configuration conf = createJobConf();
    Job job = MapReduceTestUtil.createJob(conf, IN_DIR, OUT_DIR, 2, 1, input);
    job.setJobName("mo");
    MultipleOutputs.addNamedOutput(job, TEXT, TextOutputFormat.class, LongWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, SEQUENCE, SequenceFileOutputFormat.class, IntWritable.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, withCounters);
    job.setMapperClass(MOMap.class);
    job.setReducerClass(MOReduce.class);
    job.waitForCompletion(true);
    // assert number of named output part files
    int namedOutputCount = 0;
    int valueBasedOutputCount = 0;
    FileSystem fs = OUT_DIR.getFileSystem(conf);
    FileStatus[] statuses = fs.listStatus(OUT_DIR);
    for (FileStatus status : statuses) {
        String fileName = status.getPath().getName();
        if (fileName.equals("text-m-00000") || fileName.equals("text-m-00001")
                || fileName.equals("text-r-00000")
                || fileName.equals("sequence_A-m-00000") || fileName.equals("sequence_A-m-00001")
                || fileName.equals("sequence_B-m-00000") || fileName.equals("sequence_B-m-00001")
                || fileName.equals("sequence_B-r-00000") || fileName.equals("sequence_C-r-00000")) {
            namedOutputCount++;
        } else if (fileName.equals("a-r-00000") || fileName.equals("b-r-00000")
                || fileName.equals("c-r-00000") || fileName.equals("d-r-00000")
                || fileName.equals("e-r-00000")) {
            valueBasedOutputCount++;
        }
    }
    assertEquals(9, namedOutputCount);
    assertEquals(5, valueBasedOutputCount);
    // assert TextOutputFormat files correctness
    BufferedReader reader = new BufferedReader(new InputStreamReader(
            fs.open(new Path(FileOutputFormat.getOutputPath(job), "text-r-00000"))));
    int count = 0;
    String line = reader.readLine();
    while (line != null) {
        assertTrue(line.endsWith(TEXT));
        line = reader.readLine();
        count++;
    }
    reader.close();
    assertFalse(count == 0);
    // assert SequenceOutputFormat files correctness
    SequenceFile.Reader seqReader = new SequenceFile.Reader(fs,
            new Path(FileOutputFormat.getOutputPath(job), "sequence_B-r-00000"), conf);
    assertEquals(IntWritable.class, seqReader.getKeyClass());
    assertEquals(Text.class, seqReader.getValueClass());
    count = 0;
    IntWritable key = new IntWritable();
    Text value = new Text();
    while (seqReader.next(key, value)) {
        assertEquals(SEQUENCE, value.toString());
        count++;
    }
    seqReader.close();
    assertFalse(count == 0);
    if (withCounters) {
        CounterGroup counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
        assertEquals(9, counters.size());
        assertEquals(4, counters.findCounter(TEXT).getValue());
        assertEquals(2, counters.findCounter(SEQUENCE + "_A").getValue());
        assertEquals(4, counters.findCounter(SEQUENCE + "_B").getValue());
        assertEquals(2, counters.findCounter(SEQUENCE + "_C").getValue());
        assertEquals(2, counters.findCounter("a").getValue());
        assertEquals(2, counters.findCounter("b").getValue());
        assertEquals(4, counters.findCounter("c").getValue());
        assertEquals(4, counters.findCounter("d").getValue());
        assertEquals(4, counters.findCounter("e").getValue());
    }
}
Also used: Path (org.apache.hadoop.fs.Path), FileStatus (org.apache.hadoop.fs.FileStatus), Configuration (org.apache.hadoop.conf.Configuration), InputStreamReader (java.io.InputStreamReader), CounterGroup (org.apache.hadoop.mapreduce.CounterGroup), BufferedReader (java.io.BufferedReader), Text (org.apache.hadoop.io.Text), SequenceFile (org.apache.hadoop.io.SequenceFile), FileSystem (org.apache.hadoop.fs.FileSystem), Job (org.apache.hadoop.mapreduce.Job), IntWritable (org.apache.hadoop.io.IntWritable)
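
MOMap and MOReduce are not included in this excerpt. Based on the files and named outputs the assertions check for ("text", "sequence" with base paths sequence_A/B/C, and the value-named reducer outputs), a mapper along the following lines would produce the map-side files; treat it as a hypothetical reconstruction, not the original class:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

// Hypothetical reconstruction of the map side: every record goes to the
// default output plus the named outputs registered in the test.
public class MOMap extends Mapper<LongWritable, Text, LongWritable, Text> {
    private MultipleOutputs<LongWritable, Text> mos;

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<LongWritable, Text>(context);
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        context.write(key, value);
        // Named "text" output -> text-m-* files; lines end with "text",
        // matching the assertTrue(line.endsWith(TEXT)) check above.
        mos.write("text", key, new Text("text"));
        // Named "sequence" output with explicit base paths ->
        // sequence_A-m-* and sequence_B-m-* files.
        mos.write("sequence", new IntWritable(1), new Text("sequence"), "sequence_A");
        mos.write("sequence", new IntWritable(2), new Text("sequence"), "sequence_B");
    }

    @Override
    protected void cleanup(Context context)
            throws IOException, InterruptedException {
        mos.close();
    }
}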

Aggregations

IntWritable (org.apache.hadoop.io.IntWritable): 338
Test (org.junit.Test): 120
Text (org.apache.hadoop.io.Text): 115
LongWritable (org.apache.hadoop.io.LongWritable): 79
Path (org.apache.hadoop.fs.Path): 66
FloatWritable (org.apache.hadoop.io.FloatWritable): 58
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 56
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 56
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 51
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 50
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 47
BytesWritable (org.apache.hadoop.io.BytesWritable): 45
SequenceFile (org.apache.hadoop.io.SequenceFile): 41
ArrayList (java.util.ArrayList): 40
Writable (org.apache.hadoop.io.Writable): 39
TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable): 37
Configuration (org.apache.hadoop.conf.Configuration): 35
IOException (java.io.IOException): 30
DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject): 29
Random (java.util.Random): 28