
Example 11 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project hive by apache.

From class TestLlapOutputFormat, method testValues:

@Test
public void testValues() throws Exception {
    JobConf job = new JobConf();
    for (int k = 0; k < 5; ++k) {
        String id = "foobar" + k;
        job.set(LlapOutputFormat.LLAP_OF_ID_KEY, id);
        LlapOutputFormat format = new LlapOutputFormat();
        HiveConf conf = new HiveConf();
        Socket socket = new Socket("localhost", service.getPort());
        LOG.debug("Socket connected");
        OutputStream socketStream = socket.getOutputStream();
        LlapOutputSocketInitMessage.newBuilder().setFragmentId(id).build().writeDelimitedTo(socketStream);
        socketStream.flush();
        Thread.sleep(3000);
        LOG.debug("Data written");
        RecordWriter<NullWritable, Text> writer = format.getRecordWriter(null, job, null, null);
        Text text = new Text();
        LOG.debug("Have record writer");
        for (int i = 0; i < 10; ++i) {
            text.set("" + i);
            writer.write(NullWritable.get(), text);
        }
        writer.close(null);
        InputStream in = socket.getInputStream();
        LlapBaseRecordReader reader = new LlapBaseRecordReader(in, null, Text.class, job, null, null);
        LOG.debug("Have record reader");
        // Send done event, which LlapRecordReader is expecting upon end of input
        reader.handleEvent(ReaderEvent.doneEvent());
        int count = 0;
        while (reader.next(NullWritable.get(), text)) {
            LOG.debug(text.toString());
            count++;
        }
        reader.close();
        Assert.assertEquals(10, count);
    }
}
Also used : InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Text(org.apache.hadoop.io.Text) JobConf(org.apache.hadoop.mapred.JobConf) NullWritable(org.apache.hadoop.io.NullWritable) Socket(java.net.Socket) Test(org.junit.Test)
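
NullWritable is a stateless singleton: NullWritable.get() always returns the same instance, every instance compares equal, and it serializes to zero bytes, which is why the test above can reuse it as a throwaway key for every record. A minimal standalone sketch (not part of the Hive test) illustrating that behaviour:

import org.apache.hadoop.io.NullWritable;

public class NullWritableSingletonDemo {
    public static void main(String[] args) {
        // get() always returns the same shared instance
        NullWritable a = NullWritable.get();
        NullWritable b = NullWritable.get();
        System.out.println(a == b);          // true
        System.out.println(a.compareTo(b));  // 0: all NullWritables are equal
        // write(DataOutput) and readFields(DataInput) are no-ops,
        // so a NullWritable key adds nothing to the serialized record
    }
}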

Example 12 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project hadoop by apache.

From class TestTextOutputFormat, method testCompress:

/**
   * test compressed file
   * @throws IOException
   */
@Test
public void testCompress() throws IOException {
    JobConf job = new JobConf();
    job.set(JobContext.TASK_ATTEMPT_ID, attempt);
    job.set(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.COMPRESS, "true");
    FileOutputFormat.setOutputPath(job, workDir.getParent().getParent());
    FileOutputFormat.setWorkOutputPath(job, workDir);
    FileSystem fs = workDir.getFileSystem(job);
    if (!fs.mkdirs(workDir)) {
        fail("Failed to create output directory");
    }
    String file = "test_compress.txt";
    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;
    TextOutputFormat<Object, Object> theOutputFormat = new TextOutputFormat<Object, Object>();
    RecordWriter<Object, Object> theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
    Text key1 = new Text("key1");
    Text key2 = new Text("key2");
    Text val1 = new Text("val1");
    Text val2 = new Text("val2");
    NullWritable nullWritable = NullWritable.get();
    try {
        theRecordWriter.write(key1, val1);
        theRecordWriter.write(null, nullWritable);
        theRecordWriter.write(null, val1);
        theRecordWriter.write(nullWritable, val2);
        theRecordWriter.write(key2, nullWritable);
        theRecordWriter.write(key1, null);
        theRecordWriter.write(null, null);
        theRecordWriter.write(key2, val2);
    } finally {
        theRecordWriter.close(reporter);
    }
    StringBuffer expectedOutput = new StringBuffer();
    expectedOutput.append(key1).append("\t").append(val1).append("\n");
    expectedOutput.append(val1).append("\n");
    expectedOutput.append(val2).append("\n");
    expectedOutput.append(key2).append("\n");
    expectedOutput.append(key1).append("\n");
    expectedOutput.append(key2).append("\t").append(val2).append("\n");
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(job);
    Path expectedFile = new Path(workDir, file + codec.getDefaultExtension());
    final FileInputStream istream = new FileInputStream(expectedFile.toString());
    CompressionInputStream cistream = codec.createInputStream(istream);
    LineReader reader = new LineReader(cistream);
    String output = "";
    Text out = new Text();
    while (reader.readLine(out) > 0) {
        output += out;
        output += "\n";
    }
    reader.close();
    assertEquals(expectedOutput.toString(), output);
}
Also used : Path(org.apache.hadoop.fs.Path) CompressionInputStream(org.apache.hadoop.io.compress.CompressionInputStream) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) Text(org.apache.hadoop.io.Text) NullWritable(org.apache.hadoop.io.NullWritable) FileInputStream(java.io.FileInputStream) FileSystem(org.apache.hadoop.fs.FileSystem) LineReader(org.apache.hadoop.util.LineReader) Test(org.junit.Test)
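
The expected output above shows how TextOutputFormat's record writer treats null and NullWritable the same way: a null key or value is skipped along with the tab separator, and a record whose key and value are both null/NullWritable is dropped entirely, which is why eight writes produce only six lines. For completeness, a minimal sketch of enabling output compression through the mapred FileOutputFormat helpers rather than setting the property string directly (the codec choice here is only an example):

import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class CompressedOutputConfig {
    public static JobConf configure(JobConf job) {
        // equivalent to the FileOutputFormat.COMPRESS property set in the test above
        FileOutputFormat.setCompressOutput(job, true);
        // pick a codec explicitly instead of relying on the cluster default
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        return job;
    }
}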

Example 13 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project hadoop by apache.

From class TestWrappedRecordReaderClassloader, method testClassLoader:

/**
   * Tests the class loader set by {@link JobConf#setClassLoader(ClassLoader)}
   * is inherited by any {@link WrappedRecordReader}s created by
   * {@link CompositeRecordReader}
   */
@Test
public void testClassLoader() throws Exception {
    JobConf job = new JobConf();
    Fake_ClassLoader classLoader = new Fake_ClassLoader();
    job.setClassLoader(classLoader);
    assertTrue(job.getClassLoader() instanceof Fake_ClassLoader);
    FileSystem fs = FileSystem.get(job);
    Path testdir = new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(fs);
    Path base = new Path(testdir, "/empty");
    Path[] src = { new Path(base, "i0"), new Path("i1"), new Path("i2") };
    job.set("mapreduce.join.expr", CompositeInputFormat.compose("outer", IF_ClassLoaderChecker.class, src));
    CompositeInputFormat<NullWritable> inputFormat = new CompositeInputFormat<NullWritable>();
    inputFormat.getRecordReader(inputFormat.getSplits(job, 1)[0], job, Reporter.NULL);
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf) NullWritable(org.apache.hadoop.io.NullWritable) Test(org.junit.Test)
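
CompositeInputFormat.compose builds the join expression string that the mapreduce.join.expr property expects, and the resulting CompositeRecordReader wraps one reader per child input. A minimal sketch of configuring an outer join over two sequence-file inputs, assuming the old mapred join API and placeholder paths:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.join.CompositeInputFormat;

public class JoinJobConfig {
    public static JobConf configure(JobConf job) {
        job.setInputFormat(CompositeInputFormat.class);
        // compose() produces the expression string parsed by CompositeInputFormat
        job.set("mapreduce.join.expr",
                CompositeInputFormat.compose("outer",
                        SequenceFileInputFormat.class,
                        new Path("/data/left"), new Path("/data/right")));
        return job;
    }
}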

Example 14 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project hadoop by apache.

From class TestPipesNonJavaInputFormat, method testFormat:

/**
   * test PipesNonJavaInputFormat
   */
@Test
public void testFormat() throws IOException {
    PipesNonJavaInputFormat inputFormat = new PipesNonJavaInputFormat();
    JobConf conf = new JobConf();
    Reporter reporter = mock(Reporter.class);
    RecordReader<FloatWritable, NullWritable> reader = inputFormat.getRecordReader(new FakeSplit(), conf, reporter);
    assertEquals(0.0f, reader.getProgress(), 0.001);
    // input and output files
    File input1 = new File(workSpace + File.separator + "input1");
    if (!input1.getParentFile().exists()) {
        Assert.assertTrue(input1.getParentFile().mkdirs());
    }
    if (!input1.exists()) {
        Assert.assertTrue(input1.createNewFile());
    }
    File input2 = new File(workSpace + File.separator + "input2");
    if (!input2.exists()) {
        Assert.assertTrue(input2.createNewFile());
    }
    // set data for splits
    conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, StringUtils.escapeString(input1.getAbsolutePath()) + "," + StringUtils.escapeString(input2.getAbsolutePath()));
    InputSplit[] splits = inputFormat.getSplits(conf, 2);
    assertEquals(2, splits.length);
    PipesNonJavaInputFormat.PipesDummyRecordReader dummyRecordReader = new PipesNonJavaInputFormat.PipesDummyRecordReader(conf, splits[0]);
    // empty dummyRecordReader
    assertNull(dummyRecordReader.createKey());
    assertNull(dummyRecordReader.createValue());
    assertEquals(0, dummyRecordReader.getPos());
    assertEquals(0.0, dummyRecordReader.getProgress(), 0.001);
    // test method next
    assertTrue(dummyRecordReader.next(new FloatWritable(2.0f), NullWritable.get()));
    assertEquals(2.0, dummyRecordReader.getProgress(), 0.001);
    dummyRecordReader.close();
}
Also used : FloatWritable(org.apache.hadoop.io.FloatWritable) Reporter(org.apache.hadoop.mapred.Reporter) FakeSplit(org.apache.hadoop.mapred.pipes.TestPipeApplication.FakeSplit) JobConf(org.apache.hadoop.mapred.JobConf) NullWritable(org.apache.hadoop.io.NullWritable) File(java.io.File) InputSplit(org.apache.hadoop.mapred.InputSplit) Test(org.junit.Test)
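
Here NullWritable is the reader's value type, so each record is effectively a key with no payload. The same pattern shows up in ordinary jobs whenever only keys matter; a short, hypothetical mapper sketch (not part of the Pipes test) using the old mapred API:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Emits each input line as a key with a NullWritable value, e.g. as the
// map side of a simple de-duplication job.
public class DistinctLineMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    public void map(LongWritable offset, Text line,
                    OutputCollector<Text, NullWritable> output, Reporter reporter)
            throws IOException {
        output.collect(line, NullWritable.get());
    }
}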

Example 15 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project hadoop by apache.

From class InputSampler, method writePartitionFile:

/**
   * Write a partition file for the given job, using the Sampler provided.
   * Queries the sampler for a sample keyset, sorts by the output key
   * comparator, selects the keys for each rank, and writes to the destination
   * returned from {@link TotalOrderPartitioner#getPartitionFile}.
   */
// getInputFormat, getOutputKeyComparator
@SuppressWarnings("unchecked")
public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = job.getConfiguration();
    final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    int numPartitions = job.getNumReduceTasks();
    K[] samples = (K[]) sampler.getSample(inf, job);
    LOG.info("Using " + samples.length + " samples");
    RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
    Arrays.sort(samples, comparator);
    Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
    FileSystem fs = dst.getFileSystem(conf);
    fs.delete(dst, false);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(), NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    float stepSize = samples.length / (float) numPartitions;
    int last = -1;
    for (int i = 1; i < numPartitions; ++i) {
        int k = Math.round(stepSize * i);
        while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
            ++k;
        }
        writer.append(samples[k], nullValue);
        last = k;
    }
    writer.close();
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) NullWritable(org.apache.hadoop.io.NullWritable) RawComparator(org.apache.hadoop.io.RawComparator) SequenceFile(org.apache.hadoop.io.SequenceFile) FileInputFormat(org.apache.hadoop.mapreduce.lib.input.FileInputFormat) InputFormat(org.apache.hadoop.mapreduce.InputFormat) FileSystem(org.apache.hadoop.fs.FileSystem)
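
In this method NullWritable is the value class of the partition SequenceFile: only the sampled split-point keys matter, so every entry carries an empty value. A minimal sketch of how writePartitionFile is typically combined with TotalOrderPartitioner, assuming Text map output keys and a placeholder partition-file path:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class TotalOrderSortSetup {
    public static void configure(Job job) throws Exception {
        job.setNumReduceTasks(4);
        job.setMapOutputKeyClass(Text.class);
        job.setPartitionerClass(TotalOrderPartitioner.class);
        // the file written by writePartitionFile is read back by the partitioner
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(),
                new Path("/tmp/partitions.lst"));
        // sample roughly 10% of records, capped at 10000 samples over at most 10 splits
        InputSampler.writePartitionFile(job,
                new InputSampler.RandomSampler<Text, Text>(0.1, 10000, 10));
    }
}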

Aggregations

NullWritable (org.apache.hadoop.io.NullWritable): 113
Test (org.junit.Test): 68
Path (org.apache.hadoop.fs.Path): 47
Configuration (org.apache.hadoop.conf.Configuration): 44
File (java.io.File): 33
FileSystem (org.apache.hadoop.fs.FileSystem): 28
SequenceFile (org.apache.hadoop.io.SequenceFile): 24
JobConf (org.apache.hadoop.mapred.JobConf): 24
RouteBuilder (org.apache.camel.builder.RouteBuilder): 18
MockEndpoint (org.apache.camel.component.mock.MockEndpoint): 18
ArrayFile (org.apache.hadoop.io.ArrayFile): 18
Text (org.apache.hadoop.io.Text): 17
InputSplit (org.apache.hadoop.mapred.InputSplit): 17
LongWritable (org.apache.hadoop.io.LongWritable): 16
IntWritable (org.apache.hadoop.io.IntWritable): 11
IOException (java.io.IOException): 10
Writer (org.apache.hadoop.io.SequenceFile.Writer): 9
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 8
Pair (org.apache.hadoop.mrunit.types.Pair): 8
CharacteristicSetWritable (org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable): 8