Use of org.apache.hadoop.io.NullWritable in project hive by apache.
The class TestLlapOutputFormat, method testValues.
@Test
public void testValues() throws Exception {
  JobConf job = new JobConf();
  for (int k = 0; k < 5; ++k) {
    String id = "foobar" + k;
    job.set(LlapOutputFormat.LLAP_OF_ID_KEY, id);
    LlapOutputFormat format = new LlapOutputFormat();
    HiveConf conf = new HiveConf();
    // Register the fragment with the LLAP output service before writing.
    Socket socket = new Socket("localhost", service.getPort());
    LOG.debug("Socket connected");
    OutputStream socketStream = socket.getOutputStream();
    LlapOutputSocketInitMessage.newBuilder().setFragmentId(id).build().writeDelimitedTo(socketStream);
    socketStream.flush();
    Thread.sleep(3000);
    LOG.debug("Data written");
    // Write ten rows with NullWritable keys and Text values.
    RecordWriter<NullWritable, Text> writer = format.getRecordWriter(null, job, null, null);
    Text text = new Text();
    LOG.debug("Have record writer");
    for (int i = 0; i < 10; ++i) {
      text.set("" + i);
      writer.write(NullWritable.get(), text);
    }
    writer.close(null);
    // Read the rows back over the same socket and count them.
    InputStream in = socket.getInputStream();
    LlapBaseRecordReader reader = new LlapBaseRecordReader(in, null, Text.class, job, null, null);
    LOG.debug("Have record reader");
    // Send the done event, which LlapBaseRecordReader expects at end of input.
    reader.handleEvent(ReaderEvent.doneEvent());
    int count = 0;
    while (reader.next(NullWritable.get(), text)) {
      LOG.debug(text.toString());
      count++;
    }
    reader.close();
    Assert.assertEquals(10, count);
  }
}
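The test above uses NullWritable.get() as the key for every record: NullWritable is a zero-length singleton Writable, so it contributes no bytes and only the Text values carry data. A minimal, self-contained sketch of the same key-less pattern against a plain SequenceFile (the output path is illustrative, not part of the test above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class NullKeyWriteSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path("/tmp/null-key-demo.seq"); // illustrative path
    // NullWritable.get() always returns the same singleton; it serializes to
    // nothing, so only the Text values contribute bytes to the file.
    try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(path),
        SequenceFile.Writer.keyClass(NullWritable.class),
        SequenceFile.Writer.valueClass(Text.class))) {
      for (int i = 0; i < 10; i++) {
        writer.append(NullWritable.get(), new Text(Integer.toString(i)));
      }
    }
  }
}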
Use of org.apache.hadoop.io.NullWritable in project hadoop by apache.
The class TestTextOutputFormat, method testCompress.
/**
 * Tests writing a compressed output file with TextOutputFormat.
 * @throws IOException
 */
@Test
public void testCompress() throws IOException {
  JobConf job = new JobConf();
  job.set(JobContext.TASK_ATTEMPT_ID, attempt);
  job.set(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.COMPRESS, "true");
  FileOutputFormat.setOutputPath(job, workDir.getParent().getParent());
  FileOutputFormat.setWorkOutputPath(job, workDir);
  FileSystem fs = workDir.getFileSystem(job);
  if (!fs.mkdirs(workDir)) {
    fail("Failed to create output directory");
  }
  String file = "test_compress.txt";
  // A reporter that does nothing
  Reporter reporter = Reporter.NULL;
  TextOutputFormat<Object, Object> theOutputFormat = new TextOutputFormat<Object, Object>();
  RecordWriter<Object, Object> theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
  Text key1 = new Text("key1");
  Text key2 = new Text("key2");
  Text val1 = new Text("val1");
  Text val2 = new Text("val2");
  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key1, val1);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val1);
    theRecordWriter.write(nullWritable, val2);
    theRecordWriter.write(key2, nullWritable);
    theRecordWriter.write(key1, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key2, val2);
  } finally {
    theRecordWriter.close(reporter);
  }
  StringBuffer expectedOutput = new StringBuffer();
  expectedOutput.append(key1).append("\t").append(val1).append("\n");
  expectedOutput.append(val1).append("\n");
  expectedOutput.append(val2).append("\n");
  expectedOutput.append(key2).append("\n");
  expectedOutput.append(key1).append("\n");
  expectedOutput.append(key2).append("\t").append(val2).append("\n");
  DefaultCodec codec = new DefaultCodec();
  codec.setConf(job);
  Path expectedFile = new Path(workDir, file + codec.getDefaultExtension());
  final FileInputStream istream = new FileInputStream(expectedFile.toString());
  CompressionInputStream cistream = codec.createInputStream(istream);
  LineReader reader = new LineReader(cistream);
  String output = "";
  Text out = new Text();
  while (reader.readLine(out) > 0) {
    output += out;
    output += "\n";
  }
  reader.close();
  assertEquals(expectedOutput.toString(), output);
}
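Note what the expected output demonstrates: when the key is null or NullWritable, TextOutputFormat emits only the value; when the value is null or NullWritable, it emits only the key; when both are null/NullWritable, nothing is written. The test also enables compression by setting the COMPRESS configuration key directly. In regular job code the same effect is usually achieved through the FileOutputFormat helpers; a minimal sketch (the codec choice here is illustrative, the test itself relies on the default DefaultCodec):

import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class CompressedOutputConfigSketch {
  public static JobConf configure() {
    JobConf job = new JobConf();
    // Equivalent to setting the COMPRESS key to "true", as the test does directly.
    FileOutputFormat.setCompressOutput(job, true);
    // Pick the codec explicitly instead of relying on the default DefaultCodec.
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    return job;
  }
}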
Use of org.apache.hadoop.io.NullWritable in project hadoop by apache.
The class TestWrappedRecordReaderClassloader, method testClassLoader.
/**
 * Tests that the class loader set by {@link JobConf#setClassLoader(ClassLoader)}
 * is inherited by any {@link WrappedRecordReader}s created by
 * {@link CompositeRecordReader}.
 */
@Test
public void testClassLoader() throws Exception {
  JobConf job = new JobConf();
  Fake_ClassLoader classLoader = new Fake_ClassLoader();
  job.setClassLoader(classLoader);
  assertTrue(job.getClassLoader() instanceof Fake_ClassLoader);
  FileSystem fs = FileSystem.get(job);
  Path testdir = new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(fs);
  Path base = new Path(testdir, "/empty");
  Path[] src = { new Path(base, "i0"), new Path("i1"), new Path("i2") };
  job.set("mapreduce.join.expr", CompositeInputFormat.compose("outer", IF_ClassLoaderChecker.class, src));
  CompositeInputFormat<NullWritable> inputFormat = new CompositeInputFormat<NullWritable>();
  inputFormat.getRecordReader(inputFormat.getSplits(job, 1)[0], job, Reporter.NULL);
}
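For context, the same compose call is how a map-side join is normally wired up outside of tests. A minimal sketch, assuming two pre-sorted, identically partitioned SequenceFile inputs (the paths and the choice of SequenceFileInputFormat are illustrative):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.join.CompositeInputFormat;

public class CompositeJoinConfigSketch {
  public static JobConf configure(Path left, Path right) {
    JobConf job = new JobConf();
    // Build the join expression over the two inputs; "outer" keeps keys that
    // are missing from one side, matching the expression used in the test above.
    job.set("mapreduce.join.expr",
        CompositeInputFormat.compose("outer", SequenceFileInputFormat.class, left, right));
    // CompositeInputFormat then hands each mapper one TupleWritable per key.
    job.setInputFormat(CompositeInputFormat.class);
    return job;
  }
}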
Use of org.apache.hadoop.io.NullWritable in project hadoop by apache.
The class TestPipesNonJavaInputFormat, method testFormat.
/**
 * Tests PipesNonJavaInputFormat.
 */
@Test
public void testFormat() throws IOException {
  PipesNonJavaInputFormat inputFormat = new PipesNonJavaInputFormat();
  JobConf conf = new JobConf();
  Reporter reporter = mock(Reporter.class);
  RecordReader<FloatWritable, NullWritable> reader = inputFormat.getRecordReader(new FakeSplit(), conf, reporter);
  assertEquals(0.0f, reader.getProgress(), 0.001);
  // input and output files
  File input1 = new File(workSpace + File.separator + "input1");
  if (!input1.getParentFile().exists()) {
    Assert.assertTrue(input1.getParentFile().mkdirs());
  }
  if (!input1.exists()) {
    Assert.assertTrue(input1.createNewFile());
  }
  File input2 = new File(workSpace + File.separator + "input2");
  if (!input2.exists()) {
    Assert.assertTrue(input2.createNewFile());
  }
  // set data for splits
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
      StringUtils.escapeString(input1.getAbsolutePath()) + "," + StringUtils.escapeString(input2.getAbsolutePath()));
  InputSplit[] splits = inputFormat.getSplits(conf, 2);
  assertEquals(2, splits.length);
  PipesNonJavaInputFormat.PipesDummyRecordReader dummyRecordReader =
      new PipesNonJavaInputFormat.PipesDummyRecordReader(conf, splits[0]);
  // empty dummyRecordReader
  assertNull(dummyRecordReader.createKey());
  assertNull(dummyRecordReader.createValue());
  assertEquals(0, dummyRecordReader.getPos());
  assertEquals(0.0, dummyRecordReader.getProgress(), 0.001);
  // test method next
  assertTrue(dummyRecordReader.next(new FloatWritable(2.0f), NullWritable.get()));
  assertEquals(2.0, dummyRecordReader.getProgress(), 0.001);
  dummyRecordReader.close();
}
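The PipesDummyRecordReader is deliberately unusual: createKey() and createValue() return null, which is exactly what the assertions above verify. For contrast, a generic sketch of how a typical old-API RecordReader is consumed (this is a standard pattern, not code from PipesNonJavaInputFormat; the helper name is made up for illustration):

import java.io.IOException;
import org.apache.hadoop.mapred.RecordReader;

public class RecordReaderDrainSketch {
  // Reads every record from a reader whose createKey()/createValue() return
  // reusable instances, and returns the record count.
  public static <K, V> long drain(RecordReader<K, V> reader) throws IOException {
    long records = 0;
    K key = reader.createKey();
    V value = reader.createValue();
    while (reader.next(key, value)) {
      records++;
    }
    reader.close();
    return records;
  }
}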
Use of org.apache.hadoop.io.NullWritable in project hadoop by apache.
The class InputSampler, method writePartitionFile.
/**
* Write a partition file for the given job, using the Sampler provided.
* Queries the sampler for a sample keyset, sorts by the output key
* comparator, selects the keys for each rank, and writes to the destination
* returned from {@link TotalOrderPartitioner#getPartitionFile}.
*/
// getInputFormat, getOutputKeyComparator
@SuppressWarnings("unchecked")
public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler)
    throws IOException, ClassNotFoundException, InterruptedException {
  Configuration conf = job.getConfiguration();
  final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
  int numPartitions = job.getNumReduceTasks();
  K[] samples = (K[]) sampler.getSample(inf, job);
  LOG.info("Using " + samples.length + " samples");
  RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
  Arrays.sort(samples, comparator);
  Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
  FileSystem fs = dst.getFileSystem(conf);
  fs.delete(dst, false);
  // The partition file is a key-only SequenceFile: NullWritable for every value.
  SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(), NullWritable.class);
  NullWritable nullValue = NullWritable.get();
  float stepSize = samples.length / (float) numPartitions;
  int last = -1;
  for (int i = 1; i < numPartitions; ++i) {
    int k = Math.round(stepSize * i);
    // Skip past duplicate keys so the split points stay strictly increasing.
    while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
      ++k;
    }
    writer.append(samples[k], nullValue);
    last = k;
  }
  writer.close();
}
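In a job, writePartitionFile is typically called during setup, after pointing TotalOrderPartitioner at the partition file and before submission. A minimal sketch, assuming a Text map output key and a RandomSampler with illustrative sampling parameters:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class TotalOrderSetupSketch {
  public static void configure(Job job, Path partitionFile) throws Exception {
    Configuration conf = job.getConfiguration();
    // Tell the partitioner (and writePartitionFile) where the boundary keys live.
    TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    // Sample 10% of the keys, capped at 10000 samples, to pick partition boundaries.
    InputSampler.Sampler<Text, NullWritable> sampler =
        new InputSampler.RandomSampler<>(0.1, 10000);
    InputSampler.writePartitionFile(job, sampler);
  }
}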