Example 11 with LongWritable

Use of org.apache.hadoop.io.LongWritable in the Apache Hadoop project.

From the class TestCombineTextInputFormat, method testFormat.

@Test(timeout = 10000)
public void testFormat() throws Exception {
    JobConf job = new JobConf(defaultConf);
    Random random = new Random();
    long seed = random.nextLong();
    LOG.info("seed = " + seed);
    random.setSeed(seed);
    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, workDir);
    final int length = 10000;
    final int numFiles = 10;
    createFiles(length, numFiles, random);
    // create a combined split for the files
    CombineTextInputFormat format = new CombineTextInputFormat();
    LongWritable key = new LongWritable();
    Text value = new Text();
    for (int i = 0; i < 3; i++) {
        int numSplits = random.nextInt(length / 20) + 1;
        LOG.info("splitting: requesting = " + numSplits);
        InputSplit[] splits = format.getSplits(job, numSplits);
        LOG.info("splitting: got =        " + splits.length);
        // we should have a single split as the length is comfortably smaller than
        // the block size
        assertEquals("We got more than one splits!", 1, splits.length);
        InputSplit split = splits[0];
        assertEquals("It should be CombineFileSplit", CombineFileSplit.class, split.getClass());
        // check the split
        BitSet bits = new BitSet(length);
        LOG.debug("split= " + split);
        RecordReader<LongWritable, Text> reader = format.getRecordReader(split, job, voidReporter);
        try {
            int count = 0;
            while (reader.next(key, value)) {
                int v = Integer.parseInt(value.toString());
                LOG.debug("read " + v);
                if (bits.get(v)) {
                    LOG.warn("conflict with " + v + " at position " + reader.getPos());
                }
                assertFalse("Key in multiple partitions.", bits.get(v));
                bits.set(v);
                count++;
            }
            LOG.info("splits=" + split + " count=" + count);
        } finally {
            reader.close();
        }
        assertEquals("Some keys in no partition.", length, bits.cardinality());
    }
}
Also used: CombineTextInputFormat (org.apache.hadoop.mapred.lib.CombineTextInputFormat), Random (java.util.Random), BitSet (java.util.BitSet), Text (org.apache.hadoop.io.Text), LongWritable (org.apache.hadoop.io.LongWritable), Test (org.junit.Test)
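
The test drives the format directly, but in a real job the same old-API format is wired in through the JobConf. A minimal driver sketch, not from the test itself: the class name CombineDriver is hypothetical, and the split.maxsize setting assumes that property is read by the CombineFileInputFormat split logic underlying this format.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.CombineTextInputFormat;

public class CombineDriver {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(CombineDriver.class);
        job.setJobName("combine-text-demo");
        // Pack many small files into few splits instead of one split per file.
        job.setInputFormat(CombineTextInputFormat.class);
        // Cap the size of a combined split (assumption: this property is honored
        // by the CombineFileInputFormat-based split computation).
        job.setLong("mapreduce.input.fileinputformat.split.maxsize", 128L * 1024 * 1024);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Identity mapper/reducer by default; the point here is the input format.
        JobClient.runJob(job);
    }
}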

Example 12 with LongWritable

Use of org.apache.hadoop.io.LongWritable in the Apache Hadoop project.

From the class TestConcatenatedCompressedInput, method readSplit.

private static List<Text> readSplit(TextInputFormat format, InputSplit split, JobConf jobConf) throws IOException {
    List<Text> result = new ArrayList<Text>();
    RecordReader<LongWritable, Text> reader = format.getRecordReader(split, jobConf, voidReporter);
    LongWritable key = reader.createKey();
    Text value = reader.createValue();
    try {
        while (reader.next(key, value)) {
            result.add(value);
            // A fresh Text per record: next() reuses the value object in place.
            value = reader.createValue();
        }
    } finally {
        // Close the reader even if next() throws.
        reader.close();
    }
    return result;
}
Also used: ArrayList (java.util.ArrayList), Text (org.apache.hadoop.io.Text), LongWritable (org.apache.hadoop.io.LongWritable)
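
The fresh createValue() inside the loop matters: the old-API reader rewrites the same value instance on every next() call. For contrast, a sketch of the hypothetical broken variant (buggyReadSplit is illustrative, not part of the test):

// Hypothetical broken variant, for contrast only: one shared Text instance.
// RecordReader.next(key, value) overwrites the value object in place, so the
// list would end up holding N references to the same Text, all showing the
// contents of the last record read.
private static List<Text> buggyReadSplit(TextInputFormat format, InputSplit split, JobConf jobConf) throws IOException {
    List<Text> result = new ArrayList<Text>();
    RecordReader<LongWritable, Text> reader = format.getRecordReader(split, jobConf, voidReporter);
    LongWritable key = reader.createKey();
    Text value = reader.createValue();
    try {
        while (reader.next(key, value)) {
            result.add(value); // BUG: the same instance is added every iteration
        }
    } finally {
        reader.close();
    }
    return result;
}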

Example 13 with LongWritable

Use of org.apache.hadoop.io.LongWritable in the Apache Hadoop project.

From the class TestFixedLengthInputFormat, method testNoRecordLength.

/**
   * Test with no record length set.
   */
@Test(timeout = 5000)
public void testNoRecordLength() throws IOException {
    localFs.delete(workDir, true);
    Path file = new Path(workDir, "testFormat.txt");
    createFile(file, null, 10, 10);
    // The record length config property is deliberately left unset,
    // so getRecordReader() should fail with an IOException.
    JobConf job = new JobConf(defaultConf);
    FileInputFormat.setInputPaths(job, workDir);
    FixedLengthInputFormat format = new FixedLengthInputFormat();
    format.configure(job);
    InputSplit[] splits = format.getSplits(job, 1);
    boolean exceptionThrown = false;
    for (InputSplit split : splits) {
        try {
            RecordReader<LongWritable, BytesWritable> reader = format.getRecordReader(split, job, voidReporter);
        } catch (IOException ioe) {
            exceptionThrown = true;
            LOG.info("Exception message:" + ioe.getMessage());
        }
    }
    assertTrue("Exception for not setting record length:", exceptionThrown);
}
Also used: Path (org.apache.hadoop.fs.Path), BytesWritable (org.apache.hadoop.io.BytesWritable), LongWritable (org.apache.hadoop.io.LongWritable), IOException (java.io.IOException), Test (org.junit.Test)
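
For contrast, a sketch of the happy path this test guards: set the record length before asking for a record reader. The fixtures defaultConf, workDir, and voidReporter are assumed from the surrounding test class.

// Sketch of the working configuration: set the record length first.
JobConf job = new JobConf(defaultConf);
FileInputFormat.setInputPaths(job, workDir);
FixedLengthInputFormat format = new FixedLengthInputFormat();
// Static setter; stores the length under "fixedlengthinputformat.record.length".
FixedLengthInputFormat.setRecordLength(job, 10);
format.configure(job);
for (InputSplit split : format.getSplits(job, 1)) {
    RecordReader<LongWritable, BytesWritable> reader =
        format.getRecordReader(split, job, voidReporter); // no IOException now
    // ... consume fixed 10-byte records ...
    reader.close();
}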

Example 14 with LongWritable

Use of org.apache.hadoop.io.LongWritable in the Apache Hadoop project.

From the class TestFixedLengthInputFormat, method readSplit.

private static List<String> readSplit(FixedLengthInputFormat format, InputSplit split, JobConf job) throws IOException {
    List<String> result = new ArrayList<String>();
    RecordReader<LongWritable, BytesWritable> reader = format.getRecordReader(split, job, voidReporter);
    LongWritable key = reader.createKey();
    BytesWritable value = reader.createValue();
    try {
        while (reader.next(key, value)) {
            result.add(new String(value.getBytes(), 0, value.getLength()));
        }
    } finally {
        reader.close();
    }
    return result;
}
Also used: ArrayList (java.util.ArrayList), BytesWritable (org.apache.hadoop.io.BytesWritable), LongWritable (org.apache.hadoop.io.LongWritable)
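
One nuance in this helper: new String(bytes, 0, len) decodes with the platform default charset, which can make results environment-dependent. A small alternative sketch (the RecordDecoding class is illustrative and assumes the records were written as UTF-8):

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.BytesWritable;

final class RecordDecoding {
    // Only the first getLength() bytes of a BytesWritable's backing array
    // are valid; decode them with an explicit charset rather than the
    // platform default.
    static String decode(BytesWritable value) {
        return new String(value.getBytes(), 0, value.getLength(), StandardCharsets.UTF_8);
    }
}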

Example 15 with LongWritable

Use of org.apache.hadoop.io.LongWritable in the Apache Hadoop project.

From the class TestFixedLengthInputFormat, method testZeroRecordLength.

/**
   * Test with the record length set to 0.
   */
@Test(timeout = 5000)
public void testZeroRecordLength() throws IOException {
    localFs.delete(workDir, true);
    Path file = new Path(workDir, "testFormat.txt");
    createFile(file, null, 10, 10);
    // Set the record length config property to an invalid value of zero;
    // getRecordReader() should reject it with an IOException.
    JobConf job = new JobConf(defaultConf);
    FileInputFormat.setInputPaths(job, workDir);
    FixedLengthInputFormat format = new FixedLengthInputFormat();
    FixedLengthInputFormat.setRecordLength(job, 0);
    format.configure(job);
    InputSplit[] splits = format.getSplits(job, 1);
    boolean exceptionThrown = false;
    for (InputSplit split : splits) {
        try {
            RecordReader<LongWritable, BytesWritable> reader = format.getRecordReader(split, job, voidReporter);
        } catch (IOException ioe) {
            exceptionThrown = true;
            LOG.info("Exception message:" + ioe.getMessage());
        }
    }
    assertTrue("Exception for zero record length:", exceptionThrown);
}
Also used: Path (org.apache.hadoop.fs.Path), BytesWritable (org.apache.hadoop.io.BytesWritable), LongWritable (org.apache.hadoop.io.LongWritable), IOException (java.io.IOException), Test (org.junit.Test)
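
setRecordLength is a typed wrapper around a single configuration key, so an invalid value like zero could just as easily arrive from a config file. A sketch of the equivalent direct setting; it assumes the FIXED_RECORD_LENGTH constant exposed by this format class.

import org.apache.hadoop.mapred.FixedLengthInputFormat;
import org.apache.hadoop.mapred.JobConf;

final class FixedLengthConfig {
    // Equivalent to FixedLengthInputFormat.setRecordLength(job, length):
    // the setter stores the int under FIXED_RECORD_LENGTH
    // ("fixedlengthinputformat.record.length").
    static void configure(JobConf job, int length) {
        job.setInt(FixedLengthInputFormat.FIXED_RECORD_LENGTH, length);
    }
}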

Aggregations

Classes used together with LongWritable across the indexed examples, with occurrence counts:

LongWritable (org.apache.hadoop.io.LongWritable): 445
Text (org.apache.hadoop.io.Text): 220
Test (org.junit.Test): 171
IntWritable (org.apache.hadoop.io.IntWritable): 102
Path (org.apache.hadoop.fs.Path): 99
BytesWritable (org.apache.hadoop.io.BytesWritable): 70
FloatWritable (org.apache.hadoop.io.FloatWritable): 68
Configuration (org.apache.hadoop.conf.Configuration): 62
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 62
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 60
ArrayList (java.util.ArrayList): 59
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 57
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 53
IOException (java.io.IOException): 49
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 48
SequenceFile (org.apache.hadoop.io.SequenceFile): 42
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 40
FileSystem (org.apache.hadoop.fs.FileSystem): 37
JobConf (org.apache.hadoop.mapred.JobConf): 37
DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject): 35