
Example 16 with LongWritable

Use of org.apache.hadoop.io.LongWritable in project hadoop by apache.

Class LineRecordReader, method nextKeyValue().

public boolean nextKeyValue() throws IOException {
    if (key == null) {
        key = new LongWritable();
    }
    key.set(pos);
    if (value == null) {
        value = new Text();
    }
    int newSize = 0;
    // We always read one extra line, which lies outside the upper split limit i.e. (end - 1)
    while (getFilePosition() <= end || in.needAdditionalRecordAfterSplit()) {
        if (pos == 0) {
            newSize = skipUtfByteOrderMark();
        } else {
            newSize = in.readLine(value, maxLineLength, maxBytesToConsume(pos));
            pos += newSize;
        }
        if ((newSize == 0) || (newSize < maxLineLength)) {
            break;
        }
        // line too long. try again
        LOG.info("Skipped line of size " + newSize + " at pos " + (pos - newSize));
    }
    if (newSize == 0) {
        key = null;
        value = null;
        return false;
    } else {
        return true;
    }
}
Also used : Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable)
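
A minimal companion sketch (not part of the Hadoop source): the LongWritable offset and Text line that nextKeyValue() produces are exactly what a map task receives, so a mapper consuming this reader is typed Mapper<LongWritable, Text, ...>. The class name OffsetEchoMapper is illustrative.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class OffsetEchoMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // key is the byte offset set via key.set(pos) above; value is the line that was read
        context.write(key, value);
    }
}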

Example 17 with LongWritable

Use of org.apache.hadoop.io.LongWritable in project hadoop by apache.

Class NNBench, method createControlFiles().

/**
   * Create control files before a test run.
   * The number of files created equals the number of maps specified.
   * 
   * @throws IOException on error
   */
private void createControlFiles() throws IOException {
    LOG.info("Creating " + numberOfMaps + " control files");
    for (int i = 0; i < numberOfMaps; i++) {
        String strFileName = "NNBench_Controlfile_" + i;
        Path filePath = new Path(new Path(baseDir, CONTROL_DIR_NAME), strFileName);
        SequenceFile.Writer writer = null;
        try {
            writer = SequenceFile.createWriter(getConf(), Writer.file(filePath), Writer.keyClass(Text.class), Writer.valueClass(LongWritable.class), Writer.compression(CompressionType.NONE));
            writer.append(new Text(strFileName), new LongWritable(i));
        } finally {
            if (writer != null) {
                writer.close();
            }
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SequenceFile(org.apache.hadoop.io.SequenceFile) Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable) Writer(org.apache.hadoop.io.SequenceFile.Writer)
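
A minimal sketch for reading a control file back, assuming the layout written above (Text key = control file name, LongWritable value = map index). The class name and the path argument are illustrative, not part of NNBench.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class ControlFileDump {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path controlFile = new Path(args[0]);
        Text key = new Text();
        LongWritable value = new LongWritable();
        try (SequenceFile.Reader reader =
                 new SequenceFile.Reader(conf, SequenceFile.Reader.file(controlFile))) {
            while (reader.next(key, value)) {
                // each record is (control file name, map index)
                System.out.println(key + "\t" + value.get());
            }
        }
    }
}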

Example 18 with LongWritable

Use of org.apache.hadoop.io.LongWritable in project hadoop by apache.

Class TestDataJoin, method confirmOutput().

private static void confirmOutput(Path out, JobConf job, int srcs) throws IOException {
    FileSystem fs = out.getFileSystem(job);
    FileStatus[] outlist = fs.listStatus(out);
    assertEquals(1, outlist.length);
    assertTrue(0 < outlist[0].getLen());
    FSDataInputStream in = fs.open(outlist[0].getPath());
    LineRecordReader rr = new LineRecordReader(in, 0, Integer.MAX_VALUE, job);
    LongWritable k = new LongWritable();
    Text v = new Text();
    int count = 0;
    while (rr.next(k, v)) {
        String[] vals = v.toString().split("\t");
        assertEquals(srcs + 1, vals.length);
        int[] ivals = new int[vals.length];
        for (int i = 0; i < vals.length; ++i) ivals[i] = Integer.parseInt(vals[i]);
        assertEquals(0, ivals[0] % (srcs * srcs));
        for (int i = 1; i < vals.length; ++i) {
            assertEquals((ivals[i] - (i - 1)) * srcs, 10 * ivals[0]);
        }
        ++count;
    }
    assertEquals(4, count);
}
Also used : FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable)
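
For comparison, a hedged sketch of the same LongWritable/Text read loop driven through the old mapred API's TextInputFormat instead of constructing LineRecordReader from an open stream; the class and method names are illustrative and not taken from TestDataJoin.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class LineCounter {
    static long countLines(Path dir, JobConf job) throws Exception {
        FileInputFormat.setInputPaths(job, dir);
        TextInputFormat format = new TextInputFormat();
        format.configure(job);
        long count = 0;
        for (InputSplit split : format.getSplits(job, 1)) {
            RecordReader<LongWritable, Text> rr =
                format.getRecordReader(split, job, Reporter.NULL);
            LongWritable key = rr.createKey();
            Text value = rr.createValue();
            // same next(key, value) contract used by confirmOutput above
            while (rr.next(key, value)) {
                count++;
            }
            rr.close();
        }
        return count;
    }
}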

Example 19 with LongWritable

Use of org.apache.hadoop.io.LongWritable in project hadoop by apache.

Class TestDBInputFormat, method testDBRecordReader().

/**
   * 
   * Test DBRecordReader: the reader should create keys and values and track its position.
   */
@SuppressWarnings("unchecked")
@Test(timeout = 5000)
public void testDBRecordReader() throws Exception {
    JobConf job = mock(JobConf.class);
    DBConfiguration dbConfig = mock(DBConfiguration.class);
    String[] fields = { "field1", "filed2" };
    @SuppressWarnings("rawtypes") DBRecordReader reader = new DBInputFormat<NullDBWritable>().new DBRecordReader(new DBInputSplit(), NullDBWritable.class, job, DriverForTest.getConnection(), dbConfig, "condition", fields, "table");
    LongWritable key = reader.createKey();
    assertEquals(0, key.get());
    DBWritable value = reader.createValue();
    assertEquals("org.apache.hadoop.mapred.lib.db.DBInputFormat$NullDBWritable", value.getClass().getName());
    assertEquals(0, reader.getPos());
    assertFalse(reader.next(key, value));
}
Also used : DBConfiguration(org.apache.hadoop.mapred.lib.db.DBConfiguration) DBRecordReader(org.apache.hadoop.mapred.lib.db.DBInputFormat.DBRecordReader) NullDBWritable(org.apache.hadoop.mapred.lib.db.DBInputFormat.NullDBWritable) DBInputSplit(org.apache.hadoop.mapred.lib.db.DBInputFormat.DBInputSplit) LongWritable(org.apache.hadoop.io.LongWritable) JobConf(org.apache.hadoop.mapred.JobConf) NullDBWritable(org.apache.hadoop.mapred.lib.db.DBInputFormat.NullDBWritable) DriverForTest(org.apache.hadoop.mapreduce.lib.db.DriverForTest) Test(org.junit.Test)
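
A minimal sketch of how DBRecordReader is normally wired into a job, rather than constructed directly as in the test. The JDBC driver class, URL, credentials, table and column names are placeholders; the point is that DBInputFormat hands the mapper LongWritable row indices as keys.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;
import org.apache.hadoop.mapred.lib.db.DBInputFormat;

public class DBJobSetup {
    static void configureInput(JobConf job) {
        // placeholder driver class, URL and credentials
        DBConfiguration.configureDB(job, "org.hsqldb.jdbc.JDBCDriver",
            "jdbc:hsqldb:mem:testdb", "user", "password");
        job.setInputFormat(DBInputFormat.class);
        // rows are read as (LongWritable row index, DBWritable record)
        DBInputFormat.setInput(job, DBInputFormat.NullDBWritable.class,
            "table", "condition", "orderBy", "field1", "field2");
    }
}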

Example 20 with LongWritable

Use of org.apache.hadoop.io.LongWritable in project hadoop by apache.

Class TestLineRecordReader, method testUncompressedInputDefaultDelimiterPosValue().

@Test
public void testUncompressedInputDefaultDelimiterPosValue() throws Exception {
    Configuration conf = new Configuration();
    String inputData = "1234567890\r\n12\r\n345";
    Path inputFile = createInputFile(conf, inputData);
    conf.setInt("io.file.buffer.size", 10);
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    FileSplit split = new FileSplit(inputFile, 0, 15, (String[]) null);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    LineRecordReader reader = new LineRecordReader(null);
    reader.initialize(split, context);
    LongWritable key;
    Text value;
    reader.nextKeyValue();
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    // Get first record:"1234567890"
    assertEquals(10, value.getLength());
    assertEquals(0, key.get());
    reader.nextKeyValue();
    // Get second record:"12"
    assertEquals(2, value.getLength());
    // Key should be 12 right after "1234567890\r\n"
    assertEquals(12, key.get());
    assertFalse(reader.nextKeyValue());
    // Key should be 16 right after "1234567890\r\n12\r\n"
    assertEquals(16, key.get());
    split = new FileSplit(inputFile, 15, 4, (String[]) null);
    reader = new LineRecordReader(null);
    reader.initialize(split, context);
    // The second split dropped the first record "\n"
    reader.nextKeyValue();
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    // Get third record:"345"
    assertEquals(3, value.getLength());
    // Key should be 16 right after "1234567890\r\n12\r\n"
    assertEquals(16, key.get());
    assertFalse(reader.nextKeyValue());
    // Key should be 19 right after "1234567890\r\n12\r\n345"
    assertEquals(19, key.get());
    inputData = "123456789\r\r\n";
    inputFile = createInputFile(conf, inputData);
    split = new FileSplit(inputFile, 0, 12, (String[]) null);
    reader = new LineRecordReader(null);
    reader.initialize(split, context);
    reader.nextKeyValue();
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    // Get first record:"123456789"
    assertEquals(9, value.getLength());
    assertEquals(0, key.get());
    reader.nextKeyValue();
    // Get second record:""
    assertEquals(0, value.getLength());
    // Key should be 10 right after "123456789\r"
    assertEquals(10, key.get());
    assertFalse(reader.nextKeyValue());
    // Key should be 12 right after "123456789\r\r\n"
    assertEquals(12, key.get());
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable) Test(org.junit.Test)
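
A hedged sketch of a helper like the test's createInputFile, which is private to TestLineRecordReader and not shown here: it simply writes the given string to a file on the local file system so that the byte offsets asserted above line up one byte per character. The class and method names are illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class TestFileHelper {
    static Path writeInputFile(Configuration conf, String data, Path file) throws Exception {
        FileSystem localFs = FileSystem.getLocal(conf);
        try (FSDataOutputStream out = localFs.create(file, true)) {
            // writeBytes() emits one byte per char, matching the asserted key offsets
            out.writeBytes(data);
        }
        return file;
    }
}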

Aggregations

LongWritable (org.apache.hadoop.io.LongWritable): 445
Text (org.apache.hadoop.io.Text): 220
Test (org.junit.Test): 171
IntWritable (org.apache.hadoop.io.IntWritable): 102
Path (org.apache.hadoop.fs.Path): 99
BytesWritable (org.apache.hadoop.io.BytesWritable): 70
FloatWritable (org.apache.hadoop.io.FloatWritable): 68
Configuration (org.apache.hadoop.conf.Configuration): 62
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 62
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 60
ArrayList (java.util.ArrayList): 59
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 57
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 53
IOException (java.io.IOException): 49
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 48
SequenceFile (org.apache.hadoop.io.SequenceFile): 42
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 40
FileSystem (org.apache.hadoop.fs.FileSystem): 37
JobConf (org.apache.hadoop.mapred.JobConf): 37
DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject): 35