Use of org.apache.hadoop.io.file.tfile.TFile.Reader in project hadoop by apache.
The class TestTFileUnsortedByteArrays, method testScanRange.
// we can still scan records in an unsorted TFile
@Test
public void testScanRange() throws IOException {
  Reader reader =
      new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf);
  Assert.assertFalse(reader.isSorted());
  Assert.assertEquals(4, (int) reader.getEntryCount());
  Scanner scanner = reader.createScanner();
  try {
    // read the key first, then the value
    byte[] kbuf = new byte[BUF_SIZE];
    int klen = scanner.entry().getKeyLength();
    scanner.entry().getKey(kbuf);
    Assert.assertEquals("keyZ", new String(kbuf, 0, klen));
    byte[] vbuf = new byte[BUF_SIZE];
    int vlen = scanner.entry().getValueLength();
    scanner.entry().getValue(vbuf);
    Assert.assertEquals("valueZ", new String(vbuf, 0, vlen));
    scanner.advance();
    // now try reading the value first
    vbuf = new byte[BUF_SIZE];
    vlen = scanner.entry().getValueLength();
    scanner.entry().getValue(vbuf);
    Assert.assertEquals("valueM", new String(vbuf, 0, vlen));
    kbuf = new byte[BUF_SIZE];
    klen = scanner.entry().getKeyLength();
    scanner.entry().getKey(kbuf);
    Assert.assertEquals("keyM", new String(kbuf, 0, klen));
  } finally {
    scanner.close();
    reader.close();
  }
}
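For context, a minimal sketch of the writer side that would produce the unsorted file this test reads; the four keyZ/keyM/keyN/keyA records and the reuse of the test's fs, path, and conf fields are assumptions inferred from the assertions above (a null comparator is what makes a TFile unsorted):

FSDataOutputStream out = fs.create(path);
// a null comparator produces an unsorted TFile
Writer writer = new Writer(out, 1024 /* minBlockSize */, "none", null, conf);
writer.append("keyZ".getBytes(), "valueZ".getBytes());
writer.append("keyM".getBytes(), "valueM".getBytes());
writer.append("keyN".getBytes(), "valueN".getBytes());
writer.append("keyA".getBytes(), "valueA".getBytes());
writer.close();
out.close();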
Use of org.apache.hadoop.io.file.tfile.TFile.Reader in project hadoop by apache.
The class TestTFileSeek, method seekTFile.
public void seekTFile() throws IOException {
  int miss = 0;
  long totalBytes = 0;
  FSDataInputStream fsdis = fs.open(path);
  Reader reader = new Reader(fsdis, fs.getFileStatus(path).getLen(), conf);
  KeySampler kSampler =
      new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(), keyLenGen);
  Scanner scanner = reader.createScanner();
  BytesWritable key = new BytesWritable();
  BytesWritable val = new BytesWritable();
  timer.reset();
  timer.start();
  for (int i = 0; i < options.seekCount; ++i) {
    kSampler.next(key);
    scanner.lowerBound(key.getBytes(), 0, key.getLength());
    if (!scanner.atEnd()) {
      scanner.entry().get(key, val);
      totalBytes += key.getLength();
      totalBytes += val.getLength();
    } else {
      ++miss;
    }
  }
  timer.stop();
  // elapsed time in microseconds
  double duration = (double) timer.read() / 1000;
  System.out.printf(
      "time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n",
      timer.toString(),
      NanoTimer.nanoTimeToString(timer.read() / options.seekCount),
      options.seekCount - miss, miss,
      (double) totalBytes / 1024 / (options.seekCount - miss));
}
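One detail worth noting in the loop above: lowerBound() positions the scanner at the first entry whose key is greater than or equal to the probe key, which is why a miss is detected with atEnd() rather than a return value. When an exact match matters, Scanner.seekTo() reports one directly. A hedged sketch (the probe key is hypothetical; reader is a sorted TFile opened as in the benchmark):

byte[] probe = "key0042".getBytes(); // hypothetical probe key
Scanner scanner = reader.createScanner();
scanner.lowerBound(probe, 0, probe.length);
if (!scanner.atEnd()) {
  // positioned at the smallest key >= probe
}
boolean exact = scanner.seekTo(probe, 0, probe.length); // true only on an exact match
scanner.close();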
Use of org.apache.hadoop.io.file.tfile.TFile.Reader in project hadoop by apache.
The class TestTFileSplit, method checkRecNums.
void checkRecNums() throws IOException {
  long fileLen = fs.getFileStatus(path).getLen();
  Reader reader = new Reader(fs.open(path), fileLen, conf);
  long totalRecs = reader.getEntryCount();
  long begin = random.nextLong() % (totalRecs / 2);
  if (begin < 0)
    begin += (totalRecs / 2);
  long end = random.nextLong() % (totalRecs / 2);
  if (end < 0)
    end += (totalRecs / 2);
  end += (totalRecs / 2) + 1;
  assertEquals("RecNum for offset=0 should be 0", 0,
      reader.getRecordNumNear(0));
  for (long x : new long[] { fileLen, fileLen + 1, 2 * fileLen }) {
    assertEquals("RecNum for offset>=fileLen should be total entries",
        totalRecs, reader.getRecordNumNear(x));
  }
  for (long i = 0; i < 100; ++i) {
    assertEquals("Location to RecNum conversion not symmetric", i,
        reader.getRecordNumByLocation(reader.getLocationByRecordNum(i)));
  }
  for (long i = 1; i < 100; ++i) {
    long x = totalRecs - i;
    assertEquals("Location to RecNum conversion not symmetric", x,
        reader.getRecordNumByLocation(reader.getLocationByRecordNum(x)));
  }
  for (long i = begin; i < end; ++i) {
    assertEquals("Location to RecNum conversion not symmetric", i,
        reader.getRecordNumByLocation(reader.getLocationByRecordNum(i)));
  }
  for (int i = 0; i < 1000; ++i) {
    long x = random.nextLong() % totalRecs;
    if (x < 0)
      x += totalRecs;
    assertEquals("Location to RecNum conversion not symmetric", x,
        reader.getRecordNumByLocation(reader.getLocationByRecordNum(x)));
  }
  reader.close();
}
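The record-number APIs exercised here are what make byte-range input splits possible over a TFile. A minimal sketch (not part of the test; numSplits is an illustrative assumption) of mapping each byte range of the file to a half-open record range:

long fileLen = fs.getFileStatus(path).getLen();
Reader reader = new Reader(fs.open(path), fileLen, conf);
int numSplits = 10; // hypothetical split count
for (int i = 0; i < numSplits; ++i) {
  long offBegin = fileLen * i / numSplits;
  long offEnd = fileLen * (i + 1) / numSplits;
  // getRecordNumNear() rounds a byte offset to a nearby record boundary
  long recBegin = reader.getRecordNumNear(offBegin);
  long recEnd = reader.getRecordNumNear(offEnd);
  System.out.printf("split %d covers records [%d, %d)%n", i, recBegin, recEnd);
}
reader.close();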
Use of org.apache.hadoop.io.file.tfile.TFile.Reader in project hadoop by apache.
The class TestTFile, method unsortedWithSomeCodec.
// unsorted with some codec
void unsortedWithSomeCodec(String codec) throws IOException {
  Path uTfile = new Path(ROOT, "unsorted.tfile");
  FSDataOutputStream fout = createFSOutput(uTfile);
  Writer writer = new Writer(fout, minBlockSize, codec, null, conf);
  writeRecords(writer);
  writer.close();
  fout.close();
  FSDataInputStream fin = fs.open(uTfile);
  // reuse fin rather than leaking a second stream from fs.open(uTfile)
  Reader reader = new Reader(fin, fs.getFileStatus(uTfile).getLen(), conf);
  Scanner scanner = reader.createScanner();
  readAllRecords(scanner);
  scanner.close();
  reader.close();
  fin.close();
  fs.delete(uTfile, true);
}
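A typical driver for this helper, sketched under the assumption that the built-in codec names are wanted; TFile accepts "none" and "gz" out of the box, while "lzo" requires the native LZO codec to be installed:

@Test
public void testUnsortedTFiles() throws IOException {
  unsortedWithSomeCodec("none");
  unsortedWithSomeCodec("gz");
}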
Use of org.apache.hadoop.io.file.tfile.TFile.Reader in project hadoop by apache.
The class TestTFile, method testMetaBlocks.
// test meta blocks for tfiles
@Test
public void testMetaBlocks() throws IOException {
  Path mFile = new Path(ROOT, "meta.tfile");
  FSDataOutputStream fout = createFSOutput(mFile);
  Writer writer = new Writer(fout, minBlockSize, "none", null, conf);
  someTestingWithMetaBlock(writer, "none");
  writer.close();
  fout.close();
  FSDataInputStream fin = fs.open(mFile);
  Reader reader = new Reader(fin, fs.getFileStatus(mFile).getLen(), conf);
  someReadingWithMetaBlock(reader);
  // close the reader and stream before deleting the file
  reader.close();
  fin.close();
  fs.delete(mFile, true);
}
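The helpers someTestingWithMetaBlock and someReadingWithMetaBlock are not shown here; the following is a hedged sketch of the meta-block round trip they presumably perform, with the block name "testMeta" and its payload as illustrative assumptions rather than values taken from the test:

static void writeMetaBlock(Writer writer) throws IOException {
  // prepareMetaBlock returns a stream for a named, optionally compressed meta block
  DataOutputStream dout = writer.prepareMetaBlock("testMeta", "none");
  dout.write("sample meta payload".getBytes());
  dout.close();
}

static void readMetaBlock(Reader reader) throws IOException {
  DataInputStream din = reader.getMetaBlock("testMeta");
  byte[] buf = new byte[64];
  int n = din.read(buf);
  // new String(buf, 0, n) should equal the payload written above
  din.close();
}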