Example 26 with Reader

Use of org.apache.hadoop.io.file.tfile.TFile.Reader in project hadoop by apache.

From the class TestTFileSplit, method readFile:

void readFile() throws IOException {
    long fileLength = fs.getFileStatus(path).getLen();
    int numSplit = 10;
    // Round up so the byte ranges are guaranteed to cover the whole file.
    long splitSize = fileLength / numSplit + 1;
    Reader reader = new Reader(fs.open(path), fileLength, conf);
    long offset = 0;
    long rowCount = 0;
    BytesWritable key, value;
    for (int i = 0; i < numSplit; ++i, offset += splitSize) {
        // Non-overlapping byte ranges that cover the file map to
        // non-overlapping record ranges, so each record is returned
        // by exactly one scanner.
        Scanner scanner = reader.createScannerByByteRange(offset, splitSize);
        int count = 0;
        key = new BytesWritable();
        value = new BytesWritable();
        while (!scanner.atEnd()) {
            scanner.entry().get(key, value);
            ++count;
            scanner.advance();
        }
        scanner.close();
        assertTrue(count > 0);
        rowCount += count;
    }
    // Every record must have been seen exactly once across all splits.
    assertEquals(reader.getEntryCount(), rowCount);
    reader.close();
}
Also used: Scanner (org.apache.hadoop.io.file.tfile.TFile.Reader.Scanner), Reader (org.apache.hadoop.io.file.tfile.TFile.Reader), BytesWritable (org.apache.hadoop.io.BytesWritable)
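
The split tests share fixture fields fs, path, and conf, and assume a TFile has already been written at path. A minimal sketch of producing such a file with TFile.Writer; the block size, compression setting, key format, and output location here are illustrative assumptions, not taken from the test suite:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.file.tfile.TFile;

static void writeDemoTFile() throws java.io.IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    // Hypothetical location; the real tests write into their own build directory.
    Path path = new Path("/tmp/tfile-split-demo.tfile");
    FSDataOutputStream out = fs.create(path);
    // "memcmp" sorts keys as raw bytes, so keys must be appended in byte order;
    // zero-padding the counter keeps string order consistent with byte order.
    TFile.Writer writer = new TFile.Writer(out, 64 * 1024, TFile.COMPRESSION_NONE,
        TFile.COMPARATOR_MEMCMP, conf);
    try {
        for (int i = 0; i < 10000; i++) {
            writer.append(String.format("key%08d", i).getBytes(),
                ("value" + i).getBytes());
        }
    } finally {
        writer.close();
        out.close();
    }
}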

Example 27 with Reader

Use of org.apache.hadoop.io.file.tfile.TFile.Reader in project hadoop by apache.

From the class TestTFileSplit, method readRowSplits:

/**
 * Similar to readFile(), but tests scanners created by record
 * numbers rather than byte offsets.
 */
void readRowSplits(int numSplits) throws IOException {
    Reader reader = new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf);
    long totalRecords = reader.getEntryCount();
    for (int i = 0; i < numSplits; i++) {
        // Partition [0, totalRecords) into numSplits half-open record ranges.
        long startRec = i * totalRecords / numSplits;
        long endRec = (i + 1) * totalRecords / numSplits;
        if (i == numSplits - 1) {
            endRec = totalRecords;
        }
        Scanner scanner = reader.createScannerByRecordNum(startRec, endRec);
        int count = 0;
        BytesWritable key = new BytesWritable();
        BytesWritable value = new BytesWritable();
        long x = startRec;
        while (!scanner.atEnd()) {
            assertEquals("Incorrect RecNum returned by scanner", x, scanner.getRecordNum());
            scanner.entry().get(key, value);
            ++count;
            // Reading the entry must not move the scanner's record position.
            assertEquals("Incorrect RecNum returned by scanner", x, scanner.getRecordNum());
            scanner.advance();
            ++x;
        }
        scanner.close();
        assertEquals(endRec - startRec, count);
    }
    // A range starting at the end of the file must yield zero records;
    // an end record number of -1 means "through the last record".
    Scanner scanner = reader.createScannerByRecordNum(totalRecords, -1);
    assertTrue(scanner.atEnd());
    scanner.close();
    reader.close();
}
Also used: Scanner (org.apache.hadoop.io.file.tfile.TFile.Reader.Scanner), Reader (org.apache.hadoop.io.file.tfile.TFile.Reader), BytesWritable (org.apache.hadoop.io.BytesWritable)
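
Byte-based and record-based splitting can be bridged: Reader.getRecordNumNear(long offset) maps a byte offset to the record number of the first record in the first compressed block at or after that offset. A sketch, assuming the same fs, path, and conf fixture as above, of converting byte splits into non-overlapping record ranges:

long fileLength = fs.getFileStatus(path).getLen();
Reader reader = new Reader(fs.open(path), fileLength, conf);
int numSplits = 10;
long splitSize = fileLength / numSplits + 1;
for (int i = 0; i < numSplits; i++) {
    long beginRec = reader.getRecordNumNear(i * splitSize);
    // The last split runs through the final record.
    long endRec = (i == numSplits - 1)
        ? reader.getEntryCount()
        : reader.getRecordNumNear((i + 1) * splitSize);
    Scanner scanner = reader.createScannerByRecordNum(beginRec, endRec);
    while (!scanner.atEnd()) {
        // ... process scanner.entry() ...
        scanner.advance();
    }
    scanner.close();
}
reader.close();

Because getRecordNumNear() snaps to block boundaries, adjacent byte splits produce record ranges that neither overlap nor leave gaps, which is the same invariant readFile() asserts.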

Example 28 with Reader

Use of org.apache.hadoop.io.file.tfile.TFile.Reader in project hadoop by apache.

From the class TestTFileStreams, method testFailureNegativeOffset:

@Test
public void testFailureNegativeOffset() throws IOException {
    if (skip)
        return;
    writeRecords(2, true, true);
    Reader reader = new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf);
    Scanner scanner = reader.createScanner();
    byte[] buf = new byte[K];
    try {
        // A negative destination offset is invalid and must be rejected.
        scanner.entry().getKey(buf, -1);
        fail("Failed to handle key negative offset.");
    } catch (Exception e) {
        // noop, expecting an exception
    }
    scanner.close();
    reader.close();
}
Also used: Scanner (org.apache.hadoop.io.file.tfile.TFile.Reader.Scanner), Reader (org.apache.hadoop.io.file.tfile.TFile.Reader), IOException (java.io.IOException), EOFException (java.io.EOFException), Test (org.junit.Test)
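
For contrast with the failure case, a sketch of the non-failing buffer-based retrieval: the destination offset passed to Entry.getKey must be non-negative and the buffer large enough for the whole key. The buffer sizing and getValue overload shown here are assumptions in the spirit of the tests, not code from the suite; reader is assumed to be an open TFile.Reader as above:

Scanner scanner = reader.createScanner();
while (!scanner.atEnd()) {
    Scanner.Entry entry = scanner.entry();
    // Size the buffer from the entry itself; getKey returns the key length.
    byte[] keyBuf = new byte[entry.getKeyLength()];
    entry.getKey(keyBuf, 0); // offset must be >= 0, with room for the whole key
    // getValueLength() is only known when the value is small enough to be
    // fully buffered; this sketch assumes small values.
    byte[] valBuf = new byte[entry.getValueLength()];
    entry.getValue(valBuf, 0);
    scanner.advance();
}
scanner.close();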

Aggregations

Reader (org.apache.hadoop.io.file.tfile.TFile.Reader): 28 uses
Scanner (org.apache.hadoop.io.file.tfile.TFile.Reader.Scanner): 22 uses
Test (org.junit.Test): 16 uses
IOException (java.io.IOException): 9 uses
EOFException (java.io.EOFException): 7 uses
Path (org.apache.hadoop.fs.Path): 5 uses
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 4 uses
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 4 uses
BytesWritable (org.apache.hadoop.io.BytesWritable): 3 uses
Writer (org.apache.hadoop.io.file.tfile.TFile.Writer): 3 uses
DataInputStream (java.io.DataInputStream): 1 use
DataOutputStream (java.io.DataOutputStream): 1 use
Random (java.util.Random): 1 use
Location (org.apache.hadoop.io.file.tfile.TFile.Reader.Location): 1 use