Search in sources :

Example 1 with DeltaByteArrayReader

use of org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader in project parquet-mr by apache.

the class BenchmarkDeltaByteArray method benchmarkRandomStringsWithDeltaLengthByteArrayValuesWriter.

/**
 * Benchmarks pushing the {@code values} array through a {@link DeltaByteArrayWriter}
 * (via {@code Utils.writeData}) and pulling it back out through a
 * {@link DeltaByteArrayReader} (via {@code Utils.readData}), then prints the
 * position reported by the input stream afterwards.
 *
 * @throws IOException propagated from the writer/reader helpers
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkRandomStringsWithDeltaLengthByteArrayValuesWriter() throws IOException {
    final int size64k = 64 * 1024;
    DeltaByteArrayWriter deltaWriter =
        new DeltaByteArrayWriter(size64k, size64k, new DirectByteBufferAllocator());
    DeltaByteArrayReader deltaReader = new DeltaByteArrayReader();

    Utils.writeData(deltaWriter, values);

    ByteBufferInputStream encoded = deltaWriter.getBytes().toInputStream();
    // The decoded values are not inspected; only the read itself is being timed.
    Binary[] decoded = Utils.readData(deltaReader, encoded, values.length);

    System.out.println("size " + encoded.position());
}
Also used : DirectByteBufferAllocator(org.apache.parquet.bytes.DirectByteBufferAllocator) DeltaByteArrayWriter(org.apache.parquet.column.values.deltastrings.DeltaByteArrayWriter) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) DeltaByteArrayReader(org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader) Binary(org.apache.parquet.io.api.Binary) Test(org.junit.Test) BenchmarkOptions(com.carrotsearch.junitbenchmarks.BenchmarkOptions)

Example 2 with DeltaByteArrayReader

use of org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader in project parquet-mr by apache.

the class TestCorruptDeltaByteArrays method testOldReassemblyWithoutCorruption.

/**
 * Writes two pages of ten strings each with the same writer (calling
 * {@code reset()} between pages) and checks that each page can be decoded by
 * its own fresh {@link DeltaByteArrayReader}, without linking the second
 * reader to the first.
 *
 * @throws Exception if writing or reading a page fails
 */
@Test
public void testOldReassemblyWithoutCorruption() throws Exception {
    DeltaByteArrayWriter writer = getDeltaByteArrayWriter();
    for (int i = 0; i < 10; i += 1) {
        writer.writeBytes(Binary.fromString(str(i)));
    }
    ByteBuffer firstPageBytes = writer.getBytes().toByteBuffer();
    // sets previous to new byte[0]
    writer.reset();
    for (int i = 10; i < 20; i += 1) {
        writer.writeBytes(Binary.fromString(str(i)));
    }
    ByteBuffer secondPageBytes = writer.getBytes().toByteBuffer();

    DeltaByteArrayReader firstPageReader = new DeltaByteArrayReader();
    firstPageReader.initFromPage(10, ByteBufferInputStream.wrap(firstPageBytes));
    for (int i = 0; i < 10; i += 1) {
        // JUnit order is (expected, actual) so failure messages label the values correctly.
        assertEquals(str(i), firstPageReader.readBytes().toStringUsingUTF8());
    }

    // Deliberately no setPreviousReader(...) here: the second page is read on its own.
    DeltaByteArrayReader secondPageReader = new DeltaByteArrayReader();
    secondPageReader.initFromPage(10, ByteBufferInputStream.wrap(secondPageBytes));
    for (int i = 10; i < 20; i += 1) {
        assertEquals(str(i), secondPageReader.readBytes().toStringUsingUTF8());
    }
}
Also used : DeltaByteArrayWriter(org.apache.parquet.column.values.deltastrings.DeltaByteArrayWriter) DeltaByteArrayReader(org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader) ByteBuffer(java.nio.ByteBuffer) Test(org.junit.Test)

Example 3 with DeltaByteArrayReader

use of org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader in project parquet-mr by apache.

the class TestCorruptDeltaByteArrays method testReassemblyWithoutCorruption.

/**
 * Writes two pages of ten strings each with the same writer (calling
 * {@code reset()} between pages) and checks that both decode correctly when
 * the second page's reader is linked to the first via
 * {@code setPreviousReader}.
 *
 * @throws Exception if writing or reading a page fails
 */
@Test
public void testReassemblyWithoutCorruption() throws Exception {
    DeltaByteArrayWriter writer = getDeltaByteArrayWriter();
    for (int i = 0; i < 10; i += 1) {
        writer.writeBytes(Binary.fromString(str(i)));
    }
    ByteBuffer firstPageBytes = writer.getBytes().toByteBuffer();
    // sets previous to new byte[0]
    writer.reset();
    for (int i = 10; i < 20; i += 1) {
        writer.writeBytes(Binary.fromString(str(i)));
    }
    ByteBuffer secondPageBytes = writer.getBytes().toByteBuffer();

    DeltaByteArrayReader firstPageReader = new DeltaByteArrayReader();
    firstPageReader.initFromPage(10, ByteBufferInputStream.wrap(firstPageBytes));
    for (int i = 0; i < 10; i += 1) {
        // JUnit order is (expected, actual) so failure messages label the values correctly.
        assertEquals(str(i), firstPageReader.readBytes().toStringUsingUTF8());
    }

    DeltaByteArrayReader secondPageReader = new DeltaByteArrayReader();
    secondPageReader.initFromPage(10, ByteBufferInputStream.wrap(secondPageBytes));
    // Link to the fully-consumed first-page reader before reading any value.
    secondPageReader.setPreviousReader(firstPageReader);
    for (int i = 10; i < 20; i += 1) {
        assertEquals(str(i), secondPageReader.readBytes().toStringUsingUTF8());
    }
}
Also used : DeltaByteArrayWriter(org.apache.parquet.column.values.deltastrings.DeltaByteArrayWriter) DeltaByteArrayReader(org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader) ByteBuffer(java.nio.ByteBuffer) Test(org.junit.Test)

Example 4 with DeltaByteArrayReader

use of org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader in project parquet-mr by apache.

the class TestCorruptDeltaByteArrays method testReassemblyWithCorruptPage.

/**
 * Writes a first page of ten strings, then a second page after calling
 * {@code corruptWriter(writer, lastValue)} on the reset writer, and checks
 * two things about that second page:
 * <ul>
 *   <li>a reader with no previous reader throws
 *       {@link ArrayIndexOutOfBoundsException} on the first read;</li>
 *   <li>a reader linked to the first-page reader via
 *       {@code setPreviousReader} decodes values 10..19 correctly.</li>
 * </ul>
 *
 * @throws Exception if writing or reading a page fails unexpectedly
 */
@Test
public void testReassemblyWithCorruptPage() throws Exception {
    DeltaByteArrayWriter writer = getDeltaByteArrayWriter();
    String lastValue = null;
    for (int i = 0; i < 10; i += 1) {
        lastValue = str(i);
        writer.writeBytes(Binary.fromString(lastValue));
    }
    ByteBuffer firstPageBytes = writer.getBytes().toByteBuffer();
    // sets previous to new byte[0]
    writer.reset();
    // NOTE(review): presumably restores the writer's "previous" value to lastValue,
    // mimicking a writer that failed to reset between pages; confirm against corruptWriter.
    corruptWriter(writer, lastValue);
    for (int i = 10; i < 20; i += 1) {
        writer.writeBytes(Binary.fromString(str(i)));
    }
    ByteBuffer corruptPageBytes = writer.getBytes().toByteBuffer();

    DeltaByteArrayReader firstPageReader = new DeltaByteArrayReader();
    firstPageReader.initFromPage(10, ByteBufferInputStream.wrap(firstPageBytes));
    for (int i = 0; i < 10; i += 1) {
        assertEquals(str(i), firstPageReader.readBytes().toStringUsingUTF8());
    }

    DeltaByteArrayReader corruptPageReader = new DeltaByteArrayReader();
    corruptPageReader.initFromPage(10, ByteBufferInputStream.wrap(corruptPageBytes));
    try {
        corruptPageReader.readBytes();
        fail("Corrupt page did not throw an exception when read");
    } catch (ArrayIndexOutOfBoundsException e) {
        // expected, this is a corrupt page
    }

    DeltaByteArrayReader secondPageReader = new DeltaByteArrayReader();
    secondPageReader.initFromPage(10, ByteBufferInputStream.wrap(corruptPageBytes));
    secondPageReader.setPreviousReader(firstPageReader);
    for (int i = 10; i < 20; i += 1) {
        // (expected, actual), matching the first-page loop above.
        assertEquals(str(i), secondPageReader.readBytes().toStringUsingUTF8());
    }
}
Also used : DeltaByteArrayWriter(org.apache.parquet.column.values.deltastrings.DeltaByteArrayWriter) DeltaByteArrayReader(org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader) ByteBuffer(java.nio.ByteBuffer) Test(org.junit.Test)

Example 5 with DeltaByteArrayReader

use of org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader in project parquet-mr by apache.

the class BenchmarkDeltaByteArray method benchmarkSortedStringsWithDeltaLengthByteArrayValuesWriter.

/**
 * Benchmarks pushing the {@code sortedVals} array through a
 * {@link DeltaByteArrayWriter} (via {@code Utils.writeData}) and pulling it
 * back out through a {@link DeltaByteArrayReader} (via {@code Utils.readData}),
 * then prints the position reported by the input stream afterwards.
 *
 * @throws IOException propagated from the writer/reader helpers
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkSortedStringsWithDeltaLengthByteArrayValuesWriter() throws IOException {
    DeltaByteArrayWriter writer = new DeltaByteArrayWriter(64 * 1024, 64 * 1024, new DirectByteBufferAllocator());
    DeltaByteArrayReader reader = new DeltaByteArrayReader();
    Utils.writeData(writer, sortedVals);
    ByteBufferInputStream data = writer.getBytes().toInputStream();
    // Read back exactly as many entries as were written: the count comes from
    // sortedVals (the array written above), not from the unrelated values array.
    // The decoded result is not inspected; only the read itself is being timed.
    Utils.readData(reader, data, sortedVals.length);
    System.out.println("size " + data.position());
}
Also used : DirectByteBufferAllocator(org.apache.parquet.bytes.DirectByteBufferAllocator) DeltaByteArrayWriter(org.apache.parquet.column.values.deltastrings.DeltaByteArrayWriter) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) DeltaByteArrayReader(org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader) Binary(org.apache.parquet.io.api.Binary) Test(org.junit.Test) BenchmarkOptions(com.carrotsearch.junitbenchmarks.BenchmarkOptions)

Aggregations

DeltaByteArrayReader (org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader): 5, DeltaByteArrayWriter (org.apache.parquet.column.values.deltastrings.DeltaByteArrayWriter): 5, Test (org.junit.Test): 5, ByteBuffer (java.nio.ByteBuffer): 3, BenchmarkOptions (com.carrotsearch.junitbenchmarks.BenchmarkOptions): 2, ByteBufferInputStream (org.apache.parquet.bytes.ByteBufferInputStream): 2, DirectByteBufferAllocator (org.apache.parquet.bytes.DirectByteBufferAllocator): 2, Binary (org.apache.parquet.io.api.Binary): 2