Search in sources :

Example 26 with IsmRecord

Use of org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord in the Apache Beam project.

The method testReadMissingKeysBypassingBloomFilter of the class IsmReaderTest.

/**
 * Verifies that looking up keys which are absent from the file yields no data even when the
 * Bloom filter check is forced to report a possible match.
 *
 * <p>The file is populated with two records whose second key components are {@code 0x04} and
 * {@code 0x08}; the probed keys ({@code 0x02}, {@code 0x06}, {@code 0x10}) fall before, between
 * and after those values respectively.
 */
@Test
public void testReadMissingKeysBypassingBloomFilter() throws Exception {
    File ismFile = tmpFolder.newFile();

    // Two records only, leaving gaps before, between and after their keys.
    List<IsmRecord<byte[]>> records = new ArrayList<>();
    records.add(IsmRecord.<byte[]>of(ImmutableList.of(EMPTY, new byte[] { 0x04 }), EMPTY));
    records.add(IsmRecord.<byte[]>of(ImmutableList.of(EMPTY, new byte[] { 0x08 }), EMPTY));
    writeElementsToFile(records, ismFile);

    IsmReader<byte[]> bloomBypassingReader =
        new IsmReaderImpl<byte[]>(
            FileSystems.matchSingleFileSpec(ismFile.getAbsolutePath()).resourceId(),
            CODER,
            cache) {

            // Always claim a possible match so the lookup cannot be short-circuited by the
            // Bloom filter reporting that the key doesn't exist.
            @Override
            boolean bloomFilterMightContain(RandomAccessData keyBytes) {
                return true;
            }
        };

    byte[] beforeAllKeys = new byte[] { 0x02 };
    byte[] betweenKeys = new byte[] { 0x06 };
    byte[] afterAllKeys = new byte[] { 0x10 };

    // A key before all keys contained in the file must not be found.
    assertFalse(
        bloomBypassingReader.overKeyComponents(ImmutableList.of(EMPTY, beforeAllKeys)).start());
    // A key between two other keys contained in the file must not be found.
    assertFalse(
        bloomBypassingReader.overKeyComponents(ImmutableList.of(EMPTY, betweenKeys)).start());
    // A key that is after all keys contained in the file must not be found.
    assertFalse(
        bloomBypassingReader.overKeyComponents(ImmutableList.of(EMPTY, afterAllKeys)).start());
}
Also used : RandomAccessData(org.apache.beam.runners.dataflow.util.RandomAccessData) ArrayList(java.util.ArrayList) IsmRecord(org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord) File(java.io.File) Test(org.junit.Test)

Aggregations

IsmRecord (org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord): 26, Test (org.junit.Test): 17, WindowedValue (org.apache.beam.sdk.util.WindowedValue): 16, ArrayList (java.util.ArrayList): 12, File (java.io.File): 8, KV (org.apache.beam.sdk.values.KV): 8, HashMap (java.util.HashMap): 7, IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow): 7, Instant (org.joda.time.Instant): 7, SortedMap (java.util.SortedMap): 6, TreeMap (java.util.TreeMap): 6, Callable (java.util.concurrent.Callable): 6, Future (java.util.concurrent.Future): 6, Source (com.google.api.services.dataflow.model.Source): 5, Collection (java.util.Collection): 5, Map (java.util.Map): 5, RandomAccessData (org.apache.beam.runners.dataflow.util.RandomAccessData): 5, ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap): 5, BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow): 3, TransformedMap (org.apache.beam.runners.dataflow.BatchViewOverrides.TransformedMap): 2