use of org.apache.samza.system.SystemStreamPartition in project samza by apache.
the class TestAvroFileHdfsReader method testRandomRead.
/**
 * Verifies random access on a single Avro file reader.
 *
 * <p>The test reads half of the events, records the offset of the next event, reads that
 * event, and advances five more records. It then checks that both (a) seeking back to the
 * recorded offset on the same reader and (b) opening a fresh reader at that offset return a
 * record equal to the one originally read, carrying the same offset.
 *
 * <p>Each reader is closed in a {@code finally} block so that a failing assertion does not
 * leave the underlying reader open.
 *
 * @throws Exception if reading from the test file fails
 */
@Test
public void testRandomRead() throws Exception {
  SystemStreamPartition ssp = new SystemStreamPartition("hdfs", "testStream", new Partition(0));
  String offset;
  GenericRecord record1;

  SingleFileHdfsReader reader = new AvroFileHdfsReader(ssp);
  // open() stays outside the try so close() is never invoked on a reader that failed to open
  reader.open(AVRO_FILE, "0");
  try {
    for (int i = 0; i < NUM_EVENTS / 2; i++) {
      reader.readNext();
    }
    offset = reader.nextOffset();
    IncomingMessageEnvelope envelope = reader.readNext();
    Assert.assertEquals(offset, envelope.getOffset());
    record1 = (GenericRecord) envelope.getMessage();
    // move past the recorded position so the seek below actually has to go backwards
    for (int i = 0; i < 5; i++) {
      reader.readNext();
    }

    // seek to the offset within the same reader
    reader.seek(offset);
    Assert.assertEquals(offset, reader.nextOffset());
    envelope = reader.readNext();
    Assert.assertEquals(offset, envelope.getOffset());
    GenericRecord record2 = (GenericRecord) envelope.getMessage();
    Assert.assertEquals(record1, record2);
  } finally {
    reader.close();
  }

  // open a new reader and initialize it with the offset
  reader = new AvroFileHdfsReader(ssp);
  reader.open(AVRO_FILE, offset);
  try {
    IncomingMessageEnvelope envelope = reader.readNext();
    Assert.assertEquals(offset, envelope.getOffset());
    GenericRecord record3 = (GenericRecord) envelope.getMessage();
    Assert.assertEquals(record1, record3);
  } finally {
    reader.close();
  }
}
use of org.apache.samza.system.SystemStreamPartition in project samza by apache.
the class TestAvroFileHdfsReader method testFileReopen.
/**
 * Verifies that a single Avro file can be read in two sessions without losing or repeating
 * records.
 *
 * <p>The first reader consumes the first half of the events and reports its next offset. A
 * second reader is opened at that offset and must deliver the remaining events in order,
 * after which it must have nothing left to read.
 *
 * <p>Each reader is closed in a {@code finally} block so that a failing assertion does not
 * leave the underlying reader open.
 *
 * @throws Exception if reading from the test file fails
 */
@Test
public void testFileReopen() throws Exception {
  SystemStreamPartition ssp = new SystemStreamPartition("hdfs", "testStream", new Partition(0));
  int index = 0;
  String offset;

  SingleFileHdfsReader reader = new AvroFileHdfsReader(ssp);
  // open() stays outside the try so close() is never invoked on a reader that failed to open
  reader.open(AVRO_FILE, "0");
  try {
    for (; index < NUM_EVENTS / 2; index++) {
      GenericRecord record = (GenericRecord) reader.readNext().getMessage();
      Assert.assertEquals(index, record.get(FIELD_1));
      Assert.assertEquals("string_" + index, record.get(FIELD_2).toString());
    }
    offset = reader.nextOffset();
  } finally {
    reader.close();
  }

  // resume from where the first reader stopped
  reader = new AvroFileHdfsReader(ssp);
  reader.open(AVRO_FILE, offset);
  try {
    for (; index < NUM_EVENTS; index++) {
      GenericRecord record = (GenericRecord) reader.readNext().getMessage();
      Assert.assertEquals(index, record.get(FIELD_1));
      Assert.assertEquals("string_" + index, record.get(FIELD_2).toString());
    }
    Assert.assertEquals(NUM_EVENTS, index);
    // the loop bound alone cannot detect trailing records, so check exhaustion explicitly
    Assert.assertFalse(reader.hasNext());
  } finally {
    reader.close();
  }
}
use of org.apache.samza.system.SystemStreamPartition in project samza by apache.
the class TestAvroFileHdfsReader method testSequentialRead.
/**
 * Verifies that reading an Avro file from offset "0" until the reader is exhausted yields
 * exactly {@code NUM_EVENTS} records, each carrying the expected field values for its
 * position.
 *
 * <p>The reader is closed in a {@code finally} block so that a failing assertion does not
 * leave the underlying reader open.
 *
 * @throws Exception if reading from the test file fails
 */
@Test
public void testSequentialRead() throws Exception {
  SystemStreamPartition ssp = new SystemStreamPartition("hdfs", "testStream", new Partition(0));
  SingleFileHdfsReader reader = new AvroFileHdfsReader(ssp);
  // open() stays outside the try so close() is never invoked on a reader that failed to open
  reader.open(AVRO_FILE, "0");
  try {
    int index = 0;
    while (reader.hasNext()) {
      GenericRecord record = (GenericRecord) reader.readNext().getMessage();
      Assert.assertEquals(index, record.get(FIELD_1));
      Assert.assertEquals("string_" + index, record.get(FIELD_2).toString());
      index++;
    }
    Assert.assertEquals(NUM_EVENTS, index);
  } finally {
    reader.close();
  }
}
use of org.apache.samza.system.SystemStreamPartition in project samza by apache.
the class TestMultiFileHdfsReader method testReaderReopen.
/**
 * Verifies that a multi-file reader can be closed and reopened repeatedly at the offset of
 * the last envelope it delivered, and that the combined sessions deliver every record once
 * and in order.
 *
 * <p>Every reopen uses the offset of the last envelope already consumed, so the first record
 * returned by the reopened reader is a repeat and is discarded before the assertions resume.
 * The final count of {@code 3 * NUM_EVENTS} presumably reflects three descriptor files of
 * {@code NUM_EVENTS} records each; confirm against the fixture setup.
 *
 * <p>Each reader is closed in a {@code finally} block so that a failing assertion does not
 * leave the underlying reader open.
 *
 * @throws Exception if reading from the test files fails
 */
@Test
public void testReaderReopen() throws Exception {
  SystemStreamPartition ssp = new SystemStreamPartition("hdfs", "testStream", new Partition(0));
  int index = 0;
  String offset = "0:0";

  // read until the middle of the first file
  MultiFileHdfsReader multiReader =
      new MultiFileHdfsReader(HdfsReaderFactory.ReaderType.AVRO, ssp, Arrays.asList(descriptors), offset);
  try {
    for (; index < NUM_EVENTS / 2; index++) {
      IncomingMessageEnvelope envelope = multiReader.readNext();
      GenericRecord record = (GenericRecord) envelope.getMessage();
      Assert.assertEquals(index % NUM_EVENTS, record.get(FIELD_1));
      Assert.assertEquals("string_" + (index % NUM_EVENTS), record.get(FIELD_2).toString());
      offset = envelope.getOffset();
    }
  } finally {
    multiReader.close();
  }

  // read until the middle of the second file
  multiReader =
      new MultiFileHdfsReader(HdfsReaderFactory.ReaderType.AVRO, ssp, Arrays.asList(descriptors), offset);
  try {
    // skip one duplicate event
    multiReader.readNext();
    for (; index < NUM_EVENTS + NUM_EVENTS / 2; index++) {
      IncomingMessageEnvelope envelope = multiReader.readNext();
      GenericRecord record = (GenericRecord) envelope.getMessage();
      Assert.assertEquals(index % NUM_EVENTS, record.get(FIELD_1));
      Assert.assertEquals("string_" + (index % NUM_EVENTS), record.get(FIELD_2).toString());
      offset = envelope.getOffset();
    }
  } finally {
    multiReader.close();
  }

  // read the rest of all files
  multiReader =
      new MultiFileHdfsReader(HdfsReaderFactory.ReaderType.AVRO, ssp, Arrays.asList(descriptors), offset);
  try {
    // skip one duplicate event
    multiReader.readNext();
    while (multiReader.hasNext()) {
      IncomingMessageEnvelope envelope = multiReader.readNext();
      GenericRecord record = (GenericRecord) envelope.getMessage();
      Assert.assertEquals(index % NUM_EVENTS, record.get(FIELD_1));
      Assert.assertEquals("string_" + (index % NUM_EVENTS), record.get(FIELD_2).toString());
      index++;
      offset = envelope.getOffset();
    }
    Assert.assertEquals(3 * NUM_EVENTS, index);
  } finally {
    multiReader.close();
  }

  // reopen with the offset of the last record
  multiReader =
      new MultiFileHdfsReader(HdfsReaderFactory.ReaderType.AVRO, ssp, Arrays.asList(descriptors), offset);
  try {
    // skip one duplicate event
    multiReader.readNext();
    Assert.assertFalse(multiReader.hasNext());
  } finally {
    multiReader.close();
  }
}
use of org.apache.samza.system.SystemStreamPartition in project samza by apache.
the class TestMultiFileHdfsReader method testOutOfRangeSingleFileOffset.
/**
 * Expects constructing a multi-file reader with the offset {@code "0:1000000&0"} to raise a
 * {@link SamzaException}.
 *
 * <p>If the constructor returns normally, the explicit {@code Assert.fail()} raises an
 * assertion error, which does not match the expected exception type and so fails the test.
 */
@Test(expected = SamzaException.class)
public void testOutOfRangeSingleFileOffset() {
  final Partition partition = new Partition(0);
  final SystemStreamPartition streamPartition =
      new SystemStreamPartition("hdfs", "testStream", partition);
  final String outOfRangeOffset = "0:1000000&0";

  new MultiFileHdfsReader(
      HdfsReaderFactory.ReaderType.AVRO,
      streamPartition,
      Arrays.asList(descriptors),
      outOfRangeOffset);

  // reaching this line means no exception was thrown
  Assert.fail();
}
Aggregations