Search in sources:

Example 6 with Batch

use of org.wikidata.query.rdf.tool.change.KafkaPoller.Batch in project wikidata-query-rdf by wikimedia.

the class KafkaPollerUnitTest method batchSize.

/**
 * Feeds the mocked consumer four successive non-empty polls (followed by an empty one)
 * and checks which changes end up in the first batch returned by the poller.
 */
@Test
public void batchSize() throws RetryableException {
    KafkaPoller poller = makePoller();

    // Time offsets reused throughout the fixture records below.
    final Duration ms20 = Duration.ofMillis(20);
    final Duration ms21 = Duration.ofMillis(21);
    final Duration ms25 = Duration.ofMillis(25);
    final Duration ms30 = Duration.ofMillis(30);

    ConsumerRecords<String, ChangeEvent> firstPoll = makeRecords(
        makeRecord(makeRCEvent(ms20, 1, 5, "Q1"), "topictest", ms20),
        makeRecord(makeRCEvent(ms30, 2, 2, "Q666", 1, DOMAIN), "othertopic", ms21),
        makeRecord(makeRCEvent(ms25, 3, 10, "Q6666", 0, "acme.wrong"), "topictest", ms20),
        makeRecord(makeRCEvent(ms20, 4, 5, "Q2"), "topictest", ms20),
        makeRecord(makeRCEvent(ms20, 5, 5, "Q3"), "topictest", ms20),
        makeRecord(makeRCEvent(ms20, 6, 5, "Q4"), "topictest", ms20));

    ConsumerRecords<String, ChangeEvent> secondPoll = makeRecords(
        makeRecord(makeRCEvent(ms30, 5, 10, "Q3"), "othertopic", ms21),
        makeRecord(makeRCEvent(ms30, 1, 20, "Q1"), "othertopic", ms21),
        makeRecord(makeRCEvent(ms25, 3, 100, "Q6666", 0, "acme.wrong"), "topictest", ms20),
        makeRecord(makeRCEvent(ms30, 4, 20, "Q2"), "othertopic", ms21),
        makeRecord(makeRCEvent(ms30, 1, 20, "Q1"), "othertopic", ms21));

    ConsumerRecords<String, ChangeEvent> thirdPoll = makeRecords(
        makeRecord(makeRCEvent(ms30, 5, 100, "Q3"), "othertopic", ms21),
        makeRecord(makeRCEvent(ms30, 1, 200, "Q1"), "othertopic", ms21),
        makeRecord(makeRCEvent(ms25, 3, 100, "Q6666", 0, "acme.wrong"), "topictest", ms20),
        makeRecord(makeRCEvent(ms30, 7, 200, "Q5"), "othertopic", ms21),
        makeRecord(makeRCEvent(ms30, 8, 200, "Q6"), "othertopic", ms21));

    ConsumerRecords<String, ChangeEvent> fourthPoll = makeRecords(
        makeRecord(makeRCEvent(ms30, 9, 2, "Q7"), "othertopic", ms21),
        makeRecord(makeRCEvent(ms25, 3, 10, "Q6666", 0, "acme.wrong"), "topictest", ms20));

    // Each call to poll() hands back the next fixture in sequence.
    when(consumer.poll(anyLong()))
        .thenReturn(firstPoll, secondPoll, thirdPoll, fourthPoll, EMPTY_CHANGES);

    Batch firstBatch = poller.firstBatch();

    // Accumulation is expected to stop once the batch grows past 5 changes
    // (presumably the batch size set up by makePoller() - confirm there), so
    // Q7, which only appears in the fourth poll, must not show up.
    assertThat(firstBatch.changes())
        .hasSize(6)
        .anyMatch(titleRevision("Q1", 200))
        .anyMatch(titleRevision("Q2", 20))
        .anyMatch(titleRevision("Q3", 100))
        .anyMatch(titleRevision("Q4", 5))
        .anyMatch(titleRevision("Q5", 200))
        .anyMatch(titleRevision("Q6", 200))
        .noneMatch(title("Q7"));
}
Also used : ChangeEvent(org.wikidata.query.rdf.tool.change.events.ChangeEvent) Batch(org.wikidata.query.rdf.tool.change.KafkaPoller.Batch) Test(org.junit.Test)

Example 7 with Batch

use of org.wikidata.query.rdf.tool.change.KafkaPoller.Batch in project wikidata-query-rdf by wikimedia.

the class KafkaPollerUnitTest method filterOtherChanges.

/**
 * Builds a batch from three records and checks that only the one titled Q123 is kept.
 */
@Test
public void filterOtherChanges() throws RetryableException {
    ConsumerRecords<String, ChangeEvent> records = makeRecords(
        makeRecord(makeRCEvent(Duration.ofMillis(20), 1, 5, "Q123"), "topictest", Duration.ofMillis(20)),
        makeRecord(makeRCEvent(Duration.ofMillis(30), 2, 2, "Q666", 1, DOMAIN), "othertopic", Duration.ofMillis(21)),
        makeRecord(makeRCEvent(Duration.ofMillis(25), 3, 10, "Q6666", 0, "acme.wrong"), "topictest", Duration.ofMillis(20)));

    Batch filtered = getBatchFromRecords(records);

    // Exactly one change is expected to survive, and it must be the Q123 one;
    // the Q666 and Q6666 events are expected to be dropped.
    assertThat(filtered.changes())
        .hasSize(1)
        .anyMatch(title("Q123"));
}
Also used : ChangeEvent(org.wikidata.query.rdf.tool.change.events.ChangeEvent) Batch(org.wikidata.query.rdf.tool.change.KafkaPoller.Batch) Test(org.junit.Test)

Example 8 with Batch

use of org.wikidata.query.rdf.tool.change.KafkaPoller.Batch in project wikidata-query-rdf by wikimedia.

the class KafkaPollerUnitTest method advanceTimestamp.

/**
 * Builds a batch from records spread over two topics and checks the advancement
 * and left-off date the batch reports.
 */
@Test
public void advanceTimestamp() throws RetryableException {
    final String revisionTopic = "mediawiki.revision-create";
    final String secondTopic = "othertopic";

    ConsumerRecords<String, ChangeEvent> records = makeRecords(
        makeRecord(makeRCEvent(Duration.ofMillis(20), 1, 1, "Q123"), revisionTopic, Duration.ofMillis(120000)),
        makeRecord(makeRCEvent(Duration.ofMillis(30), 2, 2, "Q234"), revisionTopic, Duration.ofMillis(122000)),
        makeRecord(makeDeleteEvent(Duration.ofMillis(21), 1, "Q123"), secondTopic, Duration.ofMillis(121000)),
        makeRecord(makeDeleteEvent(Duration.ofMillis(22), 2, "Q234"), secondTopic, Duration.ofMillis(122000)),
        makeRecord(makeDeleteEvent(Duration.ofMillis(31), 1, "Q123"), secondTopic, Duration.ofMillis(123000)));

    Batch polled = getBatchFromRecords(records);

    // The advancement is the minimum, across topics, of each topic's latest
    // record time: the revision topic tops out at 122000 and the other topic
    // at 123000, hence 122000.
    final long expectedAdvance = 122000L;
    assertThat(polled.advanced()).isEqualTo(expectedAdvance);
    assertThat(polled.leftOffDate()).isEqualTo(START_TIME.plusMillis(expectedAdvance));
}
Also used : ChangeEvent(org.wikidata.query.rdf.tool.change.events.ChangeEvent) Batch(org.wikidata.query.rdf.tool.change.KafkaPoller.Batch) Test(org.junit.Test)

Example 9 with Batch

use of org.wikidata.query.rdf.tool.change.KafkaPoller.Batch in project wikidata-query-rdf by wikimedia.

the class KafkaPollerUnitTest method changesOrder.

/**
 * Builds a batch from four records that all carry the title Q123 but different
 * revisions, and checks that they collapse into a single change.
 */
@Test
public void changesOrder() throws RetryableException {
    ConsumerRecords<String, ChangeEvent> records = makeRecords(
        makeRecord(makeRCEvent(Duration.ofMillis(20), 1, 5, "Q123"), "topictest", Duration.ofMillis(20)),
        makeRecord(makeRCEvent(Duration.ofMillis(30), 1, 2, "Q123"), "othertopic", Duration.ofMillis(21)),
        makeRecord(makeRCEvent(Duration.ofMillis(25), 1, 10, "Q123"), "othertopic", Duration.ofMillis(15)),
        makeRecord(makeRCEvent(Duration.ofMillis(35), 1, 7, "Q123"), "topictest", Duration.ofMillis(25)));

    Batch merged = getBatchFromRecords(records);

    // A single change is expected, carrying the highest revision seen (10),
    // regardless of the order or timing in which the records arrived.
    assertThat(merged.changes())
        .hasSize(1)
        .anyMatch(revision(10));
}
Also used : ChangeEvent(org.wikidata.query.rdf.tool.change.events.ChangeEvent) Batch(org.wikidata.query.rdf.tool.change.KafkaPoller.Batch) Test(org.junit.Test)

Example 10 with Batch

use of org.wikidata.query.rdf.tool.change.KafkaPoller.Batch in project wikidata-query-rdf by wikimedia.

the class KafkaPollerUnitTest method deleteRevision.

/**
 * Builds a batch from a revision event, a delete event and a later revision
 * event for the same title (Q123), and checks which one is kept.
 */
@Test
public void deleteRevision() throws RetryableException {
    ConsumerRecords<String, ChangeEvent> records = makeRecords(
        makeRecord(makeRCEvent(Duration.ofMillis(20), 1, 1, "Q123"), "topictest", Duration.ofMillis(20)),
        makeRecord(makeDeleteEvent(Duration.ofMillis(21), 1, "Q123"), "othertopic", Duration.ofMillis(21)),
        makeRecord(makeRCEvent(Duration.ofMillis(22), 1, 2, "Q123"), "topictest", Duration.ofMillis(22)));

    Batch merged = getBatchFromRecords(records);

    // The delete must take precedence even though a newer revision follows it:
    // the single remaining change is expected to match revision(-1).
    assertThat(merged.changes())
        .hasSize(1)
        .anyMatch(revision(-1));
}
Also used : ChangeEvent(org.wikidata.query.rdf.tool.change.events.ChangeEvent) Batch(org.wikidata.query.rdf.tool.change.KafkaPoller.Batch) Test(org.junit.Test)

Aggregations

Test (org.junit.Test): 13, Batch (org.wikidata.query.rdf.tool.change.KafkaPoller.Batch): 13, ChangeEvent (org.wikidata.query.rdf.tool.change.events.ChangeEvent): 12, MetricRegistry (com.codahale.metrics.MetricRegistry): 4, ImmutableList (com.google.common.collect.ImmutableList): 4, ImmutableMap (com.google.common.collect.ImmutableMap): 4, ArrayList (java.util.ArrayList): 4, Collections.singletonList (java.util.Collections.singletonList): 4, HashMap (java.util.HashMap): 4, List (java.util.List): 4, Map (java.util.Map): 4, Collectors.toList (java.util.stream.Collectors.toList): 4, ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord): 4, OffsetAndMetadata (org.apache.kafka.clients.consumer.OffsetAndMetadata): 4, TopicPartition (org.apache.kafka.common.TopicPartition): 4, Maps (com.google.common.collect.Maps): 3, URI (java.net.URI): 3, URISyntaxException (java.net.URISyntaxException): 3, Duration (java.time.Duration): 3, Arrays (java.util.Arrays): 3