Search in sources :

Example 6 with Change

use of org.wikidata.query.rdf.tool.change.Change in project wikidata-query-rdf by wikimedia.

the class TestUpdater method handleChanges.

/**
 * Records the revision carried by every incoming change in {@code updates},
 * keyed by entity id, and logs out-of-order or repeated revisions.
 *
 * <p>For each change the new revision is stored first. If an entry already
 * existed for that entity:
 * <ul>
 *   <li>when the stored revision is greater than the incoming one (and the
 *       incoming one is not {@code Change.NO_REVISION}), the stored revision
 *       is put back;</li>
 *   <li>when both are equal (and not {@code Change.NO_REVISION}), the
 *       duplicate is only logged.</li>
 * </ul>
 *
 * @param changes the changes to record
 * @throws InterruptedException declared by the overridden method; not thrown by this body
 * @throws ExecutionException declared by the overridden method; not thrown by this body
 */
@Override
protected void handleChanges(Iterable<Change> changes) throws InterruptedException, ExecutionException {
    for (Change incoming : changes) {
        final String entityId = incoming.entityId();
        final long revision = incoming.revision();
        log.info("C: {} {}", entityId, incoming);

        // put() hands back whatever revision was stored for this entity before.
        final Long previous = updates.put(entityId, revision);
        if (previous != null) {
            if (previous > revision && revision != Change.NO_REVISION) {
                log.info("Old revision on {}: had {}, arrived {}", entityId, previous, revision);
                // Undo the overwrite above so the higher revision stays recorded.
                updates.put(entityId, previous);
            } else if (previous == revision && previous != Change.NO_REVISION) {
                log.info("Duplicate revision on {}: {}", entityId, previous);
            }
        }
    }
}
Also used : Change(org.wikidata.query.rdf.tool.change.Change)

Example 7 with Change

use of org.wikidata.query.rdf.tool.change.Change in project wikidata-query-rdf by wikimedia.

the class WikibaseRepositoryWireIntegrationTest method rdfOldUsesNocache.

/**
 * Fetches RDF for a change timestamped five days in the past while the
 * repository's revision cutoff is three hours, and expects exactly one
 * statement back. The only stubbed URL is the entity-data URL carrying
 * {@code flavor=dump} and a numeric {@code nocache} parameter.
 */
@Test
public void rdfOldUsesNocache() throws RetryableException {
    repository.setCollectConstraints(false);
    repository.setRevisionCutoff(Duration.of(3, ChronoUnit.HOURS));

    // Single-triple body served for the nocache form of the entity URL.
    stubFor(get(urlMatching("/wiki/Special:EntityData/Q2.ttl[?]flavor=dump&nocache=[0-9]+"))
            .willReturn(aResponse().withBody("<a> <b> <c> .")));

    Instant fiveDaysAgo = Instant.now().minus(5, ChronoUnit.DAYS);
    Change oldChange = new Change("Q2", 1234, fiveDaysAgo, 0);
    Collection<Statement> fetched = repository.fetchRdfForEntity(oldChange);

    assertThat(fetched, hasSize(1));
}
Also used : Statement(org.openrdf.model.Statement) Change(org.wikidata.query.rdf.tool.change.Change) Test(org.junit.Test)

Example 8 with Change

use of org.wikidata.query.rdf.tool.change.Change in project wikidata-query-rdf by wikimedia.

the class WikibaseRepositoryWireIntegrationTest method rdfNewWithoutCutoff.

/**
 * Fetches RDF for a change timestamped five minutes in the past while the
 * repository's revision cutoff is {@link Duration#ZERO}, and expects exactly
 * one statement back. The only stubbed URL is the entity-data URL carrying
 * {@code flavor=dump} and a numeric {@code nocache} parameter.
 */
@Test
public void rdfNewWithoutCutoff() throws RetryableException {
    repository.setCollectConstraints(false);
    repository.setRevisionCutoff(Duration.ZERO);

    // Single-triple body served for the nocache form of the entity URL.
    stubFor(get(urlMatching("/wiki/Special:EntityData/Q2.ttl[?]flavor=dump&nocache=[0-9]+"))
            .willReturn(aResponse().withBody("<a> <b> <c> .")));

    Instant fiveMinutesAgo = Instant.now().minus(5, ChronoUnit.MINUTES);
    Change recentChange = new Change("Q2", 1234, fiveMinutesAgo, 0);
    Collection<Statement> fetched = repository.fetchRdfForEntity(recentChange);

    assertThat(fetched, hasSize(1));
}
Also used : Statement(org.openrdf.model.Statement) Change(org.wikidata.query.rdf.tool.change.Change) Test(org.junit.Test)

Example 9 with Change

use of org.wikidata.query.rdf.tool.change.Change in project wikidata-query-rdf by wikimedia.

the class RdfRepositoryUnitTest method batchUpdate.

/**
 * Syncs three changes through an {@code RdfRepository} built with a
 * 1,572,864-byte maximum post size and a mocked {@code RdfClient}, then
 * checks that the reported mutation count is 3 and that the client's
 * {@code update} was invoked three times.
 *
 * <p>The mocked client returns the same metrics object (mutation count 1)
 * for every {@code update} call.
 */
@Test
public void batchUpdate() {
    // 1.5M size means ~4k statements or 250K statement size max
    final long maxPostSize = 1572864L;

    // Canned metrics handed back by every update() call on the mock.
    CollectedUpdateMetrics cannedMetrics = new CollectedUpdateMetrics();
    cannedMetrics.setMutationCount(1);
    cannedMetrics.merge(MultiSyncStep.INSERT_NEW_DATA, UpdateMetrics.builder().build());

    RdfClient client = mock(RdfClient.class);
    when(client.update(any(String.class), any(UpdateMetricsResponseHandler.class))).thenReturn(cannedMetrics);
    RdfRepository repo = new RdfRepository(uris, client, maxPostSize);

    // 6000 statements - should go over the limit
    Change manyLabels = new Change("Q1", 1, Instant.EPOCH, 1);
    StatementBuilder labelBuilder = new StatementBuilder("Q1");
    for (int n = 0; n < 6000; n++) {
        labelBuilder.withPredicateObject(RDFS.LABEL, new LiteralImpl("some item " + n));
    }
    manyLabels.setStatements(labelBuilder.build());

    // One statement with 300K data - should go over the limit
    Change hugeLabel = new Change("Q2", 1, Instant.EPOCH, 1);
    hugeLabel.setStatements(new StatementBuilder("Q2")
            .withPredicateObject(RDFS.LABEL, new LiteralImpl(randomizer.randomAsciiOfLength(300 * 1024)))
            .build());

    // Just one statement - this will be separated anyway
    Change singleLabel = new Change("Q3", 1, Instant.EPOCH, 1);
    singleLabel.setStatements(new StatementBuilder("Q3")
            .withPredicateObject(RDFS.LABEL, new LiteralImpl("third item"))
            .build());

    List<Change> batch = ImmutableList.of(manyLabels, hugeLabel, singleLabel);
    int count = repo.syncFromChanges(batch, false).getMutationCount();
    assertThat(count).isEqualTo(3);

    // We should get 3 calls to update
    verify(client, times(3)).update(any(), any());
}
Also used : Statement(org.openrdf.model.Statement) RdfClient(org.wikidata.query.rdf.tool.rdf.client.RdfClient) Change(org.wikidata.query.rdf.tool.change.Change) LiteralImpl(org.openrdf.model.impl.LiteralImpl) UpdateMetricsResponseHandler(org.wikidata.query.rdf.tool.rdf.client.UpdateMetricsResponseHandler) StatementBuilder(org.wikidata.query.rdf.test.StatementHelper.StatementBuilder) Test(org.junit.Test)

Example 10 with Change

use of org.wikidata.query.rdf.tool.change.Change in project wikidata-query-rdf by wikimedia.

the class WikibaseRepositoryIntegrationTest method continueWorks.

/**
 * Looks up the first recent change whose title equals the test entity id,
 * then asks for recent changes again starting one second after that change's
 * timestamp. Every matching entry in the second result must carry a different
 * revision id and a different recent-change id than the first edit, and at
 * least one matching entry must be present.
 */
@Test
public void continueWorks() throws ContainedException, InterruptedException, URISyntaxException, IOException, RetryableException {
    String entityId = firstEntityIdForLabelStartingWith(baseUri, "QueryTestItem", "en", "item");
    List<RecentChange> initialChanges = getRecentChanges(START_TIME.minusSeconds(10), 10);

    // Capture the first entry for our entity; ids default to 0 if none is found.
    Long firstRevId = 0L;
    Long firstRcId = 0L;
    Change firstEdit = null;
    for (RecentChange candidate : initialChanges) {
        if (!candidate.getTitle().equals(entityId)) {
            continue;
        }
        firstRevId = candidate.getRevId();
        firstRcId = candidate.getRcId();
        firstEdit = new Change(candidate.getTitle(), firstRevId, candidate.getTimestamp(), firstRcId);
        break;
    }
    assertNotNull("Did not find the first edit", firstEdit);

    // Ensure this change is in different second
    // make new edit now
    // NOTE(review): no edit call is visible at this point in the snippet - confirm against the full source.
    List<RecentChange> laterChanges = getRecentChanges(firstEdit.timestamp().plusSeconds(1), 10);

    // check that new result does not contain old edit but contains new edit
    boolean sawNewEdit = false;
    for (RecentChange later : laterChanges) {
        if (!later.getTitle().equals(entityId)) {
            continue;
        }
        assertNotEquals("Found old edit after continue: revid", firstRevId, later.getRevId());
        assertNotEquals("Found old edit after continue: offset", firstRcId, later.getRcId());
        sawNewEdit = true;
    }
    assertTrue("Did not find new edit", sawNewEdit);
}
Also used : RecentChange(org.wikidata.query.rdf.tool.wikibase.RecentChangeResponse.RecentChange) Change(org.wikidata.query.rdf.tool.change.Change) Test(org.junit.Test)

Aggregations

Change (org.wikidata.query.rdf.tool.change.Change) 14, Test (org.junit.Test) 7, Statement (org.openrdf.model.Statement) 5, ArrayList (java.util.ArrayList) 3, HashSet (java.util.HashSet) 3, ExecutionException (java.util.concurrent.ExecutionException) 2, Future (java.util.concurrent.Future) 2, ContainedException (org.wikidata.query.rdf.tool.exception.ContainedException) 2, RetryableException (org.wikidata.query.rdf.tool.exception.RetryableException) 2, MetricRegistry (com.codahale.metrics.MetricRegistry) 1, SuppressFBWarnings (edu.umd.cs.findbugs.annotations.SuppressFBWarnings) 1, Instant (java.time.Instant) 1, EnumSet (java.util.EnumSet) 1, HashMap (java.util.HashMap) 1, Set (java.util.Set) 1, ExecutorService (java.util.concurrent.ExecutorService) 1, LiteralImpl (org.openrdf.model.impl.LiteralImpl) 1, QueryEvaluationException (org.openrdf.query.QueryEvaluationException) 1, StatementBuilder (org.wikidata.query.rdf.test.StatementHelper.StatementBuilder) 1, FatalException (org.wikidata.query.rdf.tool.exception.FatalException) 1