Search in sources:

Example 1 with ReconEntityIdValue

use of org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue in project OpenRefine by OpenRefine.

The class QuickStatementsUpdateScheduler, method splitUpdate.

/**
 * Splits an update into a pointer-free part and a set of parts whose
 * statements each hold exactly one pointer, as reported by
 * {@code extractor.extractPointers}. The pointer-free part is appended to
 * {@code pointerFreeUpdates} (unless it is null); the other parts are appended
 * to {@code pointerUpdates}, keyed by the entity they point to.
 *
 * @param update
 *            the edit to split
 * @throws ImpossibleSchedulingException
 *             if a statement holds more than one pointer, or if the update
 *             is new and one of its statements holds a single pointer to an
 *             entity other than the one being edited
 */
protected void splitUpdate(TermedStatementEntityEdit update) throws ImpossibleSchedulingException {
    // Labels, descriptions and aliases are copied unconditionally into the
    // pointer-free part; only statements are inspected for pointers.
    TermedStatementEntityEditBuilder remainingUpdateBuilder = new TermedStatementEntityEditBuilder(update.getEntityId())
            .addLabels(update.getLabels(), true)
            .addLabels(update.getLabelsIfNew(), false)
            .addDescriptions(update.getDescriptions(), true)
            .addDescriptions(update.getDescriptionsIfNew(), false)
            .addAliases(update.getAliases());
    // One builder per pointed-to entity, all targeting the edited entity.
    Map<EntityIdValue, TermedStatementEntityEditBuilder> referencingUpdates = new HashMap<>();
    for (StatementEdit statement : update.getStatementEdits()) {
        Set<ReconEntityIdValue> pointers = extractor.extractPointers(statement.getStatement());
        if (pointers.isEmpty()) {
            remainingUpdateBuilder.addStatement(statement);
            continue;
        }
        if (pointers.size() != 1) {
            // More than one pointer in a single statement cannot be scheduled.
            throw new ImpossibleSchedulingException();
        }
        EntityIdValue pointer = pointers.iterator().next();
        if (!update.isNew()) {
            referencingUpdates
                    .computeIfAbsent(pointer, key -> new TermedStatementEntityEditBuilder(update.getEntityId()))
                    .addStatement(statement);
        } else if (pointer.equals(update.getEntityId())) {
            // A new entity pointing to itself stays with the pointer-free part.
            remainingUpdateBuilder.addStatement(statement);
        } else {
            throw new ImpossibleSchedulingException();
        }
    }
    // Add the update that is not referring to anything to the schedule
    TermedStatementEntityEdit pointerFree = remainingUpdateBuilder.build();
    if (!pointerFree.isNull()) {
        pointerFreeUpdates.add(pointerFree);
    }
    // Add the other updates to the map
    for (Entry<EntityIdValue, TermedStatementEntityEditBuilder> entry : referencingUpdates.entrySet()) {
        TermedStatementEntityEdit pointerUpdate = entry.getValue().build();
        UpdateSequence pointerUpdatesForKey = pointerUpdates.get(entry.getKey());
        if (pointerUpdatesForKey == null) {
            pointerUpdatesForKey = new UpdateSequence();
        }
        pointerUpdatesForKey.add(pointerUpdate);
        pointerUpdates.put(entry.getKey(), pointerUpdatesForKey);
    }
}
Also used : TermedStatementEntityEditBuilder(org.openrefine.wikidata.updates.TermedStatementEntityEditBuilder) HashMap(java.util.HashMap) ReconEntityIdValue(org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue) EntityIdValue(org.wikidata.wdtk.datamodel.interfaces.EntityIdValue) ReconEntityIdValue(org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue) TermedStatementEntityEdit(org.openrefine.wikidata.updates.TermedStatementEntityEdit) StatementEdit(org.openrefine.wikidata.updates.StatementEdit)

Example 2 with ReconEntityIdValue

use of org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue in project OpenRefine by OpenRefine.

The class QSValuePrinterTest, method printNewItemId.

@Test
public void printNewItemId() {
    // A new item is expected to print as "LAST": per the original test's
    // note, this is because no entity was previously created.
    ReconEntityIdValue newItem = TestingData.makeNewItemIdValue(12345L, "my new item");
    assertEquals("LAST", newItem.accept(printer));

    // A matched (already existing) item is expected to print as its own id.
    ReconEntityIdValue matchedItem = TestingData.makeMatchedItemIdValue("Q78", "my existing item");
    assertEquals("Q78", matchedItem.accept(printer));
}
Also used : ReconEntityIdValue(org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue) Test(org.testng.annotations.Test)

Example 3 with ReconEntityIdValue

use of org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue in project OpenRefine by OpenRefine.

The class WikibaseAPIUpdateScheduler, method splitUpdate.

/**
 * Sorts the contents of an update between the pointer-free and pointer-full
 * schedules, and records every pointer found in its statements in
 * {@code allPointers}.
 * <p>
 * For a new update, the terms and the statements without pointers go to
 * {@code pointerFreeUpdates}, and the statements with pointers go to
 * {@code pointerFullUpdates}. Any other update is added whole to
 * {@code pointerFullUpdates}.
 *
 * @param update
 *            the edit to split
 */
protected void splitUpdate(TermedStatementEntityEdit update) {
    TermedStatementEntityEditBuilder withoutPointers = new TermedStatementEntityEditBuilder(update.getEntityId())
            .addLabels(update.getLabels(), true)
            .addLabels(update.getLabelsIfNew(), false)
            .addDescriptions(update.getDescriptions(), true)
            .addDescriptions(update.getDescriptionsIfNew(), false)
            .addAliases(update.getAliases());
    TermedStatementEntityEditBuilder withPointers = new TermedStatementEntityEditBuilder(update.getEntityId());

    for (StatementEdit edit : update.getStatementEdits()) {
        Set<ReconEntityIdValue> found = extractor.extractPointers(edit.getStatement());
        TermedStatementEntityEditBuilder target = found.isEmpty() ? withoutPointers : withPointers;
        target.addStatement(edit);
        allPointers.addAll(found);
    }

    if (!update.isNew()) {
        // An update that is not new is kept in one piece; per the original
        // comment, this makes sure it is done after all entity creations.
        pointerFullUpdates.add(update);
        return;
    }

    // A new update may need to be split in two (if it refers to any other
    // new entity).
    // NOTE(review): the first half is filtered with isNull() and the second
    // with isEmpty(); presumably deliberate, confirm against their contracts.
    TermedStatementEntityEdit freePart = withoutPointers.build();
    if (!freePart.isNull()) {
        pointerFreeUpdates.add(freePart);
    }
    TermedStatementEntityEdit fullPart = withPointers.build();
    if (!fullPart.isEmpty()) {
        pointerFullUpdates.add(fullPart);
    }
}
Also used : TermedStatementEntityEditBuilder(org.openrefine.wikidata.updates.TermedStatementEntityEditBuilder) ReconEntityIdValue(org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue) TermedStatementEntityEdit(org.openrefine.wikidata.updates.TermedStatementEntityEdit) StatementEdit(org.openrefine.wikidata.updates.StatementEdit)

Example 4 with ReconEntityIdValue

use of org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue in project OpenRefine by OpenRefine.

The class EditBatchProcessor, method performEdit.

/**
 * Executes the edit at the current batch cursor and then advances the cursor.
 * <p>
 * Does nothing when no edits remain. A new batch is prepared first when the
 * cursor has reached the end of the current one. If the edit cannot be
 * rewritten because an entity it mentions has not been created yet, a warning
 * is logged and the edit is skipped. MediaWiki API errors and IO errors
 * raised while editing are logged as warnings; the cursor still advances.
 * <p>
 * Creating a new MediaInfo entity throws {@link NotImplementedException},
 * which is not caught here.
 *
 * @throws InterruptedException
 *             origin not visible in this method; presumably raised by one of
 *             the calls it makes (to be confirmed)
 */
public void performEdit() throws InterruptedException {
    if (remainingEdits() == 0) {
        return;
    }
    if (batchCursor == currentBatch.size()) {
        prepareNewBatch();
    }
    TermedStatementEntityEdit edit = currentBatch.get(batchCursor);

    // Replace mentions of new entities before sending the edit
    ReconEntityRewriter entityRewriter = new ReconEntityRewriter(library, edit.getEntityId());
    try {
        edit = entityRewriter.rewrite(edit);
    } catch (NewEntityNotCreatedYetException e) {
        logger.warn("Failed to rewrite update on entity " + edit.getEntityId() + ". Missing entity: " + e.getMissingEntity() + ". Skipping update.");
        batchCursor++;
        return;
    }

    try {
        if (!edit.isNew()) {
            // Existing entity: apply the edit on top of the fetched document
            EntityUpdate entityUpdate = edit.toEntityUpdate(currentDocs.get(edit.getEntityId().getId()));
            editor.editEntityDocument(entityUpdate, false, summary, tags);
        } else {
            // New entity
            // TODO Antonin, 2022-02-11: remove this casting once we have https://github.com/Wikidata/Wikidata-Toolkit/issues/651
            ReconEntityIdValue newEntityId = (ReconEntityIdValue) edit.getEntityId();
            if (newEntityId instanceof ItemIdValue) {
                edit = edit.normalizeLabelsAndAliases();
                ItemDocument toCreate = (ItemDocument) edit.toNewEntity();
                ItemDocument created = editor.createItemDocument(toCreate, summary, tags);
                // Record the created id under the reconciliation-internal id
                library.setId(newEntityId.getReconInternalId(), created.getEntityId().getId());
            } else if (newEntityId instanceof MediaInfoIdValue) {
                edit = edit.normalizeLabelsAndAliases();
                throw new NotImplementedException();
            }
        }
    } catch (MediaWikiApiErrorException e) {
        // TODO find a way to report these errors to the user in a nice way
        logger.warn("MediaWiki error while editing [" + e.getErrorCode() + "]: " + e.getErrorMessage());
    } catch (IOException e) {
        logger.warn("IO error while editing: " + e.getMessage());
    }
    batchCursor++;
}
Also used : ItemIdValue(org.wikidata.wdtk.datamodel.interfaces.ItemIdValue) EntityUpdate(org.wikidata.wdtk.datamodel.interfaces.EntityUpdate) ItemDocument(org.wikidata.wdtk.datamodel.interfaces.ItemDocument) NewEntityNotCreatedYetException(org.openrefine.wikidata.schema.exceptions.NewEntityNotCreatedYetException) MediaInfoIdValue(org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue) ReconEntityIdValue(org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue) NotImplementedException(org.apache.commons.lang.NotImplementedException) TermedStatementEntityEdit(org.openrefine.wikidata.updates.TermedStatementEntityEdit) IOException(java.io.IOException) MediaWikiApiErrorException(org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException)

Aggregations

ReconEntityIdValue (org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue)4 TermedStatementEntityEdit (org.openrefine.wikidata.updates.TermedStatementEntityEdit)3 StatementEdit (org.openrefine.wikidata.updates.StatementEdit)2 TermedStatementEntityEditBuilder (org.openrefine.wikidata.updates.TermedStatementEntityEditBuilder)2 IOException (java.io.IOException)1 HashMap (java.util.HashMap)1 NotImplementedException (org.apache.commons.lang.NotImplementedException)1 NewEntityNotCreatedYetException (org.openrefine.wikidata.schema.exceptions.NewEntityNotCreatedYetException)1 Test (org.testng.annotations.Test)1 EntityIdValue (org.wikidata.wdtk.datamodel.interfaces.EntityIdValue)1 EntityUpdate (org.wikidata.wdtk.datamodel.interfaces.EntityUpdate)1 ItemDocument (org.wikidata.wdtk.datamodel.interfaces.ItemDocument)1 ItemIdValue (org.wikidata.wdtk.datamodel.interfaces.ItemIdValue)1 MediaInfoIdValue (org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue)1 MediaWikiApiErrorException (org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException)1