Search in sources :

Example 1 with MediaInfoIdValue

use of org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue in project OpenRefine by OpenRefine.

The method testMultipleBatchesMediaInfo of the class EditBatchProcessorTest.

/**
 * Runs 66 caption edits on existing MediaInfo entities through a processor
 * configured with a batch size of 50, and checks that the documents are
 * fetched in two batches and that each entity is edited exactly once.
 */
@Test
public void testMultipleBatchesMediaInfo() throws MediaWikiApiErrorException, InterruptedException, IOException {
    // Fixture: the same English caption is added to the entities M124 to M189
    MonolingualTextValue caption = Datamodel.makeMonolingualTextValue("village in Nepal", "en");
    List<MonolingualTextValue> captions = Collections.singletonList(caption);
    TermUpdate expectedTermUpdate = Datamodel.makeTermUpdate(captions, Collections.emptyList());

    List<MediaInfoIdValue> mids = new ArrayList<>();
    for (int n = 124; n < 190; n++) {
        mids.add(Datamodel.makeWikimediaCommonsMediaInfoIdValue("M" + n));
    }
    List<TermedStatementEntityEdit> edits = new ArrayList<>();
    List<MediaInfoDocument> allDocs = new ArrayList<>();
    for (MediaInfoIdValue mid : mids) {
        edits.add(new TermedStatementEntityEditBuilder(mid).addLabel(caption, false).build());
        allDocs.add(Datamodel.makeMediaInfoDocument(mid));
    }

    // The fetcher is expected to be queried once per batch of documents
    int batchSize = 50;
    List<MediaInfoDocument> headDocs = allDocs.subList(0, batchSize);
    List<MediaInfoDocument> tailDocs = allDocs.subList(batchSize, allDocs.size());
    when(fetcher.getEntityDocuments(toMids(headDocs))).thenReturn(toMapMediaInfo(headDocs));
    when(fetcher.getEntityDocuments(toMids(tailDocs))).thenReturn(toMapMediaInfo(tailDocs));

    // Perform the edits one by one, watching the remaining count decrease
    EditBatchProcessor processor = new EditBatchProcessor(fetcher, editor, edits, library, summary, maxlag, tags, batchSize);
    assertEquals(0, processor.progress());
    for (int remaining = mids.size(); remaining > 0; remaining--) {
        assertEquals(processor.remainingEdits(), remaining);
        processor.performEdit();
    }
    assertEquals(0, processor.remainingEdits());
    assertEquals(100, processor.progress());

    // No new entity was created, each batch was fetched once, each entity was edited once
    assertEquals(new NewEntityLibrary(), library);
    verify(fetcher, times(1)).getEntityDocuments(toMids(headDocs));
    verify(fetcher, times(1)).getEntityDocuments(toMids(tailDocs));
    StatementUpdate emptyStatementUpdate = Datamodel.makeStatementUpdate(Collections.emptyList(), Collections.emptyList(), Collections.emptyList());
    for (MediaInfoDocument doc : allDocs) {
        verify(editor, times(1)).editEntityDocument(
                Datamodel.makeMediaInfoUpdate((MediaInfoIdValue) doc.getEntityId(), doc.getRevisionId(), expectedTermUpdate, emptyStatementUpdate),
                false, summary, tags);
    }
}
Also used : Arrays(java.util.Arrays) Assert.assertEquals(org.testng.Assert.assertEquals) Test(org.testng.annotations.Test) MediaInfoDocument(org.wikidata.wdtk.datamodel.interfaces.MediaInfoDocument) EntityDocument(org.wikidata.wdtk.datamodel.interfaces.EntityDocument) ArrayList(java.util.ArrayList) TermUpdate(org.wikidata.wdtk.datamodel.interfaces.TermUpdate) WikibaseDataFetcher(org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher) ItemIdValue(org.wikidata.wdtk.datamodel.interfaces.ItemIdValue) Map(java.util.Map) MonolingualTextValue(org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue) WikidataRefineTest(org.openrefine.wikidata.testing.WikidataRefineTest) TermedStatementEntityEdit(org.openrefine.wikidata.updates.TermedStatementEntityEdit) BeforeMethod(org.testng.annotations.BeforeMethod) ItemDocumentBuilder(org.wikidata.wdtk.datamodel.helpers.ItemDocumentBuilder) WikibaseDataEditor(org.wikidata.wdtk.wikibaseapi.WikibaseDataEditor) Mockito.times(org.mockito.Mockito.times) IOException(java.io.IOException) Mockito.when(org.mockito.Mockito.when) Collectors(java.util.stream.Collectors) MediaInfoIdValue(org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue) Mockito.verify(org.mockito.Mockito.verify) StatementUpdate(org.wikidata.wdtk.datamodel.interfaces.StatementUpdate) List(java.util.List) TermedStatementEntityEditBuilder(org.openrefine.wikidata.updates.TermedStatementEntityEditBuilder) Datamodel(org.wikidata.wdtk.datamodel.helpers.Datamodel) ItemDocument(org.wikidata.wdtk.datamodel.interfaces.ItemDocument) MediaWikiApiErrorException(org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException) Collections(java.util.Collections) TestingData(org.openrefine.wikidata.testing.TestingData) Mockito.mock(org.mockito.Mockito.mock) TermedStatementEntityEditBuilder(org.openrefine.wikidata.updates.TermedStatementEntityEditBuilder) MediaInfoIdValue(org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue) 
MediaInfoDocument(org.wikidata.wdtk.datamodel.interfaces.MediaInfoDocument) StatementUpdate(org.wikidata.wdtk.datamodel.interfaces.StatementUpdate) ArrayList(java.util.ArrayList) MonolingualTextValue(org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue) TermedStatementEntityEdit(org.openrefine.wikidata.updates.TermedStatementEntityEdit) TermUpdate(org.wikidata.wdtk.datamodel.interfaces.TermUpdate) Test(org.testng.annotations.Test) WikidataRefineTest(org.openrefine.wikidata.testing.WikidataRefineTest)

Example 2 with MediaInfoIdValue

use of org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue in project OpenRefine by OpenRefine.

The method toEntityUpdate of the class TermedStatementEntityEdit.

/**
 * Translates this edit into the update to apply to an already existing
 * entity, given the current state of that entity. Calling this on an edit
 * which creates a new entity is rejected by the initial validation.
 *
 * @param entityDocument
 *            the current version of the edited entity; it is cast to the
 *            document type matching the subject id
 * @return the update for the item or MediaInfo entity
 * @throws NotImplementedException
 *             if the subject id is neither an item id nor a MediaInfo id
 */
public EntityUpdate toEntityUpdate(EntityDocument entityDocument) {
    Validate.isFalse(isNew(), "Cannot create a corresponding entity update for a creation of a new entity.");

    if (id instanceof ItemIdValue) {
        ItemDocument currentItem = (ItemDocument) entityDocument;

        // Labels: the unconditional ones, then the conditional ones whose language has no label yet
        List<MonolingualTextValue> newLabels = new ArrayList<>(getLabels());
        for (MonolingualTextValue candidate : getLabelsIfNew()) {
            if (!currentItem.getLabels().containsKey(candidate.getLanguageCode())) {
                newLabels.add(candidate);
            }
        }
        TermUpdate labelUpdate = Datamodel.makeTermUpdate(newLabels, Collections.emptyList());

        // Descriptions: same scheme as for labels
        List<MonolingualTextValue> newDescriptions = new ArrayList<>(getDescriptions());
        for (MonolingualTextValue candidate : getDescriptionsIfNew()) {
            if (!currentItem.getDescriptions().containsKey(candidate.getLanguageCode())) {
                newDescriptions.add(candidate);
            }
        }
        TermUpdate descriptionUpdate = Datamodel.makeTermUpdate(newDescriptions, Collections.emptyList());

        // Aliases: one alias update per language code
        Map<String, List<MonolingualTextValue>> aliasesByLanguage = getAliases().stream()
                .collect(Collectors.groupingBy(MonolingualTextValue::getLanguageCode));
        Map<String, AliasUpdate> aliasUpdates = new HashMap<>();
        for (Entry<String, List<MonolingualTextValue>> entry : aliasesByLanguage.entrySet()) {
            aliasUpdates.put(entry.getKey(), Datamodel.makeAliasUpdate(entry.getValue(), Collections.emptyList()));
        }

        // Statements
        StatementUpdate statementUpdate = toStatementUpdate(currentItem);

        return Datamodel.makeItemUpdate(
                (ItemIdValue) getEntityId(),
                entityDocument.getRevisionId(),
                labelUpdate,
                descriptionUpdate,
                aliasUpdates,
                statementUpdate,
                Collections.emptyList(),
                Collections.emptyList());
    }

    if (id instanceof MediaInfoIdValue) {
        MediaInfoDocument currentMediaInfo = (MediaInfoDocument) entityDocument;

        // Labels (captions): the unconditional ones, then the conditional ones whose language has none yet
        List<MonolingualTextValue> newCaptions = new ArrayList<>(getLabels());
        for (MonolingualTextValue candidate : getLabelsIfNew()) {
            if (!currentMediaInfo.getLabels().containsKey(candidate.getLanguageCode())) {
                newCaptions.add(candidate);
            }
        }
        TermUpdate labelUpdate = Datamodel.makeTermUpdate(newCaptions, Collections.emptyList());

        // Statements
        StatementUpdate statementUpdate = toStatementUpdate(currentMediaInfo);

        return Datamodel.makeMediaInfoUpdate(
                (MediaInfoIdValue) id,
                entityDocument.getRevisionId(),
                labelUpdate,
                statementUpdate);
    }

    throw new NotImplementedException("Editing entities of type " + id.getEntityType() + " is not supported yet.");
}
Also used : JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) StatementUpdateBuilder(org.wikidata.wdtk.datamodel.helpers.StatementUpdateBuilder) HashMap(java.util.HashMap) MediaInfoDocument(org.wikidata.wdtk.datamodel.interfaces.MediaInfoDocument) EntityDocument(org.wikidata.wdtk.datamodel.interfaces.EntityDocument) ArrayList(java.util.ArrayList) StatementGroup(org.wikidata.wdtk.datamodel.interfaces.StatementGroup) TermUpdate(org.wikidata.wdtk.datamodel.interfaces.TermUpdate) HashSet(java.util.HashSet) StatementEditingMode(org.openrefine.wikidata.schema.strategies.StatementEditingMode) TermedStatementDocument(org.wikidata.wdtk.datamodel.interfaces.TermedStatementDocument) ItemIdValue(org.wikidata.wdtk.datamodel.interfaces.ItemIdValue) Map(java.util.Map) NotImplementedException(org.apache.commons.lang.NotImplementedException) JsonIgnore(com.fasterxml.jackson.annotation.JsonIgnore) EntityUpdate(org.wikidata.wdtk.datamodel.interfaces.EntityUpdate) EntityIdValue(org.wikidata.wdtk.datamodel.interfaces.EntityIdValue) StatementDocument(org.wikidata.wdtk.datamodel.interfaces.StatementDocument) LinkedList(java.util.LinkedList) MonolingualTextValue(org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue) Validate(org.jsoup.helper.Validate) Collection(java.util.Collection) PropertyIdValue(org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue) Set(java.util.Set) Statement(org.wikidata.wdtk.datamodel.interfaces.Statement) Collectors(java.util.stream.Collectors) AliasUpdate(org.wikidata.wdtk.datamodel.interfaces.AliasUpdate) MediaInfoIdValue(org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue) StatementUpdate(org.wikidata.wdtk.datamodel.interfaces.StatementUpdate) List(java.util.List) Datamodel(org.wikidata.wdtk.datamodel.helpers.Datamodel) Entry(java.util.Map.Entry) ItemDocument(org.wikidata.wdtk.datamodel.interfaces.ItemDocument) Collections(java.util.Collections) MediaInfoIdValue(org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue) 
StatementUpdate(org.wikidata.wdtk.datamodel.interfaces.StatementUpdate) MediaInfoDocument(org.wikidata.wdtk.datamodel.interfaces.MediaInfoDocument) NotImplementedException(org.apache.commons.lang.NotImplementedException) MonolingualTextValue(org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue) ItemIdValue(org.wikidata.wdtk.datamodel.interfaces.ItemIdValue) ItemDocument(org.wikidata.wdtk.datamodel.interfaces.ItemDocument) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) AliasUpdate(org.wikidata.wdtk.datamodel.interfaces.AliasUpdate) TermUpdate(org.wikidata.wdtk.datamodel.interfaces.TermUpdate)

Example 3 with MediaInfoIdValue

use of org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue in project OpenRefine by OpenRefine.

The method performEdit of the class EditBatchProcessor.

/**
 * Executes the next pending edit of the batch and advances the batch
 * cursor. Does nothing when no edit remains.
 * <p>
 * An edit whose references to new entities cannot be rewritten yet is
 * skipped with a warning. MediaWiki API errors and IO errors raised while
 * editing are logged as warnings and the cursor still advances.
 *
 * @throws InterruptedException
 *             presumably raised while a new batch is being prepared; the
 *             source of this exception is not visible in this method
 */
public void performEdit() throws InterruptedException {
    if (remainingEdits() == 0) {
        return;
    }
    if (batchCursor == currentBatch.size()) {
        prepareNewBatch();
    }
    TermedStatementEntityEdit pendingEdit = currentBatch.get(batchCursor);

    // Replace references to new entities by the ids they were created with
    ReconEntityRewriter rewriter = new ReconEntityRewriter(library, pendingEdit.getEntityId());
    TermedStatementEntityEdit rewrittenEdit;
    try {
        rewrittenEdit = rewriter.rewrite(pendingEdit);
    } catch (NewEntityNotCreatedYetException e) {
        logger.warn("Failed to rewrite update on entity " + pendingEdit.getEntityId() + ". Missing entity: " + e.getMissingEntity() + ". Skipping update.");
        batchCursor++;
        return;
    }

    try {
        if (!rewrittenEdit.isNew()) {
            // Existing entities
            EntityUpdate entityUpdate = rewrittenEdit.toEntityUpdate(currentDocs.get(rewrittenEdit.getEntityId().getId()));
            editor.editEntityDocument(entityUpdate, false, summary, tags);
        } else {
            // New entities
            ReconEntityIdValue newCell = (ReconEntityIdValue) rewrittenEdit.getEntityId();
            // TODO Antonin, 2022-02-11: remove this casting once we have https://github.com/Wikidata/Wikidata-Toolkit/issues/651
            if (newCell instanceof ItemIdValue) {
                TermedStatementEntityEdit normalizedEdit = rewrittenEdit.normalizeLabelsAndAliases();
                ItemDocument itemDocument = (ItemDocument) normalizedEdit.toNewEntity();
                ItemDocument createdDoc = editor.createItemDocument(itemDocument, summary, tags);
                // Remember the id assigned to the new item so that later edits can refer to it
                library.setId(newCell.getReconInternalId(), createdDoc.getEntityId().getId());
            } else if (newCell instanceof MediaInfoIdValue) {
                // Normalization still runs before bailing out: creating MediaInfo entities is not supported
                rewrittenEdit.normalizeLabelsAndAliases();
                throw new NotImplementedException();
            }
        }
    } catch (MediaWikiApiErrorException e) {
        // TODO find a way to report these errors to the user in a nice way
        logger.warn("MediaWiki error while editing [" + e.getErrorCode() + "]: " + e.getErrorMessage());
    } catch (IOException e) {
        logger.warn("IO error while editing: " + e.getMessage());
    }
    batchCursor++;
}
Also used : ItemIdValue(org.wikidata.wdtk.datamodel.interfaces.ItemIdValue) EntityUpdate(org.wikidata.wdtk.datamodel.interfaces.EntityUpdate) ItemDocument(org.wikidata.wdtk.datamodel.interfaces.ItemDocument) NewEntityNotCreatedYetException(org.openrefine.wikidata.schema.exceptions.NewEntityNotCreatedYetException) MediaInfoIdValue(org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue) ReconEntityIdValue(org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue) NotImplementedException(org.apache.commons.lang.NotImplementedException) TermedStatementEntityEdit(org.openrefine.wikidata.updates.TermedStatementEntityEdit) IOException(java.io.IOException) MediaWikiApiErrorException(org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException)

Example 4 with MediaInfoIdValue

use of org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue in project OpenRefine by OpenRefine.

The method fromCell of the class WbEntityVariable.

/**
 * Turns a reconciled cell into the entity id it designates.
 * <p>
 * A cell judged as new yields a {@link ReconItemIdValue}. A matched cell
 * yields the reconciled id value for its entity type (item, MediaInfo or
 * property), provided that the identifier space of the reconciliation
 * equals the base IRI the context reports for that entity type; otherwise
 * an "invalid-identifier-space" warning is added to the context.
 *
 * @param cell
 *            the cell to convert
 * @param ctxt
 *            the context supplying the expected base IRIs and collecting warnings
 * @return the entity id value built from the cell
 * @throws SkipSchemaExpressionException
 *             if the cell is not reconciled, is neither matched nor new, is
 *             matched to an unsupported entity type, or comes from an
 *             unexpected identifier space
 */
@Override
public EntityIdValue fromCell(Cell cell, ExpressionContext ctxt) throws SkipSchemaExpressionException {
    if (cell.recon == null) {
        throw new SkipSchemaExpressionException();
    }
    if (Judgment.New.equals(cell.recon.judgment)) {
        return new ReconItemIdValue(cell.recon, cell.value.toString());
    }
    if (!Judgment.Matched.equals(cell.recon.judgment)) {
        throw new SkipSchemaExpressionException();
    }

    // Parse the matched id to find out which type of entity it designates
    EntityIdValue parsedId = EntityIdValueImpl.fromId(cell.recon.match.id, cell.recon.identifierSpace);
    final EntityIdValue reconciledId;
    final String entityType;
    if (parsedId instanceof ItemIdValue) {
        reconciledId = new ReconItemIdValue(cell.recon, cell.value.toString());
        entityType = "item";
    } else if (parsedId instanceof MediaInfoIdValue) {
        reconciledId = new ReconMediaInfoIdValue(cell.recon, cell.value.toString());
        entityType = "mediainfo";
    } else if (parsedId instanceof PropertyIdValue) {
        reconciledId = new ReconPropertyIdValue(cell.recon, cell.value.toString());
        entityType = "property";
    } else {
        throw new SkipSchemaExpressionException();
    }

    // The reconciliation must come from the Wikibase instance expected for this entity type
    boolean inExpectedSpace = cell.recon.identifierSpace != null
            && cell.recon.identifierSpace.equals(ctxt.getBaseIRIForEntityType(entityType));
    if (inExpectedSpace) {
        return reconciledId;
    }

    QAWarning warning = new QAWarning("invalid-identifier-space", null, QAWarning.Severity.INFO, 1);
    warning.setProperty("example_cell", cell.value.toString());
    warning.setProperty("expected_site_iri", ctxt.getBaseIRIForEntityType(entityType));
    ctxt.addWarning(warning);
    throw new SkipSchemaExpressionException();
}
Also used : ReconItemIdValue(org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue) ItemIdValue(org.wikidata.wdtk.datamodel.interfaces.ItemIdValue) PropertyIdValue(org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue) ReconPropertyIdValue(org.openrefine.wikidata.schema.entityvalues.ReconPropertyIdValue) ReconItemIdValue(org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue) ReconMediaInfoIdValue(org.openrefine.wikidata.schema.entityvalues.ReconMediaInfoIdValue) MediaInfoIdValue(org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue) SkipSchemaExpressionException(org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException) EntityIdValue(org.wikidata.wdtk.datamodel.interfaces.EntityIdValue) ReconPropertyIdValue(org.openrefine.wikidata.schema.entityvalues.ReconPropertyIdValue) QAWarning(org.openrefine.wikidata.qa.QAWarning) ReconMediaInfoIdValue(org.openrefine.wikidata.schema.entityvalues.ReconMediaInfoIdValue)

Aggregations

ItemIdValue (org.wikidata.wdtk.datamodel.interfaces.ItemIdValue)4 MediaInfoIdValue (org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue)4 ItemDocument (org.wikidata.wdtk.datamodel.interfaces.ItemDocument)3 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 Collections (java.util.Collections)2 List (java.util.List)2 Map (java.util.Map)2 Collectors (java.util.stream.Collectors)2 NotImplementedException (org.apache.commons.lang.NotImplementedException)2 TermedStatementEntityEdit (org.openrefine.wikidata.updates.TermedStatementEntityEdit)2 Datamodel (org.wikidata.wdtk.datamodel.helpers.Datamodel)2 EntityDocument (org.wikidata.wdtk.datamodel.interfaces.EntityDocument)2 EntityIdValue (org.wikidata.wdtk.datamodel.interfaces.EntityIdValue)2 EntityUpdate (org.wikidata.wdtk.datamodel.interfaces.EntityUpdate)2 MediaInfoDocument (org.wikidata.wdtk.datamodel.interfaces.MediaInfoDocument)2 MonolingualTextValue (org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue)2 PropertyIdValue (org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue)2 StatementUpdate (org.wikidata.wdtk.datamodel.interfaces.StatementUpdate)2 TermUpdate (org.wikidata.wdtk.datamodel.interfaces.TermUpdate)2