Search in sources :

Example 1 with TermedStatementEntityEdit

use of org.openrefine.wikidata.updates.TermedStatementEntityEdit in project OpenRefine by OpenRefine.

the class PreviewWikibaseSchemaCommand method doPost.

/**
 * Previews the edits that a Wikibase schema generates on the project and
 * answers with a JSON summary: the warnings collected, their maximum
 * severity and count, the number of non-null edits and the first ten of them.
 *
 * The schema is read from the "schema" request parameter when present and
 * from the project's "wikibaseSchema" overlay model otherwise. The manifest
 * must be supplied in the "manifest" request parameter.
 *
 * This command uses POST but is left CSRF-unprotected since it does not
 * incur a side effect or state change in the backend.
 * The reason why it uses POST is to make sure large schemas and engines
 * can be passed as parameters.
 */
@Override
public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    try {
        Project project = getProject(request);
        response.setCharacterEncoding("UTF-8");
        response.setHeader("Content-Type", "application/json");

        // Resolve the schema: an explicit request parameter wins over the one stored in the project
        String rawSchema = request.getParameter("schema");
        WikibaseSchema schema;
        if (rawSchema == null) {
            schema = (WikibaseSchema) project.overlayModels.get("wikibaseSchema");
        } else {
            try {
                schema = WikibaseSchema.reconstruct(rawSchema);
            } catch (IOException parseFailure) {
                respondError(response, "Wikibase schema could not be parsed. Error message: " + parseFailure.getMessage());
                return;
            }
        }
        if (schema == null) {
            respondError(response, "No Wikibase schema provided.");
            return;
        }

        // Resolve the manifest, which can only come from the request
        String rawManifest = request.getParameter("manifest");
        Manifest manifest = null;
        if (rawManifest != null) {
            try {
                manifest = ManifestParser.parse(rawManifest);
            } catch (ManifestException parseFailure) {
                respondError(response, "Wikibase manifest could not be parsed. Error message: " + parseFailure.getMessage());
                return;
            }
        }
        if (manifest == null) {
            respondError(response, "No Wikibase manifest provided.");
            return;
        }

        // Evaluate the schema on the rows selected by the engine
        QAWarningStore warnings = new QAWarningStore();
        Engine engine = getEngine(request, project);
        List<TermedStatementEntityEdit> generatedEdits = schema.evaluate(project, engine, warnings);

        // Let the inspector add its own warnings about the generated edits
        new EditInspector(warnings, manifest).inspect(generatedEdits, schema);

        // Schedule with the default scheduler, drop null edits and keep the first 10 for display
        List<TermedStatementEntityEdit> effectiveEdits = new WikibaseAPIUpdateScheduler()
                .schedule(generatedEdits)
                .stream()
                .filter(edit -> !edit.isNull())
                .collect(Collectors.toList());
        List<TermedStatementEntityEdit> previewedEdits = effectiveEdits
                .stream()
                .limit(10)
                .collect(Collectors.toList());

        respondJSON(response, new PreviewResults(
                warnings.getWarnings(),
                warnings.getMaxSeverity(),
                warnings.getNbWarnings(),
                effectiveEdits.size(),
                previewedEdits));
    } catch (Exception failure) {
        respondException(response, failure);
    }
}
Also used : Project(com.google.refine.model.Project) ServletException(javax.servlet.ServletException) ManifestException(org.openrefine.wikidata.manifests.ManifestException) TermedStatementEntityEdit(org.openrefine.wikidata.updates.TermedStatementEntityEdit) HttpServletResponse(javax.servlet.http.HttpServletResponse) IOException(java.io.IOException) EditInspector(org.openrefine.wikidata.qa.EditInspector) Collectors(java.util.stream.Collectors) ManifestParser(org.openrefine.wikidata.manifests.ManifestParser) List(java.util.List) HttpServletRequest(javax.servlet.http.HttpServletRequest) WikibaseSchema(org.openrefine.wikidata.schema.WikibaseSchema) WikibaseAPIUpdateScheduler(org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler) CommandUtilities.respondError(org.openrefine.wikidata.commands.CommandUtilities.respondError) QAWarningStore(org.openrefine.wikidata.qa.QAWarningStore) Command(com.google.refine.commands.Command) Engine(com.google.refine.browsing.Engine) Manifest(org.openrefine.wikidata.manifests.Manifest) IOException(java.io.IOException) Manifest(org.openrefine.wikidata.manifests.Manifest) ManifestException(org.openrefine.wikidata.manifests.ManifestException) ServletException(javax.servlet.ServletException) ManifestException(org.openrefine.wikidata.manifests.ManifestException) IOException(java.io.IOException) Project(com.google.refine.model.Project) EditInspector(org.openrefine.wikidata.qa.EditInspector) WikibaseAPIUpdateScheduler(org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler) TermedStatementEntityEdit(org.openrefine.wikidata.updates.TermedStatementEntityEdit) WikibaseSchema(org.openrefine.wikidata.schema.WikibaseSchema) QAWarningStore(org.openrefine.wikidata.qa.QAWarningStore) Engine(com.google.refine.browsing.Engine)

Example 2 with TermedStatementEntityEdit

use of org.openrefine.wikidata.updates.TermedStatementEntityEdit in project OpenRefine by OpenRefine.

the class ReconEntityRewriter method rewrite.

/**
 * Produces a copy of the given edit in which every part (subject, terms and
 * statement edits) has been passed through this rewriter's copy methods, so
 * that references to entities already created are replaced by their fresh
 * identifiers. The subject id might not have been created already, in which
 * case it will be left untouched. All the other entities need to have been
 * created already.
 *
 * @param edit
 *      the edit to rewrite
 * @return
 *      the rewritten update
 * @throws NewEntityNotCreatedYetException
 *      if any non-subject entity had not been created yet
 */
public TermedStatementEntityEdit rewrite(TermedStatementEntityEdit edit) throws NewEntityNotCreatedYetException {
    try {
        EntityIdValue rewrittenSubject = (EntityIdValue) copyValue(edit.getEntityId());

        Set<MonolingualTextValue> rewrittenLabels = edit.getLabels()
                .stream()
                .map(term -> copy(term))
                .collect(Collectors.toSet());
        Set<MonolingualTextValue> rewrittenLabelsIfNew = edit.getLabelsIfNew()
                .stream()
                .map(term -> copy(term))
                .collect(Collectors.toSet());
        Set<MonolingualTextValue> rewrittenDescriptions = edit.getDescriptions()
                .stream()
                .map(term -> copy(term))
                .collect(Collectors.toSet());
        Set<MonolingualTextValue> rewrittenDescriptionsIfNew = edit.getDescriptionsIfNew()
                .stream()
                .map(term -> copy(term))
                .collect(Collectors.toSet());
        Set<MonolingualTextValue> rewrittenAliases = edit.getAliases()
                .stream()
                .map(term -> copy(term))
                .collect(Collectors.toSet());

        List<StatementEdit> rewrittenStatements = edit.getStatementEdits()
                .stream()
                .map(statementEdit -> copy(statementEdit))
                .collect(Collectors.toList());

        return new TermedStatementEntityEdit(
                rewrittenSubject,
                rewrittenStatements,
                rewrittenLabels,
                rewrittenLabelsIfNew,
                rewrittenDescriptions,
                rewrittenDescriptionsIfNew,
                rewrittenAliases);
    } catch (MissingEntityIdFound missing) {
        // Surface the internal exception as the checked exception declared by this method
        throw new NewEntityNotCreatedYetException(missing.value);
    }
}
Also used : NewEntityNotCreatedYetException(org.openrefine.wikidata.schema.exceptions.NewEntityNotCreatedYetException) TermedStatementEntityEdit(org.openrefine.wikidata.updates.TermedStatementEntityEdit) DataObjectFactoryImpl(org.wikidata.wdtk.datamodel.implementation.DataObjectFactoryImpl) ReconMediaInfoIdValue(org.openrefine.wikidata.schema.entityvalues.ReconMediaInfoIdValue) PropertyIdValue(org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue) Set(java.util.Set) ReconPropertyIdValue(org.openrefine.wikidata.schema.entityvalues.ReconPropertyIdValue) Statement(org.wikidata.wdtk.datamodel.interfaces.Statement) Collectors(java.util.stream.Collectors) MediaInfoIdValue(org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue) StatementUpdate(org.wikidata.wdtk.datamodel.interfaces.StatementUpdate) List(java.util.List) StatementEdit(org.openrefine.wikidata.updates.StatementEdit) ReconItemIdValue(org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue) ItemIdValue(org.wikidata.wdtk.datamodel.interfaces.ItemIdValue) Datamodel(org.wikidata.wdtk.datamodel.helpers.Datamodel) DatamodelConverter(org.wikidata.wdtk.datamodel.helpers.DatamodelConverter) EntityIdValue(org.wikidata.wdtk.datamodel.interfaces.EntityIdValue) MonolingualTextValue(org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue) NewEntityNotCreatedYetException(org.openrefine.wikidata.schema.exceptions.NewEntityNotCreatedYetException) EntityIdValue(org.wikidata.wdtk.datamodel.interfaces.EntityIdValue) TermedStatementEntityEdit(org.openrefine.wikidata.updates.TermedStatementEntityEdit) MonolingualTextValue(org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue) StatementEdit(org.openrefine.wikidata.updates.StatementEdit)

Example 3 with TermedStatementEntityEdit

use of org.openrefine.wikidata.updates.TermedStatementEntityEdit in project OpenRefine by OpenRefine.

the class WikibaseSchema method evaluate.

/**
 * Runs the schema over the rows selected by the engine and collects the
 * entity edits it generates.
 *
 * Some warnings will be emitted in the warning store: those are only the ones
 * that are generated at evaluation time (such as invalid formats for dates).
 * Issues detected on candidate statements (such as constraint violations) are
 * not included at this stage.
 *
 * @param project
 *            the project on which the schema should be evaluated
 * @param engine
 *            the engine, which gives access to the current facets
 * @param warningStore
 *            a store in which issues will be emitted
 * @return the entity edits, in their generating order (not merged yet)
 */
public List<TermedStatementEntityEdit> evaluate(Project project, Engine engine, QAWarningStore warningStore) {
    // The visitor receives this list and the warning store, and is run over all filtered rows
    List<TermedStatementEntityEdit> edits = new ArrayList<>();
    engine.getAllFilteredRows().accept(project, new EvaluatingRowVisitor(edits, warningStore));
    return edits;
}
Also used : TermedStatementEntityEdit(org.openrefine.wikidata.updates.TermedStatementEntityEdit) ArrayList(java.util.ArrayList) FilteredRows(com.google.refine.browsing.FilteredRows)

Example 4 with TermedStatementEntityEdit

use of org.openrefine.wikidata.updates.TermedStatementEntityEdit in project OpenRefine by OpenRefine.

the class EditBatchProcessorTest method testMultipleBatches.

/**
 * Checks that a list of edits longer than the processor's batch size is
 * handled in several batches: 66 description edits with a batch size of 50
 * must trigger exactly one fetch per batch and one edit call per item.
 */
@Test
public void testMultipleBatches() throws MediaWikiApiErrorException, InterruptedException, IOException {
    // Prepare test data: one description edit for each of Q124 .. Q189
    MonolingualTextValue description = Datamodel.makeMonolingualTextValue("village in Nepal", "en");
    List<ItemIdValue> qids = new ArrayList<>();
    for (int i = 124; i < 190; i++) {
        qids.add(Datamodel.makeWikidataItemIdValue("Q" + i));
    }
    List<TermedStatementEntityEdit> batch = qids.stream()
            .map(qid -> new TermedStatementEntityEditBuilder(qid).addDescription(description, true).build())
            .collect(Collectors.toList());
    int batchSize = 50;
    int totalEdits = batch.size();

    // Documents returned by the mocked fetcher, split the same way the processor batches its requests
    List<ItemDocument> fullBatch = qids.stream()
            .map(qid -> ItemDocumentBuilder.forItemId(qid)
                    .withStatement(TestingData.generateStatement(qid, TestingData.existingId))
                    .build())
            .collect(Collectors.toList());
    List<ItemDocument> firstBatch = fullBatch.subList(0, batchSize);
    List<ItemDocument> secondBatch = fullBatch.subList(batchSize, fullBatch.size());
    when(fetcher.getEntityDocuments(toQids(firstBatch))).thenReturn(toMap(firstBatch));
    when(fetcher.getEntityDocuments(toQids(secondBatch))).thenReturn(toMap(secondBatch));

    // Run edits
    // TestNG's assertEquals takes (actual, expected): keep that order so failure messages are accurate
    EditBatchProcessor processor = new EditBatchProcessor(fetcher, editor, batch, library, summary, maxlag, tags, batchSize);
    assertEquals(processor.progress(), 0);
    for (int done = 0; done < totalEdits; done++) {
        assertEquals(processor.remainingEdits(), totalEdits - done);
        processor.performEdit();
    }
    assertEquals(processor.remainingEdits(), 0);
    assertEquals(processor.progress(), 100);

    // Check result: the library is still equal to a fresh one, each batch was fetched once, each item edited once
    assertEquals(library, new NewEntityLibrary());
    verify(fetcher, times(1)).getEntityDocuments(toQids(firstBatch));
    verify(fetcher, times(1)).getEntityDocuments(toQids(secondBatch));
    for (ItemDocument doc : fullBatch) {
        verify(editor, times(1)).editEntityDocument(
                Datamodel.makeItemUpdate(
                        doc.getEntityId(),
                        doc.getRevisionId(),
                        Datamodel.makeTermUpdate(Collections.emptyList(), Collections.emptyList()),
                        Datamodel.makeTermUpdate(Collections.singletonList(description), Collections.emptyList()),
                        Collections.emptyMap(),
                        Datamodel.makeStatementUpdate(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()),
                        Collections.emptyList(),
                        Collections.emptyList()),
                false, summary, tags);
    }
}
Also used : Arrays(java.util.Arrays) Assert.assertEquals(org.testng.Assert.assertEquals) Test(org.testng.annotations.Test) MediaInfoDocument(org.wikidata.wdtk.datamodel.interfaces.MediaInfoDocument) EntityDocument(org.wikidata.wdtk.datamodel.interfaces.EntityDocument) ArrayList(java.util.ArrayList) TermUpdate(org.wikidata.wdtk.datamodel.interfaces.TermUpdate) WikibaseDataFetcher(org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher) ItemIdValue(org.wikidata.wdtk.datamodel.interfaces.ItemIdValue) Map(java.util.Map) MonolingualTextValue(org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue) WikidataRefineTest(org.openrefine.wikidata.testing.WikidataRefineTest) TermedStatementEntityEdit(org.openrefine.wikidata.updates.TermedStatementEntityEdit) BeforeMethod(org.testng.annotations.BeforeMethod) ItemDocumentBuilder(org.wikidata.wdtk.datamodel.helpers.ItemDocumentBuilder) WikibaseDataEditor(org.wikidata.wdtk.wikibaseapi.WikibaseDataEditor) Mockito.times(org.mockito.Mockito.times) IOException(java.io.IOException) Mockito.when(org.mockito.Mockito.when) Collectors(java.util.stream.Collectors) MediaInfoIdValue(org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue) Mockito.verify(org.mockito.Mockito.verify) StatementUpdate(org.wikidata.wdtk.datamodel.interfaces.StatementUpdate) List(java.util.List) TermedStatementEntityEditBuilder(org.openrefine.wikidata.updates.TermedStatementEntityEditBuilder) Datamodel(org.wikidata.wdtk.datamodel.helpers.Datamodel) ItemDocument(org.wikidata.wdtk.datamodel.interfaces.ItemDocument) MediaWikiApiErrorException(org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException) Collections(java.util.Collections) TestingData(org.openrefine.wikidata.testing.TestingData) Mockito.mock(org.mockito.Mockito.mock) TermedStatementEntityEditBuilder(org.openrefine.wikidata.updates.TermedStatementEntityEditBuilder) ArrayList(java.util.ArrayList) MonolingualTextValue(org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue) 
ItemIdValue(org.wikidata.wdtk.datamodel.interfaces.ItemIdValue) ItemDocument(org.wikidata.wdtk.datamodel.interfaces.ItemDocument) TermedStatementEntityEdit(org.openrefine.wikidata.updates.TermedStatementEntityEdit) Test(org.testng.annotations.Test) WikidataRefineTest(org.openrefine.wikidata.testing.WikidataRefineTest)

Example 5 with TermedStatementEntityEdit

use of org.openrefine.wikidata.updates.TermedStatementEntityEdit in project OpenRefine by OpenRefine.

the class EditBatchProcessorTest method testNewItem.

/**
 * Checks a batch of two edits: one on an existing item, with a statement
 * pointing to a new item, and one on that new item. After both edits are
 * performed, the library must map the new item's internal id to the id
 * returned by the mocked editor (Q1234).
 */
@Test
public void testNewItem() throws InterruptedException, MediaWikiApiErrorException, IOException {
    List<TermedStatementEntityEdit> batch = new ArrayList<>();
    batch.add(new TermedStatementEntityEditBuilder(TestingData.existingId)
            .addAlias(Datamodel.makeMonolingualTextValue("my new alias", "en"))
            .addStatement(TestingData.generateStatementAddition(TestingData.existingId, TestingData.newIdA))
            .build());
    MonolingualTextValue label = Datamodel.makeMonolingualTextValue("better label", "en");
    batch.add(new TermedStatementEntityEditBuilder(TestingData.newIdA).addAlias(label).build());

    // Plan expected edits
    ItemDocument existingItem = ItemDocumentBuilder.forItemId(TestingData.existingId)
            .withLabel(Datamodel.makeMonolingualTextValue("pomme", "fr"))
            .withDescription(Datamodel.makeMonolingualTextValue("fruit délicieux", "fr"))
            .build();
    when(fetcher.getEntityDocuments(Collections.singletonList(TestingData.existingId.getId())))
            .thenReturn(Collections.singletonMap(TestingData.existingId.getId(), existingItem));
    ItemDocument expectedNewItem = ItemDocumentBuilder.forItemId(TestingData.newIdA).withLabel(label).build();
    ItemDocument createdNewItem = ItemDocumentBuilder.forItemId(Datamodel.makeWikidataItemIdValue("Q1234"))
            .withLabel(label)
            .withRevisionId(37828L)
            .build();
    when(editor.createItemDocument(expectedNewItem, summary, tags)).thenReturn(createdNewItem);

    // TestNG's assertEquals takes (actual, expected): keep that order so failure messages are accurate
    EditBatchProcessor processor = new EditBatchProcessor(fetcher, editor, batch, library, summary, maxlag, tags, 50);
    assertEquals(processor.remainingEdits(), 2);
    assertEquals(processor.progress(), 0);
    processor.performEdit();
    assertEquals(processor.remainingEdits(), 1);
    assertEquals(processor.progress(), 50);
    processor.performEdit();
    assertEquals(processor.remainingEdits(), 0);
    assertEquals(processor.progress(), 100);
    // does not do anything
    processor.performEdit();
    assertEquals(processor.remainingEdits(), 0);
    assertEquals(processor.progress(), 100);

    NewEntityLibrary expectedLibrary = new NewEntityLibrary();
    expectedLibrary.setId(1234L, "Q1234");
    assertEquals(library, expectedLibrary);
}
Also used : TermedStatementEntityEditBuilder(org.openrefine.wikidata.updates.TermedStatementEntityEditBuilder) ItemDocument(org.wikidata.wdtk.datamodel.interfaces.ItemDocument) TermedStatementEntityEdit(org.openrefine.wikidata.updates.TermedStatementEntityEdit) ArrayList(java.util.ArrayList) MonolingualTextValue(org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue) Test(org.testng.annotations.Test) WikidataRefineTest(org.openrefine.wikidata.testing.WikidataRefineTest)

Aggregations

TermedStatementEntityEdit (org.openrefine.wikidata.updates.TermedStatementEntityEdit)109 TermedStatementEntityEditBuilder (org.openrefine.wikidata.updates.TermedStatementEntityEditBuilder)102 Test (org.testng.annotations.Test)97 ItemIdValue (org.wikidata.wdtk.datamodel.interfaces.ItemIdValue)59 Statement (org.wikidata.wdtk.datamodel.interfaces.Statement)57 ConstraintFetcher (org.openrefine.wikidata.qa.ConstraintFetcher)52 Snak (org.wikidata.wdtk.datamodel.interfaces.Snak)49 StatementImpl (org.wikidata.wdtk.datamodel.implementation.StatementImpl)44 SnakGroup (org.wikidata.wdtk.datamodel.interfaces.SnakGroup)34 ValueSnak (org.wikidata.wdtk.datamodel.interfaces.ValueSnak)18 WikidataRefineTest (org.openrefine.wikidata.testing.WikidataRefineTest)13 ArrayList (java.util.ArrayList)12 StatementEdit (org.openrefine.wikidata.updates.StatementEdit)9 EntityIdValue (org.wikidata.wdtk.datamodel.interfaces.EntityIdValue)6 ReconEntityIdValue (org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue)5 NoValueSnak (org.wikidata.wdtk.datamodel.interfaces.NoValueSnak)5 PropertyIdValue (org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue)5 IOException (java.io.IOException)4 List (java.util.List)4 Collectors (java.util.stream.Collectors)4