use of org.openrefine.wikidata.updates.TermedStatementEntityEdit in project OpenRefine by OpenRefine.
the class PreviewWikibaseSchemaCommand method doPost.
/**
 * This command uses POST but is left CSRF-unprotected, since it does not
 * incur any side effect or state change in the backend.
 * It uses POST to ensure that large schemas and engine configurations
 * can be passed as parameters.
 */
@Override
public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    try {
        Project project = getProject(request);
        response.setCharacterEncoding("UTF-8");
        response.setHeader("Content-Type", "application/json");
        String schemaJson = request.getParameter("schema");
        WikibaseSchema schema = null;
        if (schemaJson != null) {
            try {
                schema = WikibaseSchema.reconstruct(schemaJson);
            } catch (IOException e) {
                respondError(response, "Wikibase schema could not be parsed. Error message: " + e.getMessage());
                return;
            }
        } else {
            schema = (WikibaseSchema) project.overlayModels.get("wikibaseSchema");
        }
        if (schema == null) {
            respondError(response, "No Wikibase schema provided.");
            return;
        }
        Manifest manifest = null;
        String manifestJson = request.getParameter("manifest");
        if (manifestJson != null) {
            try {
                manifest = ManifestParser.parse(manifestJson);
            } catch (ManifestException e) {
                respondError(response, "Wikibase manifest could not be parsed. Error message: " + e.getMessage());
                return;
            }
        }
        if (manifest == null) {
            respondError(response, "No Wikibase manifest provided.");
            return;
        }
        QAWarningStore warningStore = new QAWarningStore();
        // Evaluate the schema on the project, restricted by the current facets
        Engine engine = getEngine(request, project);
        List<TermedStatementEntityEdit> editBatch = schema.evaluate(project, engine, warningStore);
        // Inspect the edits and generate warnings
        EditInspector inspector = new EditInspector(warningStore, manifest);
        inspector.inspect(editBatch, schema);
        // Return the first 10 non-null edits, scheduled with the default scheduler
        WikibaseAPIUpdateScheduler scheduler = new WikibaseAPIUpdateScheduler();
        List<TermedStatementEntityEdit> nonNullEdits = scheduler.schedule(editBatch).stream()
                .filter(e -> !e.isNull())
                .collect(Collectors.toList());
        List<TermedStatementEntityEdit> firstEdits = nonNullEdits.stream()
                .limit(10)
                .collect(Collectors.toList());
        PreviewResults previewResults = new PreviewResults(
                warningStore.getWarnings(), warningStore.getMaxSeverity(),
                warningStore.getNbWarnings(), nonNullEdits.size(), firstEdits);
        respondJSON(response, previewResults);
    } catch (Exception e) {
        respondException(response, e);
    }
}
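For illustration, this command can be exercised directly over HTTP. The sketch below is an assumption based on the command above, not a documented API: the endpoint path, port and project id are illustrative placeholders.

import java.net.URI;
import java.net.URLEncoder;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;

public class PreviewSchemaClient {
    public static void main(String[] args) throws Exception {
        String schemaJson = "{...}";   // a serialized WikibaseSchema (placeholder)
        String manifestJson = "{...}"; // a serialized Wikibase manifest (placeholder)
        // POST is used so that large schemas fit, as the Javadoc above explains
        String body = "project=1234567890"
                + "&schema=" + URLEncoder.encode(schemaJson, StandardCharsets.UTF_8)
                + "&manifest=" + URLEncoder.encode(manifestJson, StandardCharsets.UTF_8);
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("http://127.0.0.1:3333/command/wikidata/preview-wikibase-schema")) // assumed path
                .header("Content-Type", "application/x-www-form-urlencoded")
                .POST(HttpRequest.BodyPublishers.ofString(body))
                .build();
        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.body()); // the serialized PreviewResults
    }
}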
use of org.openrefine.wikidata.updates.TermedStatementEntityEdit in project OpenRefine by OpenRefine.
the class ReconEntityRewriter method rewrite.
/**
 * Rewrites an edit, replacing references to all entities already created
 * by their fresh identifiers. The subject id might not have been created
 * yet, in which case it is left untouched. All other entities must have
 * been created already.
 *
 * @param edit
 *            the edit to rewrite
 * @return the rewritten edit
 * @throws NewEntityNotCreatedYetException
 *             if any non-subject entity has not been created yet
 */
public TermedStatementEntityEdit rewrite(TermedStatementEntityEdit edit) throws NewEntityNotCreatedYetException {
    try {
        EntityIdValue subject = (EntityIdValue) copyValue(edit.getEntityId());
        Set<MonolingualTextValue> labels = edit.getLabels().stream()
                .map(l -> copy(l)).collect(Collectors.toSet());
        Set<MonolingualTextValue> labelsIfNew = edit.getLabelsIfNew().stream()
                .map(l -> copy(l)).collect(Collectors.toSet());
        Set<MonolingualTextValue> descriptions = edit.getDescriptions().stream()
                .map(l -> copy(l)).collect(Collectors.toSet());
        Set<MonolingualTextValue> descriptionsIfNew = edit.getDescriptionsIfNew().stream()
                .map(l -> copy(l)).collect(Collectors.toSet());
        Set<MonolingualTextValue> aliases = edit.getAliases().stream()
                .map(l -> copy(l)).collect(Collectors.toSet());
        List<StatementEdit> addedStatements = edit.getStatementEdits().stream()
                .map(l -> copy(l)).collect(Collectors.toList());
        return new TermedStatementEntityEdit(subject, addedStatements,
                labels, labelsIfNew, descriptions, descriptionsIfNew, aliases);
    } catch (MissingEntityIdFound e) {
        throw new NewEntityNotCreatedYetException(e.value);
    }
}
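A minimal usage sketch for this method. The ReconEntityRewriter(library, subject) constructor is an assumption based on how the rewriter is used elsewhere in OpenRefine; the ids and terms are made up.

NewEntityLibrary library = new NewEntityLibrary();
library.setId(1234L, "Q789"); // the reconciled new entity 1234 was created as Q789

ItemIdValue subject = Datamodel.makeWikidataItemIdValue("Q42");
TermedStatementEntityEdit edit = new TermedStatementEntityEditBuilder(subject)
        .addDescription(Datamodel.makeMonolingualTextValue("a description", "en"), true)
        .build();

ReconEntityRewriter rewriter = new ReconEntityRewriter(library, subject); // assumed constructor
try {
    TermedStatementEntityEdit rewritten = rewriter.rewrite(edit);
    // all references to already-created entities now carry their fresh Q-ids
} catch (NewEntityNotCreatedYetException e) {
    // a non-subject entity referenced by the edit has not been created yet
}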
use of org.openrefine.wikidata.updates.TermedStatementEntityEdit in project OpenRefine by OpenRefine.
the class WikibaseSchema method evaluate.
/**
 * Evaluates the schema on a project, returning the list of entity edits
 * (TermedStatementEntityEdit) generated by the schema.
 *
 * Some warnings will be emitted in the warning store: those are only the
 * ones that are generated at evaluation time (such as invalid formats for
 * dates). Issues detected on candidate statements (such as constraint
 * violations) are not reported at this stage.
 *
 * @param project
 *            the project on which the schema should be evaluated
 * @param engine
 *            the engine, which gives access to the current facets
 * @param warningStore
 *            a store in which issues will be emitted
 * @return the entity edits, in the order in which they were generated
 *         (not merged yet)
 */
public List<TermedStatementEntityEdit> evaluate(Project project, Engine engine, QAWarningStore warningStore) {
    List<TermedStatementEntityEdit> result = new ArrayList<>();
    FilteredRows filteredRows = engine.getAllFilteredRows();
    filteredRows.accept(project, new EvaluatingRowVisitor(result, warningStore));
    return result;
}
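A sketch of calling evaluate outside the HTTP command, assuming a default row-based Engine over the whole project; the warning-store accessors are the ones used by PreviewWikibaseSchemaCommand above.

QAWarningStore warningStore = new QAWarningStore();
Engine engine = new Engine(project); // assumed default engine: no facets, all rows pass
List<TermedStatementEntityEdit> edits = schema.evaluate(project, engine, warningStore);
System.out.println(edits.size() + " edits generated, "
        + warningStore.getNbWarnings() + " warnings (max severity: "
        + warningStore.getMaxSeverity() + ")");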
use of org.openrefine.wikidata.updates.TermedStatementEntityEdit in project OpenRefine by OpenRefine.
the class EditBatchProcessorTest method testMultipleBatches.
@Test
public void testMultipleBatches() throws MediaWikiApiErrorException, InterruptedException, IOException {
    // Prepare test data: 66 items (Q124 to Q189), so two batches of 50 and 16
    MonolingualTextValue description = Datamodel.makeMonolingualTextValue("village in Nepal", "en");
    List<String> ids = new ArrayList<>();
    for (int i = 124; i < 190; i++) {
        ids.add("Q" + String.valueOf(i));
    }
    List<ItemIdValue> qids = ids.stream()
            .map(e -> Datamodel.makeWikidataItemIdValue(e))
            .collect(Collectors.toList());
    List<TermedStatementEntityEdit> batch = qids.stream()
            .map(qid -> new TermedStatementEntityEditBuilder(qid)
                    .addDescription(description, true)
                    .build())
            .collect(Collectors.toList());
    int batchSize = 50;
    List<ItemDocument> fullBatch = qids.stream()
            .map(qid -> ItemDocumentBuilder.forItemId(qid)
                    .withStatement(TestingData.generateStatement(qid, TestingData.existingId))
                    .build())
            .collect(Collectors.toList());
    List<ItemDocument> firstBatch = fullBatch.subList(0, batchSize);
    List<ItemDocument> secondBatch = fullBatch.subList(batchSize, fullBatch.size());
    when(fetcher.getEntityDocuments(toQids(firstBatch))).thenReturn(toMap(firstBatch));
    when(fetcher.getEntityDocuments(toQids(secondBatch))).thenReturn(toMap(secondBatch));
    // Run the edits
    EditBatchProcessor processor = new EditBatchProcessor(fetcher, editor, batch, library, summary, maxlag, tags, batchSize);
    assertEquals(0, processor.progress());
    for (int i = 124; i < 190; i++) {
        assertEquals(processor.remainingEdits(), 190 - i);
        processor.performEdit();
    }
    assertEquals(0, processor.remainingEdits());
    assertEquals(100, processor.progress());
    // Check the results: no new entities were created, and each batch was fetched exactly once
    assertEquals(new NewEntityLibrary(), library);
    verify(fetcher, times(1)).getEntityDocuments(toQids(firstBatch));
    verify(fetcher, times(1)).getEntityDocuments(toQids(secondBatch));
    for (ItemDocument doc : fullBatch) {
        verify(editor, times(1)).editEntityDocument(
                Datamodel.makeItemUpdate(doc.getEntityId(), doc.getRevisionId(),
                        Datamodel.makeTermUpdate(Collections.emptyList(), Collections.emptyList()),
                        Datamodel.makeTermUpdate(Collections.singletonList(description), Collections.emptyList()),
                        Collections.emptyMap(),
                        Datamodel.makeStatementUpdate(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()),
                        Collections.emptyList(), Collections.emptyList()),
                false, summary, tags);
    }
}
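The test relies on two helpers, toQids and toMap, which are not part of this excerpt. A plausible sketch of them inside the test class, assuming they simply key the mocked documents by their Q-ids:

private List<String> toQids(List<ItemDocument> docs) {
    // e.g. ["Q124", "Q125", ...], the string ids the mocked fetcher is stubbed with
    return docs.stream().map(doc -> doc.getEntityId().getId()).collect(Collectors.toList());
}

private Map<String, EntityDocument> toMap(List<ItemDocument> docs) {
    // the map shape returned by the mocked fetcher above
    return docs.stream().collect(Collectors.toMap(doc -> doc.getEntityId().getId(), doc -> doc));
}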
use of org.openrefine.wikidata.updates.TermedStatementEntityEdit in project OpenRefine by OpenRefine.
the class EditBatchProcessorTest method testNewItem.
@Test
public void testNewItem() throws InterruptedException, MediaWikiApiErrorException, IOException {
    List<TermedStatementEntityEdit> batch = new ArrayList<>();
    batch.add(new TermedStatementEntityEditBuilder(TestingData.existingId)
            .addAlias(Datamodel.makeMonolingualTextValue("my new alias", "en"))
            .addStatement(TestingData.generateStatementAddition(TestingData.existingId, TestingData.newIdA))
            .build());
    MonolingualTextValue label = Datamodel.makeMonolingualTextValue("better label", "en");
    batch.add(new TermedStatementEntityEditBuilder(TestingData.newIdA)
            .addAlias(label)
            .build());
    // Plan the expected edits
    ItemDocument existingItem = ItemDocumentBuilder.forItemId(TestingData.existingId)
            .withLabel(Datamodel.makeMonolingualTextValue("pomme", "fr"))
            .withDescription(Datamodel.makeMonolingualTextValue("fruit délicieux", "fr"))
            .build();
    when(fetcher.getEntityDocuments(Collections.singletonList(TestingData.existingId.getId())))
            .thenReturn(Collections.singletonMap(TestingData.existingId.getId(), existingItem));
    ItemDocument expectedNewItem = ItemDocumentBuilder.forItemId(TestingData.newIdA).withLabel(label).build();
    ItemDocument createdNewItem = ItemDocumentBuilder.forItemId(Datamodel.makeWikidataItemIdValue("Q1234"))
            .withLabel(label)
            .withRevisionId(37828L)
            .build();
    when(editor.createItemDocument(expectedNewItem, summary, tags)).thenReturn(createdNewItem);
    EditBatchProcessor processor = new EditBatchProcessor(fetcher, editor, batch, library, summary, maxlag, tags, 50);
    assertEquals(2, processor.remainingEdits());
    assertEquals(0, processor.progress());
    processor.performEdit();
    assertEquals(1, processor.remainingEdits());
    assertEquals(50, processor.progress());
    processor.performEdit();
    assertEquals(0, processor.remainingEdits());
    assertEquals(100, processor.progress());
    // Performing a further edit does not do anything
    processor.performEdit();
    assertEquals(0, processor.remainingEdits());
    assertEquals(100, processor.progress());
    // The library should record the Q-id assigned to the newly created entity
    NewEntityLibrary expectedLibrary = new NewEntityLibrary();
    expectedLibrary.setId(1234L, "Q1234");
    assertEquals(expectedLibrary, library);
}
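Outside the tests, the same API drives a full upload. A minimal sketch, assuming fetcher, editor, batch, library, summary, maxlag and tags are set up as in the tests above, inside a method that declares the same checked exceptions:

EditBatchProcessor processor = new EditBatchProcessor(
        fetcher, editor, batch, library, summary, maxlag, tags, 50);
while (processor.remainingEdits() > 0) {
    // fetches the next batch of current documents when needed, then edits one entity
    processor.performEdit();
}
// newly created entities are now recorded in the library (placeholder id to Q-id)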