Search in sources :

Example 1 with WikibaseAPIUpdateScheduler

use of org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler in project OpenRefine by OpenRefine.

the class PreviewWikibaseSchemaCommand method doPost.

/**
 * Builds a preview of the edits a Wikibase schema would generate for a project
 * and writes it to the response as JSON.
 * <p>
 * This command uses POST but is left CSRF-unprotected since it does not
 * incur a side effect or state change in the backend. POST is only used so
 * that large schemas and engine configurations can be passed as parameters.
 * <p>
 * The schema is read from the {@code schema} request parameter when present,
 * and otherwise from the project's {@code wikibaseSchema} overlay model. The
 * manifest is read from the {@code manifest} request parameter. A missing or
 * unparsable schema or manifest is reported through {@code respondError};
 * any other exception is reported through {@code respondException}.
 *
 * @param request
 *            the request carrying the project, engine, schema and manifest parameters
 * @param response
 *            the response the preview (or the error) is written to
 */
@Override
public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    try {
        Project project = getProject(request);
        response.setCharacterEncoding("UTF-8");
        response.setHeader("Content-Type", "application/json");

        // Resolve the schema: an explicit request parameter wins over the one stored on the project.
        WikibaseSchema schema;
        String schemaParam = request.getParameter("schema");
        if (schemaParam == null) {
            schema = (WikibaseSchema) project.overlayModels.get("wikibaseSchema");
        } else {
            try {
                schema = WikibaseSchema.reconstruct(schemaParam);
            } catch (IOException parseFailure) {
                respondError(response, "Wikibase schema could not be parsed. Error message: " + parseFailure.getMessage());
                return;
            }
        }
        if (schema == null) {
            respondError(response, "No Wikibase schema provided.");
            return;
        }

        // Resolve the manifest: it can only come from the request.
        String manifestParam = request.getParameter("manifest");
        Manifest manifest;
        try {
            manifest = manifestParam == null ? null : ManifestParser.parse(manifestParam);
        } catch (ManifestException parseFailure) {
            respondError(response, "Wikibase manifest could not be parsed. Error message: " + parseFailure.getMessage());
            return;
        }
        if (manifest == null) {
            respondError(response, "No Wikibase manifest provided.");
            return;
        }

        // Evaluate the schema on the project rows selected by the engine.
        QAWarningStore warnings = new QAWarningStore();
        Engine engine = getEngine(request, project);
        List<TermedStatementEntityEdit> allEdits = schema.evaluate(project, engine, warnings);

        // Inspect the edits; the resulting warnings are collected in the warning store.
        EditInspector editInspector = new EditInspector(warnings, manifest);
        editInspector.inspect(allEdits, schema);

        // Schedule with the default scheduler, drop null edits, and keep at most 10 for the preview.
        WikibaseAPIUpdateScheduler defaultScheduler = new WikibaseAPIUpdateScheduler();
        List<TermedStatementEntityEdit> scheduledEdits = defaultScheduler.schedule(allEdits);
        List<TermedStatementEntityEdit> effectiveEdits = scheduledEdits.stream()
                .filter(edit -> !edit.isNull())
                .collect(Collectors.toList());
        List<TermedStatementEntityEdit> previewedEdits = effectiveEdits.stream()
                .limit(10)
                .collect(Collectors.toList());

        PreviewResults results = new PreviewResults(
                warnings.getWarnings(),
                warnings.getMaxSeverity(),
                warnings.getNbWarnings(),
                effectiveEdits.size(),
                previewedEdits);
        respondJSON(response, results);
    } catch (Exception failure) {
        respondException(response, failure);
    }
}
Also used : Project(com.google.refine.model.Project) ServletException(javax.servlet.ServletException) ManifestException(org.openrefine.wikidata.manifests.ManifestException) TermedStatementEntityEdit(org.openrefine.wikidata.updates.TermedStatementEntityEdit) HttpServletResponse(javax.servlet.http.HttpServletResponse) IOException(java.io.IOException) EditInspector(org.openrefine.wikidata.qa.EditInspector) Collectors(java.util.stream.Collectors) ManifestParser(org.openrefine.wikidata.manifests.ManifestParser) List(java.util.List) HttpServletRequest(javax.servlet.http.HttpServletRequest) WikibaseSchema(org.openrefine.wikidata.schema.WikibaseSchema) WikibaseAPIUpdateScheduler(org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler) CommandUtilities.respondError(org.openrefine.wikidata.commands.CommandUtilities.respondError) QAWarningStore(org.openrefine.wikidata.qa.QAWarningStore) Command(com.google.refine.commands.Command) Engine(com.google.refine.browsing.Engine) Manifest(org.openrefine.wikidata.manifests.Manifest) IOException(java.io.IOException) Manifest(org.openrefine.wikidata.manifests.Manifest) ManifestException(org.openrefine.wikidata.manifests.ManifestException) ServletException(javax.servlet.ServletException) ManifestException(org.openrefine.wikidata.manifests.ManifestException) IOException(java.io.IOException) Project(com.google.refine.model.Project) EditInspector(org.openrefine.wikidata.qa.EditInspector) WikibaseAPIUpdateScheduler(org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler) TermedStatementEntityEdit(org.openrefine.wikidata.updates.TermedStatementEntityEdit) WikibaseSchema(org.openrefine.wikidata.schema.WikibaseSchema) QAWarningStore(org.openrefine.wikidata.qa.QAWarningStore) Engine(com.google.refine.browsing.Engine)

Example 2 with WikibaseAPIUpdateScheduler

use of org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler in project OpenRefine by OpenRefine.

the class EditInspector method inspect.

/**
 * Inspects a batch of edits with the registered scrutinizers.
 * <p>
 * The batch is scheduled and grouped by subject first, so each scrutinizer
 * sees one merged edit per entity; null edits are skipped. If no warning has
 * been recorded once all scrutinizers have run, a single informational
 * {@code no-issue-detected} warning is added to the warning store.
 *
 * @param editBatch
 *            the edits to inspect
 * @param schema
 *            the schema whose properties are extracted and, when an entity
 *            cache is available, prefetched before inspection
 * @throws ExecutionException
 *             presumably when fetching documents through the entity cache
 *             fails (the throwing call is not visible here - confirm)
 */
public void inspect(List<TermedStatementEntityEdit> editBatch, WikibaseSchema schema) throws ExecutionException {
    // Collect every property used by the schema; with a cache available,
    // prefetch their documents in one API call rather than one by one.
    Set<PropertyIdValue> schemaProperties = new SchemaPropertyExtractor().getAllProperties(schema);
    if (entityCache != null) {
        List<PropertyIdValue> propertyList = schemaProperties.stream().collect(Collectors.toList());
        entityCache.getMultipleDocuments(propertyList);
    }

    // Schedule the edits with some scheduler,
    // so that all newly created entities appear in the batch.
    List<TermedStatementEntityEdit> scheduledEdits = new WikibaseAPIUpdateScheduler().schedule(editBatch);

    // Merge the edits that share a subject.
    Map<EntityIdValue, TermedStatementEntityEdit> editsBySubject = TermedStatementEntityEdit.groupBySubject(scheduledEdits);
    List<TermedStatementEntityEdit> mergedEdits = editsBySubject.values().stream().collect(Collectors.toList());

    for (EditScrutinizer checker : scrutinizers.values()) {
        checker.batchIsBeginning();
    }

    for (TermedStatementEntityEdit edit : mergedEdits) {
        if (edit.isNull()) {
            continue;
        }
        for (EditScrutinizer checker : scrutinizers.values()) {
            checker.scrutinize(edit);
        }
    }

    for (EditScrutinizer checker : scrutinizers.values()) {
        checker.batchIsFinished();
    }

    // Always report something, even when no scrutinizer raised a warning.
    if (warningStore.getNbWarnings() == 0) {
        warningStore.addWarning(new QAWarning("no-issue-detected", null, QAWarning.Severity.INFO, 0));
    }
}
Also used : PropertyIdValue(org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue) WikibaseAPIUpdateScheduler(org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler) EntityIdValue(org.wikidata.wdtk.datamodel.interfaces.EntityIdValue) TermedStatementEntityEdit(org.openrefine.wikidata.updates.TermedStatementEntityEdit)

Aggregations

TermedStatementEntityEdit (org.openrefine.wikidata.updates.TermedStatementEntityEdit)2 WikibaseAPIUpdateScheduler (org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler)2 Engine (com.google.refine.browsing.Engine)1 Command (com.google.refine.commands.Command)1 Project (com.google.refine.model.Project)1 IOException (java.io.IOException)1 List (java.util.List)1 Collectors (java.util.stream.Collectors)1 ServletException (javax.servlet.ServletException)1 HttpServletRequest (javax.servlet.http.HttpServletRequest)1 HttpServletResponse (javax.servlet.http.HttpServletResponse)1 CommandUtilities.respondError (org.openrefine.wikidata.commands.CommandUtilities.respondError)1 Manifest (org.openrefine.wikidata.manifests.Manifest)1 ManifestException (org.openrefine.wikidata.manifests.ManifestException)1 ManifestParser (org.openrefine.wikidata.manifests.ManifestParser)1 EditInspector (org.openrefine.wikidata.qa.EditInspector)1 QAWarningStore (org.openrefine.wikidata.qa.QAWarningStore)1 WikibaseSchema (org.openrefine.wikidata.schema.WikibaseSchema)1 EntityIdValue (org.wikidata.wdtk.datamodel.interfaces.EntityIdValue)1 PropertyIdValue (org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue)1