use of org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue in project OpenRefine by OpenRefine.
the class EditBatchProcessorTest method testMultipleBatchesMediaInfo.
/**
 * Runs label edits on 66 media info entities (M124 to M189) through an
 * {@link EditBatchProcessor} configured with a batch size of 50, and checks
 * that the entities are fetched in two batches and that each one is edited
 * exactly once.
 */
@Test
public void testMultipleBatchesMediaInfo() throws MediaWikiApiErrorException, InterruptedException, IOException {
    // Fixture: one English label, applied to every entity id in [firstId, endId)
    final int firstId = 124;
    final int endId = 190;
    final int batchSize = 50;
    MonolingualTextValue caption = Datamodel.makeMonolingualTextValue("village in Nepal", "en");
    TermUpdate expectedLabels = Datamodel.makeTermUpdate(Collections.singletonList(caption), Collections.emptyList());

    List<TermedStatementEntityEdit> edits = new ArrayList<>();
    List<MediaInfoDocument> allDocs = new ArrayList<>();
    for (int n = firstId; n < endId; n++) {
        MediaInfoIdValue mid = Datamodel.makeWikimediaCommonsMediaInfoIdValue("M" + n);
        edits.add(new TermedStatementEntityEditBuilder(mid).addLabel(caption, false).build());
        allDocs.add(Datamodel.makeMediaInfoDocument(mid));
    }

    // The fetcher is expected to be queried once per batch of documents
    List<MediaInfoDocument> headDocs = allDocs.subList(0, batchSize);
    List<MediaInfoDocument> tailDocs = allDocs.subList(batchSize, allDocs.size());
    when(fetcher.getEntityDocuments(toMids(headDocs))).thenReturn(toMapMediaInfo(headDocs));
    when(fetcher.getEntityDocuments(toMids(tailDocs))).thenReturn(toMapMediaInfo(tailDocs));

    // Exercise: perform the edits one by one, watching the remaining count go down
    EditBatchProcessor processor = new EditBatchProcessor(fetcher, editor, edits, library, summary, maxlag, tags, batchSize);
    assertEquals(0, processor.progress());
    for (int remaining = endId - firstId; remaining > 0; remaining--) {
        assertEquals(processor.remainingEdits(), remaining);
        processor.performEdit();
    }
    assertEquals(0, processor.remainingEdits());
    assertEquals(100, processor.progress());

    // Verify: no new entity was registered, each batch was fetched once, each entity edited once
    assertEquals(new NewEntityLibrary(), library);
    verify(fetcher, times(1)).getEntityDocuments(toMids(headDocs));
    verify(fetcher, times(1)).getEntityDocuments(toMids(tailDocs));
    StatementUpdate noStatements = Datamodel.makeStatementUpdate(Collections.emptyList(), Collections.emptyList(), Collections.emptyList());
    for (MediaInfoDocument doc : allDocs) {
        MediaInfoIdValue mid = (MediaInfoIdValue) doc.getEntityId();
        verify(editor, times(1)).editEntityDocument(Datamodel.makeMediaInfoUpdate(mid, doc.getRevisionId(), expectedLabels, noStatements), false, summary, tags);
    }
}
use of org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue in project OpenRefine by OpenRefine.
the class TermedStatementEntityEdit method toEntityUpdate.
/**
 * Builds the update to apply to an existing entity, given the current state
 * of that entity.
 * <p>
 * Plain labels and descriptions are always part of the update. Those marked
 * "if new" are only added when the supplied document has no term in the same
 * language. Only item and media info subjects are handled.
 *
 * @param entityDocument
 *            the current document of the edited entity; it is cast to the
 *            document type that matches the subject id
 * @return the update, based on the revision id of the supplied document
 * @throws NotImplementedException
 *             if the subject id is neither an item id nor a media info id
 */
public EntityUpdate toEntityUpdate(EntityDocument entityDocument) {
    Validate.isFalse(isNew(), "Cannot create a corresponding entity update for a creation of a new entity.");

    if (id instanceof ItemIdValue) {
        ItemDocument currentItem = (ItemDocument) entityDocument;

        // Labels: unconditional ones first, then those whose language is still free
        List<MonolingualTextValue> labelsToSet = getLabels().stream().collect(Collectors.toList());
        for (MonolingualTextValue candidate : getLabelsIfNew()) {
            if (!currentItem.getLabels().containsKey(candidate.getLanguageCode())) {
                labelsToSet.add(candidate);
            }
        }
        TermUpdate labelChanges = Datamodel.makeTermUpdate(labelsToSet, Collections.emptyList());

        // Descriptions: same rule as for labels
        List<MonolingualTextValue> descriptionsToSet = getDescriptions().stream().collect(Collectors.toList());
        for (MonolingualTextValue candidate : getDescriptionsIfNew()) {
            if (!currentItem.getDescriptions().containsKey(candidate.getLanguageCode())) {
                descriptionsToSet.add(candidate);
            }
        }
        TermUpdate descriptionChanges = Datamodel.makeTermUpdate(descriptionsToSet, Collections.emptyList());

        // Aliases: one additive update per language code
        Set<MonolingualTextValue> allAliases = getAliases();
        Map<String, List<MonolingualTextValue>> aliasesByLanguage = allAliases.stream()
                .collect(Collectors.groupingBy(MonolingualTextValue::getLanguageCode));
        Map<String, AliasUpdate> aliasChanges = aliasesByLanguage.entrySet().stream()
                .collect(Collectors.toMap(
                        Entry::getKey,
                        perLanguage -> Datamodel.makeAliasUpdate(perLanguage.getValue(), Collections.emptyList())));

        // Statements
        StatementUpdate statementChanges = toStatementUpdate(currentItem);

        return Datamodel.makeItemUpdate(
                (ItemIdValue) getEntityId(),
                entityDocument.getRevisionId(),
                labelChanges,
                descriptionChanges,
                aliasChanges,
                statementChanges,
                Collections.emptyList(),
                Collections.emptyList());
    }

    if (id instanceof MediaInfoIdValue) {
        MediaInfoDocument currentMediaInfo = (MediaInfoDocument) entityDocument;

        // Labels (captions): unconditional ones first, then those whose language is still free
        List<MonolingualTextValue> captionsToSet = getLabels().stream().collect(Collectors.toList());
        for (MonolingualTextValue candidate : getLabelsIfNew()) {
            if (!currentMediaInfo.getLabels().containsKey(candidate.getLanguageCode())) {
                captionsToSet.add(candidate);
            }
        }
        TermUpdate captionChanges = Datamodel.makeTermUpdate(captionsToSet, Collections.emptyList());

        // Statements
        StatementUpdate statementChanges = toStatementUpdate(currentMediaInfo);

        return Datamodel.makeMediaInfoUpdate(
                (MediaInfoIdValue) id,
                entityDocument.getRevisionId(),
                captionChanges,
                statementChanges);
    }

    throw new NotImplementedException("Editing entities of type " + id.getEntityType() + " is not supported yet.");
}
use of org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue in project OpenRefine by OpenRefine.
the class EditBatchProcessor method performEdit.
/**
 * Performs the next edit in the batch; does nothing when no edits remain.
 * <p>
 * The edit under the cursor is first rewritten so that references to newly
 * created entities use the ids recorded in the library. If such an entity has
 * not been created yet, the edit is skipped with a warning. MediaWiki API
 * errors and I/O errors raised while editing are logged, and the cursor still
 * moves on to the next edit.
 *
 * @throws InterruptedException
 *             presumably propagated from preparing a new batch; confirm
 *             against {@code prepareNewBatch()}
 * @throws NotImplementedException
 *             if the edit would create a new media info entity; the cursor is
 *             not advanced in that case
 */
public void performEdit() throws InterruptedException {
    if (remainingEdits() == 0) {
        return;
    }
    // The current batch is exhausted: load the next one before reading from it
    if (batchCursor == currentBatch.size()) {
        prepareNewBatch();
    }

    TermedStatementEntityEdit pending = currentBatch.get(batchCursor);

    // Rewrite mentions to new entities
    ReconEntityRewriter entityRewriter = new ReconEntityRewriter(library, pending.getEntityId());
    final TermedStatementEntityEdit edit;
    try {
        edit = entityRewriter.rewrite(pending);
    } catch (NewEntityNotCreatedYetException e) {
        logger.warn("Failed to rewrite update on entity " + pending.getEntityId() + ". Missing entity: " + e.getMissingEntity() + ". Skipping update.");
        batchCursor++;
        return;
    }

    try {
        if (!edit.isNew()) {
            // Existing entities: compute the update against the fetched document
            String entityId = edit.getEntityId().getId();
            EntityUpdate entityUpdate = edit.toEntityUpdate(currentDocs.get(entityId));
            editor.editEntityDocument(entityUpdate, false, summary, tags);
        } else {
            // New entities
            ReconEntityIdValue newCell = (ReconEntityIdValue) edit.getEntityId();
            // TODO Antonin, 2022-02-11: remove this casting once we have https://github.com/Wikidata/Wikidata-Toolkit/issues/651
            if (newCell instanceof ItemIdValue) {
                ItemDocument toCreate = (ItemDocument) edit.normalizeLabelsAndAliases().toNewEntity();
                ItemDocument created = editor.createItemDocument(toCreate, summary, tags);
                // Record the id assigned to the new item so later edits can refer to it
                library.setId(newCell.getReconInternalId(), created.getEntityId().getId());
            } else if (newCell instanceof MediaInfoIdValue) {
                edit.normalizeLabelsAndAliases();
                throw new NotImplementedException();
            }
        }
    } catch (MediaWikiApiErrorException e) {
        // TODO find a way to report these errors to the user in a nice way
        logger.warn("MediaWiki error while editing [" + e.getErrorCode() + "]: " + e.getErrorMessage());
    } catch (IOException e) {
        logger.warn("IO error while editing: " + e.getMessage());
    }
    batchCursor++;
}
use of org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue in project OpenRefine by OpenRefine.
the class WbEntityVariable method fromCell.
/**
 * Converts a reconciled cell into the entity id it designates.
 * <p>
 * Cells judged as new yield a {@link ReconItemIdValue}. Matched cells yield
 * an item, media info or property recon id, depending on the type of the
 * matched id, provided the identifier space of the recon equals the base IRI
 * that the context reports for that entity type.
 *
 * @param cell
 *            the cell to convert
 * @param ctxt
 *            the context providing the expected base IRIs and collecting
 *            warnings
 * @return the recon entity id for the cell
 * @throws SkipSchemaExpressionException
 *             if the cell has no recon, is neither matched nor new, is
 *             matched to an unsupported entity type, or comes from an
 *             unexpected identifier space (an "invalid-identifier-space"
 *             warning is added to the context in that last case)
 */
@Override
public EntityIdValue fromCell(Cell cell, ExpressionContext ctxt) throws SkipSchemaExpressionException {
    if (cell.recon == null) {
        throw new SkipSchemaExpressionException();
    }
    if (Judgment.New.equals(cell.recon.judgment)) {
        return new ReconItemIdValue(cell.recon, cell.value.toString());
    }
    if (!Judgment.Matched.equals(cell.recon.judgment)) {
        throw new SkipSchemaExpressionException();
    }

    // Parse the matched id to find out which kind of entity it designates
    EntityIdValue matchedId = EntityIdValueImpl.fromId(cell.recon.match.id, cell.recon.identifierSpace);
    final EntityIdValue result;
    final String typeName;
    if (matchedId instanceof ItemIdValue) {
        result = new ReconItemIdValue(cell.recon, cell.value.toString());
        typeName = "item";
    } else if (matchedId instanceof MediaInfoIdValue) {
        result = new ReconMediaInfoIdValue(cell.recon, cell.value.toString());
        typeName = "mediainfo";
    } else if (matchedId instanceof PropertyIdValue) {
        result = new ReconPropertyIdValue(cell.recon, cell.value.toString());
        typeName = "property";
    } else {
        throw new SkipSchemaExpressionException();
    }

    // Accept the id only if it comes from the identifier space expected for its type
    String identifierSpace = cell.recon.identifierSpace;
    if (identifierSpace != null && identifierSpace.equals(ctxt.getBaseIRIForEntityType(typeName))) {
        return result;
    }

    QAWarning spaceWarning = new QAWarning("invalid-identifier-space", null, QAWarning.Severity.INFO, 1);
    spaceWarning.setProperty("example_cell", cell.value.toString());
    spaceWarning.setProperty("expected_site_iri", ctxt.getBaseIRIForEntityType(typeName));
    ctxt.addWarning(spaceWarning);
    throw new SkipSchemaExpressionException();
}
Aggregations