Use of org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue in the OpenRefine project: class TermedStatementEntityEditTest, method testMergeLabelsIfNewOverriding.
@Test
public void testMergeLabelsIfNewOverriding() {
    MonolingualTextValue overridingLabel = Datamodel.makeMonolingualTextValue("first label", "en");
    MonolingualTextValue tentativeLabel = Datamodel.makeMonolingualTextValue("second label", "en");

    // One edit adds the label in overriding mode, the other only "if new".
    TermedStatementEntityEdit overridingEdit = new TermedStatementEntityEditBuilder(existingSubject)
            .addLabel(overridingLabel, true)
            .build();
    TermedStatementEntityEdit tentativeEdit = new TermedStatementEntityEditBuilder(existingSubject)
            .addLabel(tentativeLabel, false)
            .build();

    TermedStatementEntityEdit merged = overridingEdit.merge(tentativeEdit);

    // The overriding label wins for that language; the "if new" label is dropped.
    assertEquals(Collections.singleton(overridingLabel), merged.getLabels());
    assertEquals(Collections.emptySet(), merged.getLabelsIfNew());
}
Use of org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue in the OpenRefine project: class TermedStatementEntityEditTest, method testNormalizeTerms.
@Test
public void testNormalizeTerms() {
    MonolingualTextValue englishAlias = Datamodel.makeMonolingualTextValue("alias", "en");
    MonolingualTextValue frenchAlias = Datamodel.makeMonolingualTextValue("coucou", "fr");

    // Edit with one label plus aliases in two different languages.
    TermedStatementEntityEdit rawEdit = new TermedStatementEntityEditBuilder(newSubject)
            .addLabel(label, true)
            .addAlias(englishAlias)
            .addAlias(frenchAlias)
            .build();
    assertFalse(rawEdit.isNull());

    TermedStatementEntityEdit normalized = rawEdit.normalizeLabelsAndAliases();

    // After normalization the French alias is expected to appear as a label
    // (presumably because no "fr" label existed — the English alias is untouched).
    TermedStatementEntityEdit expected = new TermedStatementEntityEditBuilder(newSubject)
            .addLabel(label, true)
            .addAlias(englishAlias)
            .addLabel(frenchAlias, true)
            .build();
    assertEquals(expected, normalized);
}
Use of org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue in the OpenRefine project: class LaxValueMatcherTests, method testMonolingualText.
@Test
public void testMonolingualText() {
    MonolingualTextValue reference = Datamodel.makeMonolingualTextValue("foo", "en");
    MonolingualTextValue paddedCopy = Datamodel.makeMonolingualTextValue("\tfoo ", "en");
    MonolingualTextValue differentText = Datamodel.makeMonolingualTextValue("bar", "en");

    // Surrounding whitespace is disregarded when matching monolingual texts…
    assertTrue(SUT.match(reference, paddedCopy));
    // …but a different text content in the same language does not match.
    assertFalse(SUT.match(reference, differentText));
}
Use of org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue in the OpenRefine project: class EditBatchProcessorTest, method testMultipleBatchesMediaInfo.
@Test
public void testMultipleBatchesMediaInfo() throws MediaWikiApiErrorException, InterruptedException, IOException {
// Prepare test data
// A single English label applied to every MediaInfo entity in the batch.
MonolingualTextValue label = Datamodel.makeMonolingualTextValue("village in Nepal", "en");
List<MonolingualTextValue> labels = Collections.singletonList(label);
TermUpdate labelsUpdate = Datamodel.makeTermUpdate(labels, Collections.emptyList());
// 66 MediaInfo ids (M124 .. M189) — deliberately more than one batch of 50.
List<String> ids = new ArrayList<>();
for (int i = 124; i < 190; i++) {
ids.add("M" + String.valueOf(i));
}
List<MediaInfoIdValue> mids = ids.stream().map(e -> Datamodel.makeWikimediaCommonsMediaInfoIdValue(e)).collect(Collectors.toList());
// One edit per entity, each adding the label in non-overriding mode.
List<TermedStatementEntityEdit> batch = mids.stream().map(mid -> new TermedStatementEntityEditBuilder(mid).addLabel(label, false).build()).collect(Collectors.toList());
int batchSize = 50;
// Stub the fetcher so each sub-batch of documents is returned when requested.
List<MediaInfoDocument> fullBatch = mids.stream().map(mid -> Datamodel.makeMediaInfoDocument(mid)).collect(Collectors.toList());
List<MediaInfoDocument> firstBatch = fullBatch.subList(0, batchSize);
List<MediaInfoDocument> secondBatch = fullBatch.subList(batchSize, fullBatch.size());
when(fetcher.getEntityDocuments(toMids(firstBatch))).thenReturn(toMapMediaInfo(firstBatch));
when(fetcher.getEntityDocuments(toMids(secondBatch))).thenReturn(toMapMediaInfo(secondBatch));
// Run edits
EditBatchProcessor processor = new EditBatchProcessor(fetcher, editor, batch, library, summary, maxlag, tags, batchSize);
assertEquals(0, processor.progress());
// Perform one edit per entity, checking the remaining-edit count counts down.
for (int i = 124; i < 190; i++) {
assertEquals(processor.remainingEdits(), 190 - i);
processor.performEdit();
}
assertEquals(0, processor.remainingEdits());
assertEquals(100, processor.progress());
// Check result
// No new entities were created, so the library should be empty.
assertEquals(new NewEntityLibrary(), library);
// Each sub-batch of documents must have been fetched exactly once.
verify(fetcher, times(1)).getEntityDocuments(toMids(firstBatch));
verify(fetcher, times(1)).getEntityDocuments(toMids(secondBatch));
// Every entity should have received exactly one label-only edit (no statements).
for (MediaInfoDocument doc : fullBatch) {
StatementUpdate statementUpdate = Datamodel.makeStatementUpdate(Collections.emptyList(), Collections.emptyList(), Collections.emptyList());
verify(editor, times(1)).editEntityDocument(Datamodel.makeMediaInfoUpdate((MediaInfoIdValue) doc.getEntityId(), doc.getRevisionId(), labelsUpdate, statementUpdate), false, summary, tags);
}
}
Use of org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue in the OpenRefine project: class LaxValueMatcher, method match.
/**
 * Lax equality between two Wikibase values, tolerating superficial differences:
 * entity ids are compared by id only, strings ignore surrounding whitespace and
 * URL normalization differences, quantities match when their confidence intervals
 * overlap, globe coordinates match when their precision rectangles overlap, and
 * times are compared up to the coarsest precision of the two values.
 *
 * @param existing the value already present on the entity
 * @param added the value being added
 * @return true when the two values are considered equivalent
 */
@Override
public boolean match(Value existing, Value added) {
    if (existing instanceof EntityIdValue && added instanceof EntityIdValue) {
        // Compare by id only; we do not expect
        // to mix up entities from different Wikibases in the same data slot
        return ((EntityIdValue) existing).getId().equals(((EntityIdValue) added).getId());
    } else if (existing instanceof StringValue && added instanceof StringValue) {
        // disregard trailing whitespace differences
        String existingStr = ((StringValue) existing).getString().trim();
        String addedStr = ((StringValue) added).getString().trim();
        // if they look like URLs, then http(s) and trailing slashes do not matter
        try {
            URI existingUrl = extraURINormalize(new URI(existingStr).normalize());
            URI addedUrl = extraURINormalize(new URI(addedStr).normalize());
            return existingUrl.equals(addedUrl);
        } catch (URISyntaxException e) {
            // not valid URIs: deliberately ignored, fall back on the plain
            // trimmed-string comparison below
        }
        return existingStr.equals(addedStr);
    } else if (existing instanceof MonolingualTextValue && added instanceof MonolingualTextValue) {
        // ignore differences of trailing whitespace, but require identical language codes
        MonolingualTextValue existingMTV = (MonolingualTextValue) existing;
        MonolingualTextValue addedMTV = (MonolingualTextValue) added;
        return (existingMTV.getLanguageCode().equals(addedMTV.getLanguageCode())
                && existingMTV.getText().trim().equals(addedMTV.getText().trim()));
    } else if (existing instanceof QuantityValue && added instanceof QuantityValue) {
        QuantityValue existingQuantity = (QuantityValue) existing;
        QuantityValue addedQuantity = (QuantityValue) added;
        BigDecimal existingLowerBound = existingQuantity.getLowerBound();
        BigDecimal addedLowerBound = addedQuantity.getLowerBound();
        BigDecimal existingUpperBound = existingQuantity.getUpperBound();
        BigDecimal addedUpperBound = addedQuantity.getUpperBound();
        // artificially set bounds for quantities which have neither lower nor upper bounds,
        // treating an unbounded quantity as the degenerate interval [value, value]
        if (existingLowerBound == null && existingUpperBound == null) {
            existingLowerBound = existingQuantity.getNumericValue();
            existingUpperBound = existingQuantity.getNumericValue();
        }
        if (addedLowerBound == null && addedUpperBound == null) {
            addedLowerBound = addedQuantity.getNumericValue();
            addedUpperBound = addedQuantity.getNumericValue();
        }
        // Units must agree; if either quantity still has a partially-missing bound,
        // fall through to the exact comparison at the end of the method.
        if (existingQuantity.getUnit().equals(addedQuantity.getUnit())
                && (existingLowerBound != null) && (addedLowerBound != null)
                && (existingUpperBound != null) && (addedUpperBound != null)) {
            // Consider the two values to be equal when their confidence intervals overlap
            return ((existingLowerBound.compareTo(addedLowerBound) <= 0
                    && addedLowerBound.compareTo(existingUpperBound) <= 0)
                    || (addedLowerBound.compareTo(existingLowerBound) <= 0
                            && existingLowerBound.compareTo(addedUpperBound) <= 0));
        }
    } else if (existing instanceof GlobeCoordinatesValue && added instanceof GlobeCoordinatesValue) {
        GlobeCoordinatesValue addedCoords = (GlobeCoordinatesValue) added;
        GlobeCoordinatesValue existingCoords = (GlobeCoordinatesValue) existing;
        // Coordinates on different globes never match.
        if (!addedCoords.getGlobeItemId().getId().equals(existingCoords.getGlobeItemId().getId())) {
            return false;
        }
        double addedMinLon = addedCoords.getLongitude() - addedCoords.getPrecision();
        double addedMaxLon = addedCoords.getLongitude() + addedCoords.getPrecision();
        double addedMinLat = addedCoords.getLatitude() - addedCoords.getPrecision();
        double addedMaxLat = addedCoords.getLatitude() + addedCoords.getPrecision();
        double existingMinLon = existingCoords.getLongitude() - existingCoords.getPrecision();
        double existingMaxLon = existingCoords.getLongitude() + existingCoords.getPrecision();
        double existingMinLat = existingCoords.getLatitude() - existingCoords.getPrecision();
        double existingMaxLat = existingCoords.getLatitude() + existingCoords.getPrecision();
        // return true when the two "rectangles" (in coordinate space) overlap (not strictly).
        // Two intervals [a, b] and [c, d] overlap iff a <= d && c <= b; apply that test
        // independently on each axis. (The previous corner-containment check missed
        // overlapping rectangles whose corners lay outside each other.)
        return addedMinLon <= existingMaxLon && existingMinLon <= addedMaxLon
                && addedMinLat <= existingMaxLat && existingMinLat <= addedMaxLat;
    } else if (existing instanceof TimeValue && added instanceof TimeValue) {
        TimeValue existingTime = (TimeValue) existing;
        TimeValue addedTime = (TimeValue) added;
        // Different calendar models never match.
        if (!existingTime.getPreferredCalendarModel().equals(addedTime.getPreferredCalendarModel())) {
            return false;
        }
        // Compare at the coarsest of the two precisions.
        int minPrecision = Math.min(existingTime.getPrecision(), addedTime.getPrecision());
        if (minPrecision <= 9) {
            // the precision is a multiple of years: compare years truncated
            // to that multiple (precision 9 = year, 8 = decade, 7 = century, ...)
            long yearPrecision = (long) Math.pow(10, 9 - minPrecision);
            long addedValue = addedTime.getYear() / yearPrecision;
            long existingValue = existingTime.getYear() / yearPrecision;
            return addedValue == existingValue;
        } else if (minPrecision == 10) {
            // month precision
            return (addedTime.getYear() == existingTime.getYear()
                    && addedTime.getMonth() == existingTime.getMonth());
        } else if (minPrecision == 11) {
            // day precision
            return (addedTime.getYear() == existingTime.getYear()
                    && addedTime.getMonth() == existingTime.getMonth()
                    && addedTime.getDay() == existingTime.getDay());
        }
        // Sub-day precisions fall through to exact comparison.
        // TODO possible improvements: bounds support, timezone support
    }
    // fall back to exact comparison for other datatypes
    return existing.equals(added);
}
Aggregations