Usage example of org.wikidata.wdtk.datamodel.interfaces.EntityIdValue in the OpenRefine project: class LaxValueMatcherTests, method testQids.
@Test
public void testQids() {
    // Three item ids: two share the local id "Q123" but live on different
    // Wikibase instances, the third has a distinct local id on the first instance.
    EntityIdValue itemOnFoo = Datamodel.makeItemIdValue("Q123", "https://foo.com");
    EntityIdValue itemOnBar = Datamodel.makeItemIdValue("Q123", "https://bar.com");
    EntityIdValue otherItemOnFoo = Datamodel.makeItemIdValue("Q456", "https://foo.com");

    // An id matches itself.
    assertTrue(SUT.match(itemOnFoo, itemOnFoo));
    // The lax matcher compares local ids only, so differing site IRIs still match.
    assertTrue(SUT.match(itemOnFoo, itemOnBar));
    // Different local ids never match.
    assertFalse(SUT.match(itemOnFoo, otherItemOnFoo));
}
Usage example of org.wikidata.wdtk.datamodel.interfaces.EntityIdValue in the OpenRefine project: class WbEntityIdValueConstantTest, method testEvaluate.
@Test
public void testEvaluate() {
    // Evaluating the constant should yield an entity id with the same IRI
    // as the Wikidata property P48.
    String expectedIri = Datamodel.makeWikidataPropertyIdValue("P48").getIri();
    EntityIdValue result = constant.evaluate(ctxt);
    Assert.assertEquals(expectedIri, result.getIri());
}
Usage example of org.wikidata.wdtk.datamodel.interfaces.EntityIdValue in the OpenRefine project: class LaxValueMatcher, method match.
@Override
public boolean match(Value existing, Value added) {
    // Lenient equality between two Wikibase values: each supported datatype gets a
    // relaxed comparison; anything else falls back to exact equals() at the bottom.

    if (existing instanceof EntityIdValue && added instanceof EntityIdValue) {
        // Compare only the local id (e.g. "Q123") and ignore the site IRI —
        // a single data slot is not expected to mix entities from different Wikibases.
        return ((EntityIdValue) existing).getId().equals(((EntityIdValue) added).getId());
    } else if (existing instanceof StringValue && added instanceof StringValue) {
        // disregard leading/trailing whitespace differences
        String existingStr = ((StringValue) existing).getString().trim();
        String addedStr = ((StringValue) added).getString().trim();
        // if they look like URLs, then http(s) and trailing slashes do not matter
        try {
            URI existingUrl = extraURINormalize(new URI(existingStr).normalize());
            URI addedUrl = extraURINormalize(new URI(addedStr).normalize());
            return existingUrl.equals(addedUrl);
        } catch (URISyntaxException e) {
            // not parseable as URIs: deliberately ignored, fall back on the
            // plain string comparison below
            ;
        }
        return existingStr.equals(addedStr);
    } else if (existing instanceof MonolingualTextValue && added instanceof MonolingualTextValue) {
        // language codes must match exactly; text is compared ignoring
        // leading/trailing whitespace
        MonolingualTextValue existingMTV = (MonolingualTextValue) existing;
        MonolingualTextValue addedMTV = (MonolingualTextValue) added;
        return (existingMTV.getLanguageCode().equals(addedMTV.getLanguageCode()) && existingMTV.getText().trim().equals(addedMTV.getText().trim()));
    } else if (existing instanceof QuantityValue && added instanceof QuantityValue) {
        QuantityValue existingQuantity = (QuantityValue) existing;
        QuantityValue addedQuantity = (QuantityValue) added;
        BigDecimal existingLowerBound = existingQuantity.getLowerBound();
        BigDecimal addedLowerBound = addedQuantity.getLowerBound();
        BigDecimal existingUpperBound = existingQuantity.getUpperBound();
        BigDecimal addedUpperBound = addedQuantity.getUpperBound();
        // a quantity with neither bound is treated as the degenerate interval
        // [value, value] so that the overlap test below applies to it too
        if (existingLowerBound == null && existingUpperBound == null) {
            existingLowerBound = existingQuantity.getNumericValue();
            existingUpperBound = existingQuantity.getNumericValue();
        }
        if (addedLowerBound == null && addedUpperBound == null) {
            addedLowerBound = addedQuantity.getNumericValue();
            addedUpperBound = addedQuantity.getNumericValue();
        }
        // Units must match exactly. Note: if exactly one bound of a quantity is
        // null, this condition fails and we fall through to the strict equals()
        // comparison at the end of the method.
        if (existingQuantity.getUnit().equals(addedQuantity.getUnit()) && (existingLowerBound != null) && (addedLowerBound != null) && (existingUpperBound != null) && (addedUpperBound != null)) {
            // Consider the two values to be equal when their confidence intervals overlap
            return ((existingLowerBound.compareTo(addedLowerBound) <= 0 && addedLowerBound.compareTo(existingUpperBound) <= 0) || (addedLowerBound.compareTo(existingLowerBound) <= 0 && existingLowerBound.compareTo(addedUpperBound) <= 0));
        }
    } else if (existing instanceof GlobeCoordinatesValue && added instanceof GlobeCoordinatesValue) {
        GlobeCoordinatesValue addedCoords = (GlobeCoordinatesValue) added;
        GlobeCoordinatesValue existingCoords = (GlobeCoordinatesValue) existing;
        // coordinates on different globes (by local id) never match
        if (!addedCoords.getGlobeItemId().getId().equals(existingCoords.getGlobeItemId().getId())) {
            return false;
        }
        // expand each point by its precision into a lat/lon bounding box
        double addedMinLon = addedCoords.getLongitude() - addedCoords.getPrecision();
        double addedMaxLon = addedCoords.getLongitude() + addedCoords.getPrecision();
        double addedMinLat = addedCoords.getLatitude() - addedCoords.getPrecision();
        double addedMaxLat = addedCoords.getLatitude() + addedCoords.getPrecision();
        double existingMinLon = existingCoords.getLongitude() - existingCoords.getPrecision();
        double existingMaxLon = existingCoords.getLongitude() + existingCoords.getPrecision();
        double existingMinLat = existingCoords.getLatitude() - existingCoords.getPrecision();
        double existingMaxLat = existingCoords.getLatitude() + existingCoords.getPrecision();
        // return true when the two "rectangles" (in coordinate space) overlap (not strictly)
        return ((addedMinLon <= existingMinLon && addedMinLat <= existingMinLat && existingMinLon <= addedMaxLon && existingMinLat <= addedMaxLat) || (existingMinLon <= addedMinLon && existingMinLat <= addedMinLat && addedMinLon <= existingMaxLon && addedMinLat <= existingMaxLat));
    } else if (existing instanceof TimeValue && added instanceof TimeValue) {
        TimeValue existingTime = (TimeValue) existing;
        TimeValue addedTime = (TimeValue) added;
        // different calendar models never match
        if (!existingTime.getPreferredCalendarModel().equals(addedTime.getPreferredCalendarModel())) {
            return false;
        }
        // compare at the coarser of the two precisions
        int minPrecision = Math.min(existingTime.getPrecision(), addedTime.getPrecision());
        if (minPrecision <= 9) {
            // the precision is a multiple of years: compare the years after
            // truncating integer division by 10^(9 - precision)
            long yearPrecision = (long) Math.pow(10, 9 - minPrecision);
            long addedValue = addedTime.getYear() / yearPrecision;
            long existingValue = existingTime.getYear() / yearPrecision;
            return addedValue == existingValue;
        } else if (minPrecision == 10) {
            // month precision
            return (addedTime.getYear() == existingTime.getYear() && addedTime.getMonth() == existingTime.getMonth());
        } else if (minPrecision == 11) {
            // day precision
            return (addedTime.getYear() == existingTime.getYear() && addedTime.getMonth() == existingTime.getMonth() && addedTime.getDay() == existingTime.getDay());
        }
        // precisions finer than a day fall through to exact comparison below
        // TODO possible improvements: bounds support, timezone support
    }
    // fall back to exact comparison for other datatypes
    return existing.equals(added);
}
Usage example of org.wikidata.wdtk.datamodel.interfaces.EntityIdValue in the OpenRefine project: class TermedStatementEntityEdit, method groupBySubject.
/**
 * Groups a list of {@link TermedStatementEntityEdit}s by their subject entity id,
 * merging all edits that share a subject. This is useful to make one single
 * edit per entity. Null edits (as reported by {@code isNull()}) are skipped.
 *
 * @param entityDocuments
 *            the edits to group; null edits are ignored
 * @return a map from entity ids to the merged TermedStatementEntityUpdate for that id
 */
public static Map<EntityIdValue, TermedStatementEntityEdit> groupBySubject(List<TermedStatementEntityEdit> entityDocuments) {
    Map<EntityIdValue, TermedStatementEntityEdit> map = new HashMap<>();
    for (TermedStatementEntityEdit update : entityDocuments) {
        if (update.isNull()) {
            continue;
        }
        // Map.merge performs a single lookup instead of containsKey/get/put and
        // calls the remapping function as (existingValue, newValue), preserving
        // the original oldUpdate.merge(update) argument order.
        map.merge(update.getEntityId(), update, TermedStatementEntityEdit::merge);
    }
    return map;
}
Usage example of org.wikidata.wdtk.datamodel.interfaces.EntityIdValue in the OpenRefine project: class WikibaseAPIUpdateScheduler, method schedule.
@Override
public List<TermedStatementEntityEdit> schedule(List<TermedStatementEntityEdit> updates) {
    // Reset the scheduling state before classifying the incoming updates.
    pointerFreeUpdates = new UpdateSequence();
    pointerFullUpdates = new UpdateSequence();
    allPointers = new HashSet<>();
    for (TermedStatementEntityEdit update : updates) {
        splitUpdate(update);
    }

    // Part 1: all the updates that do not contain pointers to new entities.
    List<TermedStatementEntityEdit> scheduled = new ArrayList<>(pointerFreeUpdates.getUpdates());

    // Part 1': empty creation edits for new entities that are pointed at but
    // were never the subject of a pointer-free update.
    Set<EntityIdValue> untouchedPointers = new HashSet<>(allPointers);
    untouchedPointers.removeAll(pointerFreeUpdates.getSubjects());
    for (EntityIdValue pointer : untouchedPointers) {
        scheduled.add(new TermedStatementEntityEditBuilder(pointer).build());
    }

    // Part 2: all the updates that do contain pointers to new entities.
    scheduled.addAll(pointerFullUpdates.getUpdates());
    return scheduled;
}
Aggregations