Search in sources :

Example 11 with EntityIdValue

use of org.wikidata.wdtk.datamodel.interfaces.EntityIdValue in project OpenRefine by OpenRefine.

the class LaxValueMatcherTests method testQids.

/**
 * Entity ids are matched on their local id only: the same id on two different
 * site IRIs matches, whereas two different ids on the same site IRI do not.
 */
@Test
public void testQids() {
    EntityIdValue fooQ123 = Datamodel.makeItemIdValue("Q123", "https://foo.com");
    EntityIdValue barQ123 = Datamodel.makeItemIdValue("Q123", "https://bar.com");
    EntityIdValue fooQ456 = Datamodel.makeItemIdValue("Q456", "https://foo.com");

    // a value always matches itself
    assertTrue(SUT.match(fooQ123, fooQ123));
    // same id, different site IRI
    assertTrue(SUT.match(fooQ123, barQ123));
    // different id, same site IRI
    assertFalse(SUT.match(fooQ123, fooQ456));
}
Also used : EntityIdValue(org.wikidata.wdtk.datamodel.interfaces.EntityIdValue) Test(org.testng.annotations.Test)

Example 12 with EntityIdValue

use of org.wikidata.wdtk.datamodel.interfaces.EntityIdValue in project OpenRefine by OpenRefine.

the class WbEntityIdValueConstantTest method testEvaluate.

/**
 * Evaluating the constant (a fixture set up outside this method) must yield
 * an entity id whose IRI is that of the Wikidata property P48.
 */
@Test
public void testEvaluate() {
    EntityIdValue evaluated = constant.evaluate(ctxt);
    String expectedIri = Datamodel.makeWikidataPropertyIdValue("P48").getIri();
    Assert.assertEquals(expectedIri, evaluated.getIri());
}
Also used : EntityIdValue(org.wikidata.wdtk.datamodel.interfaces.EntityIdValue) JacksonSerializationTest(org.openrefine.wikidata.testing.JacksonSerializationTest) Test(org.testng.annotations.Test)

Example 13 with EntityIdValue

use of org.wikidata.wdtk.datamodel.interfaces.EntityIdValue in project OpenRefine by OpenRefine.

the class LaxValueMatcher method match.

/**
 * Decides whether two values should be considered the same, using a lax,
 * datatype-specific comparison:
 * <ul>
 * <li>entity ids: equal local ids (site IRIs are ignored);</li>
 * <li>strings: equal after trimming, or equal as normalized URIs when both parse as URIs;</li>
 * <li>monolingual texts: same language code and equal trimmed text;</li>
 * <li>quantities: same unit and overlapping [lower bound, upper bound] intervals;</li>
 * <li>globe coordinates: same globe and overlapping precision rectangles;</li>
 * <li>times: same preferred calendar model and equal once truncated to the
 * coarser of the two precisions (up to day precision).</li>
 * </ul>
 * Any case not covered above falls back to {@code existing.equals(added)}.
 *
 * @param existing the value already present
 * @param added the value being added
 * @return true if the two values are considered to match
 */
@Override
public boolean match(Value existing, Value added) {
    if (existing instanceof EntityIdValue && added instanceof EntityIdValue) {
        // only the local ids are compared, not the site IRIs: entities with the
        // same id coming from different Wikibases are treated as matching
        return ((EntityIdValue) existing).getId().equals(((EntityIdValue) added).getId());
    } else if (existing instanceof StringValue && added instanceof StringValue) {
        // disregard leading and trailing whitespace differences
        String existingStr = ((StringValue) existing).getString().trim();
        String addedStr = ((StringValue) added).getString().trim();
        // if they look like URLs, then http(s) and trailing slashes do not matter
        // (that part is delegated to extraURINormalize, defined outside this method)
        try {
            URI existingUrl = extraURINormalize(new URI(existingStr).normalize());
            URI addedUrl = extraURINormalize(new URI(addedStr).normalize());
            return existingUrl.equals(addedUrl);
        } catch (URISyntaxException e) {
            // at least one of the strings is not a valid URI:
            // fall back on the plain string comparison below
        }
        return existingStr.equals(addedStr);
    } else if (existing instanceof MonolingualTextValue && added instanceof MonolingualTextValue) {
        // ignore differences of leading and trailing whitespace
        MonolingualTextValue existingMTV = (MonolingualTextValue) existing;
        MonolingualTextValue addedMTV = (MonolingualTextValue) added;
        return (existingMTV.getLanguageCode().equals(addedMTV.getLanguageCode()) && existingMTV.getText().trim().equals(addedMTV.getText().trim()));
    } else if (existing instanceof QuantityValue && added instanceof QuantityValue) {
        QuantityValue existingQuantity = (QuantityValue) existing;
        QuantityValue addedQuantity = (QuantityValue) added;
        BigDecimal existingLowerBound = existingQuantity.getLowerBound();
        BigDecimal addedLowerBound = addedQuantity.getLowerBound();
        BigDecimal existingUpperBound = existingQuantity.getUpperBound();
        BigDecimal addedUpperBound = addedQuantity.getUpperBound();
        // artificially set bounds for quantities which have neither lower nor upper bounds
        if (existingLowerBound == null && existingUpperBound == null) {
            existingLowerBound = existingQuantity.getNumericValue();
            existingUpperBound = existingQuantity.getNumericValue();
        }
        if (addedLowerBound == null && addedUpperBound == null) {
            addedLowerBound = addedQuantity.getNumericValue();
            addedUpperBound = addedQuantity.getNumericValue();
        }
        if (existingQuantity.getUnit().equals(addedQuantity.getUnit()) && (existingLowerBound != null) && (addedLowerBound != null) && (existingUpperBound != null) && (addedUpperBound != null)) {
            // Consider the two values to be equal when their confidence interval overlaps
            return ((existingLowerBound.compareTo(addedLowerBound) <= 0 && addedLowerBound.compareTo(existingUpperBound) <= 0) || (addedLowerBound.compareTo(existingLowerBound) <= 0 && existingLowerBound.compareTo(addedUpperBound) <= 0));
        }
        // different units, or only one of the two bounds set on a quantity:
        // handled by the exact comparison at the end of the method
    } else if (existing instanceof GlobeCoordinatesValue && added instanceof GlobeCoordinatesValue) {
        GlobeCoordinatesValue addedCoords = (GlobeCoordinatesValue) added;
        GlobeCoordinatesValue existingCoords = (GlobeCoordinatesValue) existing;
        if (!addedCoords.getGlobeItemId().getId().equals(existingCoords.getGlobeItemId().getId())) {
            return false;
        }
        double addedMinLon = addedCoords.getLongitude() - addedCoords.getPrecision();
        double addedMaxLon = addedCoords.getLongitude() + addedCoords.getPrecision();
        double addedMinLat = addedCoords.getLatitude() - addedCoords.getPrecision();
        double addedMaxLat = addedCoords.getLatitude() + addedCoords.getPrecision();
        double existingMinLon = existingCoords.getLongitude() - existingCoords.getPrecision();
        double existingMaxLon = existingCoords.getLongitude() + existingCoords.getPrecision();
        double existingMinLat = existingCoords.getLatitude() - existingCoords.getPrecision();
        double existingMaxLat = existingCoords.getLatitude() + existingCoords.getPrecision();
        // return true when the two "rectangles" (in coordinate space) overlap (not strictly):
        // two axis-aligned rectangles intersect exactly when their longitude ranges
        // intersect and their latitude ranges intersect. Checking only whether one
        // rectangle's minimum corner lies inside the other misses overlaps where
        // neither minimum corner is contained in the other rectangle.
        // NOTE(review): longitude wrap-around at +/-180 degrees is not taken into account.
        return (addedMinLon <= existingMaxLon && existingMinLon <= addedMaxLon
                && addedMinLat <= existingMaxLat && existingMinLat <= addedMaxLat);
    } else if (existing instanceof TimeValue && added instanceof TimeValue) {
        TimeValue existingTime = (TimeValue) existing;
        TimeValue addedTime = (TimeValue) added;
        if (!existingTime.getPreferredCalendarModel().equals(addedTime.getPreferredCalendarModel())) {
            return false;
        }
        // compare at the coarser of the two precisions
        int minPrecision = Math.min(existingTime.getPrecision(), addedTime.getPrecision());
        if (minPrecision <= 9) {
            // the precision is a multiple of years: 10^(9 - precision) years
            long yearPrecision = (long) Math.pow(10, 9 - minPrecision);
            // NOTE(review): integer division truncates towards zero, so small negative
            // and small positive years land in the same bucket (e.g. -5 and 5 at
            // decade precision) - confirm whether this is intended for BCE dates
            long addedValue = addedTime.getYear() / yearPrecision;
            long existingValue = existingTime.getYear() / yearPrecision;
            return addedValue == existingValue;
        } else if (minPrecision == 10) {
            // month precision
            return (addedTime.getYear() == existingTime.getYear() && addedTime.getMonth() == existingTime.getMonth());
        } else if (minPrecision == 11) {
            // day precision
            return (addedTime.getYear() == existingTime.getYear() && addedTime.getMonth() == existingTime.getMonth() && addedTime.getDay() == existingTime.getDay());
        }
        // precisions finer than a day are handled by the exact comparison below
        // TODO possible improvements: bounds support, timezone support
    }
    // fall back to exact comparison for other datatypes
    return existing.equals(added);
}
Also used : GlobeCoordinatesValue(org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue) QuantityValue(org.wikidata.wdtk.datamodel.interfaces.QuantityValue) EntityIdValue(org.wikidata.wdtk.datamodel.interfaces.EntityIdValue) MonolingualTextValue(org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue) URISyntaxException(java.net.URISyntaxException) StringValue(org.wikidata.wdtk.datamodel.interfaces.StringValue) URI(java.net.URI) BigDecimal(java.math.BigDecimal) TimeValue(org.wikidata.wdtk.datamodel.interfaces.TimeValue)

Example 14 with EntityIdValue

use of org.wikidata.wdtk.datamodel.interfaces.EntityIdValue in project OpenRefine by OpenRefine.

the class TermedStatementEntityEdit method groupBySubject.

/**
 * Groups a list of edits by the entity they apply to, merging together the
 * edits which share the same entity id. This is useful to make one single
 * edit per entity.
 *
 * @param entityDocuments
 *            the edits to group; those for which {@code isNull()} returns true are skipped
 * @return a map from entity ids to the merged TermedStatementEntityEdit for that id
 */
public static Map<EntityIdValue, TermedStatementEntityEdit> groupBySubject(List<TermedStatementEntityEdit> entityDocuments) {
    Map<EntityIdValue, TermedStatementEntityEdit> editsBySubject = new HashMap<>();
    for (TermedStatementEntityEdit edit : entityDocuments) {
        if (!edit.isNull()) {
            EntityIdValue subject = edit.getEntityId();
            // merge into the edit already recorded for this subject, if there is one
            TermedStatementEntityEdit combined = editsBySubject.containsKey(subject)
                    ? editsBySubject.get(subject).merge(edit)
                    : edit;
            editsBySubject.put(subject, combined);
        }
    }
    return editsBySubject;
}
Also used : HashMap(java.util.HashMap) EntityIdValue(org.wikidata.wdtk.datamodel.interfaces.EntityIdValue)

Example 15 with EntityIdValue

use of org.wikidata.wdtk.datamodel.interfaces.EntityIdValue in project OpenRefine by OpenRefine.

the class WikibaseAPIUpdateScheduler method schedule.

/**
 * Reorders the given edits into three consecutive groups: the pointer-free
 * updates, then a fresh edit (built with no further content) for every
 * collected pointer which is not the subject of a pointer-free update, and
 * finally the pointer-full updates.
 *
 * The per-call state (both update sequences and the pointer set) is reset at
 * the start of each call and filled by {@code splitUpdate}, which is defined
 * outside this method.
 *
 * @param updates
 *            the edits to schedule
 * @return the scheduled list of edits
 */
@Override
public List<TermedStatementEntityEdit> schedule(List<TermedStatementEntityEdit> updates) {
    List<TermedStatementEntityEdit> scheduled = new ArrayList<>();

    // start from a clean state, then dispatch every incoming edit
    pointerFreeUpdates = new UpdateSequence();
    pointerFullUpdates = new UpdateSequence();
    allPointers = new HashSet<>();
    for (TermedStatementEntityEdit edit : updates) {
        splitUpdate(edit);
    }

    // Part 1: the pointer free updates come first
    scheduled.addAll(pointerFreeUpdates.getUpdates());

    // Part 1': then the remaining new entities that have not been touched
    Set<EntityIdValue> untouched = new HashSet<>(allPointers);
    untouched.removeAll(pointerFreeUpdates.getSubjects());
    List<TermedStatementEntityEdit> placeholders = new ArrayList<>();
    for (EntityIdValue pointer : untouched) {
        placeholders.add(new TermedStatementEntityEditBuilder(pointer).build());
    }
    scheduled.addAll(placeholders);

    // Part 2: the pointer full updates come last
    scheduled.addAll(pointerFullUpdates.getUpdates());
    return scheduled;
}
Also used : TermedStatementEntityEditBuilder(org.openrefine.wikidata.updates.TermedStatementEntityEditBuilder) TermedStatementEntityEdit(org.openrefine.wikidata.updates.TermedStatementEntityEdit) ArrayList(java.util.ArrayList) EntityIdValue(org.wikidata.wdtk.datamodel.interfaces.EntityIdValue) ReconEntityIdValue(org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue) HashSet(java.util.HashSet)

Aggregations

EntityIdValue (org.wikidata.wdtk.datamodel.interfaces.EntityIdValue)20 PropertyIdValue (org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue)11 QAWarning (org.openrefine.wikidata.qa.QAWarning)6 TermedStatementEntityEdit (org.openrefine.wikidata.updates.TermedStatementEntityEdit)6 Statement (org.wikidata.wdtk.datamodel.interfaces.Statement)6 HashSet (java.util.HashSet)5 Value (org.wikidata.wdtk.datamodel.interfaces.Value)5 Set (java.util.Set)4 StatementEdit (org.openrefine.wikidata.updates.StatementEdit)4 TermedStatementEntityEditBuilder (org.openrefine.wikidata.updates.TermedStatementEntityEditBuilder)4 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 List (java.util.List)3 Collectors (java.util.stream.Collectors)3 ReconEntityIdValue (org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue)3 SkipSchemaExpressionException (org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException)3 ItemIdValue (org.wikidata.wdtk.datamodel.interfaces.ItemIdValue)3 Snak (org.wikidata.wdtk.datamodel.interfaces.Snak)3 ReconItemIdValue (org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue)2 ReconMediaInfoIdValue (org.openrefine.wikidata.schema.entityvalues.ReconMediaInfoIdValue)2