Search in sources :

Example 6 with Builder

use of org.apache.rya.indexing.entity.model.Entity.Builder in project incubator-rya by apache.

the class DuplicateDataDetectorIT method testCreateEntityNearDuplicateConfigDisabled.

/**
 * Verifies that entity creation is never rejected as a near duplicate when
 * the {@link DuplicateDataConfig} handed to the {@link DuplicateDataDetector}
 * is built with its final flag set to {@code false}.
 * <p>
 * The test stores the person and employee types, stores the baseline "Bob"
 * entity, and then stores two variations of it under different subjects: one
 * whose property values are only slightly changed and one whose values are
 * changed by a large amount. Both variations must be retrievable afterwards.
 */
@Test
public void testCreateEntityNearDuplicateConfigDisabled() throws EntityStorageException, TypeStorageException, ConfigurationException, ObjectStorageException {
    // Create the types the Entity uses and confirm they were stored.
    final TypeStorage typeStorage = new MongoTypeStorage(super.getMongoClient(), RYA_INSTANCE_NAME);
    final Type personType = createPersonType();
    final Type employeeType = createEmployeeType();
    typeStorage.create(personType);
    typeStorage.create(employeeType);
    assertTrue(typeStorage.get(personType.getId()).isPresent());
    assertTrue(typeStorage.get(employeeType.getId()).isPresent());

    // One tolerance per data type, followed by a map that is left empty here
    // (presumably the equivalent-terms map -- confirm against
    // DuplicateDataConfig) and the flag that disables detection for this test.
    final DuplicateDataConfig duplicateDataConfig = new DuplicateDataConfig(
        new Tolerance(0.0, ToleranceType.DIFFERENCE), // boolean
        new Tolerance(0.0, ToleranceType.DIFFERENCE), // byte
        new Tolerance(500.0, ToleranceType.DIFFERENCE), // date
        new Tolerance(0.0001, ToleranceType.PERCENTAGE), // double
        new Tolerance(0.0001, ToleranceType.PERCENTAGE), // float
        new Tolerance(1.0, ToleranceType.DIFFERENCE), // integer
        new Tolerance(1.0, ToleranceType.DIFFERENCE), // long
        new Tolerance(1.0, ToleranceType.DIFFERENCE), // short
        new Tolerance(1.0, ToleranceType.DIFFERENCE), // string
        new Tolerance(1.0, ToleranceType.DIFFERENCE), // uri
        new HashMap<String, List<String>>(),
        false);
    final DuplicateDataDetector duplicateDataDetector = new DuplicateDataDetector(duplicateDataConfig);
    final EntityStorage entityStorage = new MongoEntityStorage(super.getMongoClient(), RYA_INSTANCE_NAME, duplicateDataDetector);

    // Store the baseline entity.
    final Entity bobEntity = createBobEntity();
    entityStorage.create(bobEntity);
    assertTrue(entityStorage.get(bobEntity.getSubject()).isPresent());

    final Builder duplicateBobBuilder = Entity.builder(createBobEntity());
    duplicateBobBuilder.setSubject(createRyaUri("Robert"));
    // Modify a property for each type that is within tolerance
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_AGE, shortRyaType((short) 41)));
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_WEIGHT, floatRyaType(250.76f)));
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_HEIGHT, doubleRyaType(72.499)));
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_INCOME, intRyaType(50001)));
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_NUMBER_OF_CHILDREN, byteRyaType((byte) 2)));
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_LICENSE_NUMBER, longRyaType(123456789013L)));
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_DATE_OF_BIRTH, dateRyaType(new DateTime(NOW.getTime() - 1).minusYears(40))));
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_EXPIRATION_DATE, dateRyaType(new Date(NOW.getTime() - 1))));
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_GLASSES, booleanRyaType(true)));
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_EMAIL_ADDRESS, uriRyaType(new URIImpl("mailto:bob.smitch01@gmail.com"))));
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_ADDRESS, stringRyaType("124 Fake St. Washington, DC 20024")));
    duplicateBobBuilder.setProperty(EMPLOYEE_TYPE_URI, new Property(HAS_EXTENSION, shortRyaType((short) 556)));
    final Entity duplicateBobEntity = duplicateBobBuilder.build();
    // Data duplication detection is disabled so it will be created.
    try {
        entityStorage.create(duplicateBobEntity);
    } catch (final EntityNearDuplicateException e) {
        // Report the exception so a failure explains itself.
        fail("Duplicate detection is disabled, so a near-duplicate entity must still be created: " + e);
    }
    assertTrue(entityStorage.get(duplicateBobEntity.getSubject()).isPresent());

    final Builder notDuplicateBobBuilder = Entity.builder(createBobEntity());
    notDuplicateBobBuilder.setSubject(createRyaUri("Not Bob"));
    // Modify a property for each type by a large amount, intended to fall
    // outside the configured tolerances.
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_AGE, shortRyaType((short) 50)));
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_WEIGHT, floatRyaType(300.0f)));
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_HEIGHT, doubleRyaType(100.0)));
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_INCOME, intRyaType(60000)));
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_NUMBER_OF_CHILDREN, byteRyaType((byte) 5)));
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_LICENSE_NUMBER, longRyaType(9L)));
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_DATE_OF_BIRTH, dateRyaType(new DateTime(NOW.getTime() - 10000000L).minusYears(40))));
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_EXPIRATION_DATE, dateRyaType(new Date(NOW.getTime() - 10000000L))));
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_GLASSES, booleanRyaType(false)));
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_EMAIL_ADDRESS, uriRyaType(new URIImpl("mailto:bad.email.address@gmail.com"))));
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_ADDRESS, stringRyaType("123456789 Fake St. Washington, DC 20024")));
    notDuplicateBobBuilder.setProperty(EMPLOYEE_TYPE_URI, new Property(HAS_EXTENSION, shortRyaType((short) 1000)));
    final Entity notDuplicateBobEntity = notDuplicateBobBuilder.build();
    // Data duplication detection is disabled so it will be created.
    try {
        entityStorage.create(notDuplicateBobEntity);
    } catch (final EntityNearDuplicateException e) {
        fail("Duplicate detection is disabled, so a clearly different entity must be created: " + e);
    }
    assertTrue(entityStorage.get(notDuplicateBobEntity.getSubject()).isPresent());
}
Also used : Entity(org.apache.rya.indexing.entity.model.Entity) MongoEntityStorage(org.apache.rya.indexing.entity.storage.mongo.MongoEntityStorage) EntityStorage(org.apache.rya.indexing.entity.storage.EntityStorage) MongoEntityStorage(org.apache.rya.indexing.entity.storage.mongo.MongoEntityStorage) ReflectionToStringBuilder(org.apache.commons.lang.builder.ReflectionToStringBuilder) Builder(org.apache.rya.indexing.entity.model.Entity.Builder) URIImpl(org.openrdf.model.impl.URIImpl) DateTime(org.joda.time.DateTime) Date(java.util.Date) MongoTypeStorage(org.apache.rya.indexing.entity.storage.mongo.MongoTypeStorage) TypeStorage(org.apache.rya.indexing.entity.storage.TypeStorage) MongoTypeStorage(org.apache.rya.indexing.entity.storage.mongo.MongoTypeStorage) RyaType(org.apache.rya.api.domain.RyaType) RyaTypeUtils.shortRyaType(org.apache.rya.api.domain.RyaTypeUtils.shortRyaType) RyaTypeUtils.floatRyaType(org.apache.rya.api.domain.RyaTypeUtils.floatRyaType) RyaTypeUtils.uriRyaType(org.apache.rya.api.domain.RyaTypeUtils.uriRyaType) RyaTypeUtils.longRyaType(org.apache.rya.api.domain.RyaTypeUtils.longRyaType) RyaTypeUtils.stringRyaType(org.apache.rya.api.domain.RyaTypeUtils.stringRyaType) RyaTypeUtils.doubleRyaType(org.apache.rya.api.domain.RyaTypeUtils.doubleRyaType) RyaTypeUtils.byteRyaType(org.apache.rya.api.domain.RyaTypeUtils.byteRyaType) RyaTypeUtils.booleanRyaType(org.apache.rya.api.domain.RyaTypeUtils.booleanRyaType) RyaTypeUtils.dateRyaType(org.apache.rya.api.domain.RyaTypeUtils.dateRyaType) Type(org.apache.rya.indexing.entity.model.Type) RyaTypeUtils.intRyaType(org.apache.rya.api.domain.RyaTypeUtils.intRyaType) DuplicateDataConfig(org.apache.rya.indexing.smarturi.duplication.conf.DuplicateDataConfig) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Property(org.apache.rya.indexing.entity.model.Property) Test(org.junit.Test)

Example 7 with Builder

use of org.apache.rya.indexing.entity.model.Entity.Builder in project incubator-rya by apache.

the class DuplicateDataDetectorIT method testCreateEntityNearDuplicate.

/**
 * Verifies that, with duplicate detection enabled, storing an entity whose
 * property values are all only slightly different from an already stored
 * entity is rejected with an {@link EntityNearDuplicateException}, while an
 * entity whose values differ by a large amount is stored normally.
 * <p>
 * The test stores the person and employee types, stores the baseline "Bob"
 * entity, and then attempts to store the two variations under different
 * subjects.
 */
@Test
public void testCreateEntityNearDuplicate() throws EntityStorageException, TypeStorageException, ObjectStorageException {
    // Create the types the Entity uses and confirm they were stored.
    final TypeStorage typeStorage = new MongoTypeStorage(super.getMongoClient(), RYA_INSTANCE_NAME);
    final Type personType = createPersonType();
    final Type employeeType = createEmployeeType();
    typeStorage.create(personType);
    typeStorage.create(employeeType);
    assertTrue(typeStorage.get(personType.getId()).isPresent());
    assertTrue(typeStorage.get(employeeType.getId()).isPresent());

    // One tolerance per data type, followed by a map that is left empty here
    // (presumably the equivalent-terms map -- confirm against
    // DuplicateDataConfig) and the flag that enables detection for this test.
    final DuplicateDataConfig duplicateDataConfig = new DuplicateDataConfig(
        new Tolerance(0.0, ToleranceType.DIFFERENCE), // boolean
        new Tolerance(0.0, ToleranceType.DIFFERENCE), // byte
        new Tolerance(500.0, ToleranceType.DIFFERENCE), // date
        new Tolerance(0.0001, ToleranceType.PERCENTAGE), // double
        new Tolerance(0.0001, ToleranceType.PERCENTAGE), // float
        new Tolerance(1.0, ToleranceType.DIFFERENCE), // integer
        new Tolerance(1.0, ToleranceType.DIFFERENCE), // long
        new Tolerance(1.0, ToleranceType.DIFFERENCE), // short
        new Tolerance(1.0, ToleranceType.DIFFERENCE), // string
        new Tolerance(1.0, ToleranceType.DIFFERENCE), // uri
        new HashMap<String, List<String>>(),
        true);
    final DuplicateDataDetector duplicateDataDetector = new DuplicateDataDetector(duplicateDataConfig);
    final EntityStorage entityStorage = new MongoEntityStorage(super.getMongoClient(), RYA_INSTANCE_NAME, duplicateDataDetector);

    // Store the baseline entity.
    final Entity bobEntity = createBobEntity();
    entityStorage.create(bobEntity);
    assertTrue(entityStorage.get(bobEntity.getSubject()).isPresent());

    final Builder duplicateBobBuilder = Entity.builder(createBobEntity());
    duplicateBobBuilder.setSubject(createRyaUri("Robert"));
    // Modify a property for each type that is within tolerance
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_AGE, shortRyaType((short) 41)));
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_WEIGHT, floatRyaType(250.76f)));
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_HEIGHT, doubleRyaType(72.499)));
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_INCOME, intRyaType(50001)));
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_NUMBER_OF_CHILDREN, byteRyaType((byte) 2)));
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_LICENSE_NUMBER, longRyaType(123456789013L)));
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_DATE_OF_BIRTH, dateRyaType(new DateTime(NOW.getTime() - 1).minusYears(40))));
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_EXPIRATION_DATE, dateRyaType(new Date(NOW.getTime() - 1))));
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_GLASSES, booleanRyaType(true)));
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_EMAIL_ADDRESS, uriRyaType(new URIImpl("mailto:bob.smitch01@gmail.com"))));
    duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_ADDRESS, stringRyaType("124 Fake St. Washington, DC 20024")));
    duplicateBobBuilder.setProperty(EMPLOYEE_TYPE_URI, new Property(HAS_EXTENSION, shortRyaType((short) 556)));
    final Entity duplicateBobEntity = duplicateBobBuilder.build();
    // Try to create another entity that's considered a duplicate.
    // It will NOT be created.
    try {
        entityStorage.create(duplicateBobEntity);
        fail("Expected an EntityNearDuplicateException when storing a near-duplicate entity");
    } catch (final EntityNearDuplicateException expected) {
        // Expected: the near duplicate was rejected.
    }
    assertFalse(entityStorage.get(duplicateBobEntity.getSubject()).isPresent());

    final Builder notDuplicateBobBuilder = Entity.builder(createBobEntity());
    notDuplicateBobBuilder.setSubject(createRyaUri("Not Bob"));
    // Modify a property for each type by a large amount so that the entity is
    // intended to fall outside the configured tolerances.
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_AGE, shortRyaType((short) 50)));
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_WEIGHT, floatRyaType(300.0f)));
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_HEIGHT, doubleRyaType(100.0)));
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_INCOME, intRyaType(60000)));
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_NUMBER_OF_CHILDREN, byteRyaType((byte) 5)));
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_LICENSE_NUMBER, longRyaType(9L)));
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_DATE_OF_BIRTH, dateRyaType(new DateTime(NOW.getTime() - 10000000L).minusYears(40))));
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_EXPIRATION_DATE, dateRyaType(new Date(NOW.getTime() - 10000000L))));
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_GLASSES, booleanRyaType(false)));
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_EMAIL_ADDRESS, uriRyaType(new URIImpl("mailto:bad.email.address@gmail.com"))));
    notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_ADDRESS, stringRyaType("123456789 Fake St. Washington, DC 20024")));
    notDuplicateBobBuilder.setProperty(EMPLOYEE_TYPE_URI, new Property(HAS_EXTENSION, shortRyaType((short) 1000)));
    final Entity notDuplicateBobEntity = notDuplicateBobBuilder.build();
    // It will be created.
    try {
        entityStorage.create(notDuplicateBobEntity);
    } catch (final EntityNearDuplicateException e) {
        // Report the exception so a failure explains itself.
        fail("An entity outside every tolerance must not be rejected as a near duplicate: " + e);
    }
    assertTrue(entityStorage.get(notDuplicateBobEntity.getSubject()).isPresent());
}
Also used : Entity(org.apache.rya.indexing.entity.model.Entity) MongoEntityStorage(org.apache.rya.indexing.entity.storage.mongo.MongoEntityStorage) EntityStorage(org.apache.rya.indexing.entity.storage.EntityStorage) MongoEntityStorage(org.apache.rya.indexing.entity.storage.mongo.MongoEntityStorage) ReflectionToStringBuilder(org.apache.commons.lang.builder.ReflectionToStringBuilder) Builder(org.apache.rya.indexing.entity.model.Entity.Builder) URIImpl(org.openrdf.model.impl.URIImpl) DateTime(org.joda.time.DateTime) Date(java.util.Date) MongoTypeStorage(org.apache.rya.indexing.entity.storage.mongo.MongoTypeStorage) TypeStorage(org.apache.rya.indexing.entity.storage.TypeStorage) MongoTypeStorage(org.apache.rya.indexing.entity.storage.mongo.MongoTypeStorage) RyaType(org.apache.rya.api.domain.RyaType) RyaTypeUtils.shortRyaType(org.apache.rya.api.domain.RyaTypeUtils.shortRyaType) RyaTypeUtils.floatRyaType(org.apache.rya.api.domain.RyaTypeUtils.floatRyaType) RyaTypeUtils.uriRyaType(org.apache.rya.api.domain.RyaTypeUtils.uriRyaType) RyaTypeUtils.longRyaType(org.apache.rya.api.domain.RyaTypeUtils.longRyaType) RyaTypeUtils.stringRyaType(org.apache.rya.api.domain.RyaTypeUtils.stringRyaType) RyaTypeUtils.doubleRyaType(org.apache.rya.api.domain.RyaTypeUtils.doubleRyaType) RyaTypeUtils.byteRyaType(org.apache.rya.api.domain.RyaTypeUtils.byteRyaType) RyaTypeUtils.booleanRyaType(org.apache.rya.api.domain.RyaTypeUtils.booleanRyaType) RyaTypeUtils.dateRyaType(org.apache.rya.api.domain.RyaTypeUtils.dateRyaType) Type(org.apache.rya.indexing.entity.model.Type) RyaTypeUtils.intRyaType(org.apache.rya.api.domain.RyaTypeUtils.intRyaType) DuplicateDataConfig(org.apache.rya.indexing.smarturi.duplication.conf.DuplicateDataConfig) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Property(org.apache.rya.indexing.entity.model.Property) Test(org.junit.Test)

Example 8 with Builder

use of org.apache.rya.indexing.entity.model.Entity.Builder in project incubator-rya by apache.

the class DuplicateDataDetectorIT method testEntityMissingProperty.

/**
 * Checks that two entities are not reported as duplicates when one of them
 * lacks a property the other has.
 * <p>
 * A copy of the "Bob" entity is built with its {@code HAS_SSN} property
 * removed from the person type, and a default-constructed
 * {@link DuplicateDataDetector} is asked to compare the original with the
 * copy.
 */
@Test
public void testEntityMissingProperty() throws SmartUriException, ConfigurationException {
    final Entity bob = createBobEntity();

    // Copy Bob and drop the SSN property from the person type.
    final Builder withoutSsnBuilder = new Builder(bob);
    withoutSsnBuilder.unsetProperty(PERSON_TYPE_URI, HAS_SSN);
    final Entity bobWithoutSsn = withoutSsnBuilder.build();

    final DuplicateDataDetector detector = new DuplicateDataDetector();
    assertFalse(detector.compareEntities(bob, bobWithoutSsn));
}
Also used : Entity(org.apache.rya.indexing.entity.model.Entity) ReflectionToStringBuilder(org.apache.commons.lang.builder.ReflectionToStringBuilder) Builder(org.apache.rya.indexing.entity.model.Entity.Builder) Test(org.junit.Test)

Aggregations

ReflectionToStringBuilder (org.apache.commons.lang.builder.ReflectionToStringBuilder)8 Entity (org.apache.rya.indexing.entity.model.Entity)8 Builder (org.apache.rya.indexing.entity.model.Entity.Builder)8 Test (org.junit.Test)7 RyaType (org.apache.rya.api.domain.RyaType)3 RyaTypeUtils.booleanRyaType (org.apache.rya.api.domain.RyaTypeUtils.booleanRyaType)3 RyaTypeUtils.byteRyaType (org.apache.rya.api.domain.RyaTypeUtils.byteRyaType)3 RyaTypeUtils.dateRyaType (org.apache.rya.api.domain.RyaTypeUtils.dateRyaType)3 RyaTypeUtils.doubleRyaType (org.apache.rya.api.domain.RyaTypeUtils.doubleRyaType)3 RyaTypeUtils.floatRyaType (org.apache.rya.api.domain.RyaTypeUtils.floatRyaType)3 RyaTypeUtils.intRyaType (org.apache.rya.api.domain.RyaTypeUtils.intRyaType)3 RyaTypeUtils.longRyaType (org.apache.rya.api.domain.RyaTypeUtils.longRyaType)3 RyaTypeUtils.shortRyaType (org.apache.rya.api.domain.RyaTypeUtils.shortRyaType)3 RyaTypeUtils.stringRyaType (org.apache.rya.api.domain.RyaTypeUtils.stringRyaType)3 RyaTypeUtils.uriRyaType (org.apache.rya.api.domain.RyaTypeUtils.uriRyaType)3 Property (org.apache.rya.indexing.entity.model.Property)3 ImmutableList (com.google.common.collect.ImmutableList)2 Date (java.util.Date)2 List (java.util.List)2 Type (org.apache.rya.indexing.entity.model.Type)2