use of org.apache.rya.indexing.smarturi.duplication.conf.DuplicateDataConfig in project incubator-rya by apache.
the class DuplicateDataDetectorIT method testCreateEntityNearDuplicateConfigDisabled.
@Test
public void testCreateEntityNearDuplicateConfigDisabled() throws EntityStorageException, TypeStorageException, ConfigurationException, ObjectStorageException {
// Create the types the Entity uses.
final TypeStorage typeStorage = new MongoTypeStorage(super.getMongoClient(), RYA_INSTANCE_NAME);
final Type personType = createPersonType();
final Type employeeType = createEmployeeType();
typeStorage.create(personType);
typeStorage.create(employeeType);
final Optional<Type> storedPersonType = typeStorage.get(personType.getId());
final Optional<Type> storedEmployeeType = typeStorage.get(employeeType.getId());
assertTrue(storedPersonType.isPresent());
assertTrue(storedEmployeeType.isPresent());
// Create it.
final DuplicateDataConfig duplicateDataConfig = new DuplicateDataConfig(// boolean
new Tolerance(0.0, ToleranceType.DIFFERENCE), // byte
new Tolerance(0.0, ToleranceType.DIFFERENCE), // date
new Tolerance(500.0, ToleranceType.DIFFERENCE), // double
new Tolerance(0.0001, ToleranceType.PERCENTAGE), // float
new Tolerance(0.0001, ToleranceType.PERCENTAGE), // integer
new Tolerance(1.0, ToleranceType.DIFFERENCE), // long
new Tolerance(1.0, ToleranceType.DIFFERENCE), // short
new Tolerance(1.0, ToleranceType.DIFFERENCE), // string
new Tolerance(1.0, ToleranceType.DIFFERENCE), // uri
new Tolerance(1.0, ToleranceType.DIFFERENCE), new HashMap<String, List<String>>(), false);
final DuplicateDataDetector duplicateDataDetector = new DuplicateDataDetector(duplicateDataConfig);
final EntityStorage entityStorage = new MongoEntityStorage(super.getMongoClient(), RYA_INSTANCE_NAME, duplicateDataDetector);
final Entity bobEntity = createBobEntity();
entityStorage.create(bobEntity);
assertTrue(entityStorage.get(bobEntity.getSubject()).isPresent());
final Builder duplicateBobBuilder = Entity.builder(createBobEntity());
duplicateBobBuilder.setSubject(createRyaUri("Robert"));
// Modify a property for each type that is within tolerance
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_AGE, shortRyaType((short) 41)));
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_WEIGHT, floatRyaType(250.76f)));
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_HEIGHT, doubleRyaType(72.499)));
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_INCOME, intRyaType(50001)));
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_NUMBER_OF_CHILDREN, byteRyaType((byte) 2)));
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_LICENSE_NUMBER, longRyaType(123456789013L)));
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_DATE_OF_BIRTH, dateRyaType(new DateTime(NOW.getTime() - 1).minusYears(40))));
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_EXPIRATION_DATE, dateRyaType(new Date(NOW.getTime() - 1))));
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_GLASSES, booleanRyaType(true)));
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_EMAIL_ADDRESS, uriRyaType(new URIImpl("mailto:bob.smitch01@gmail.com"))));
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_ADDRESS, stringRyaType("124 Fake St. Washington, DC 20024")));
duplicateBobBuilder.setProperty(EMPLOYEE_TYPE_URI, new Property(HAS_EXTENSION, shortRyaType((short) 556)));
final Entity duplicateBobEntity = duplicateBobBuilder.build();
// Data duplication detection is disabled so it will be created.
try {
entityStorage.create(duplicateBobEntity);
} catch (final EntityNearDuplicateException e) {
fail();
}
assertTrue(entityStorage.get(duplicateBobEntity.getSubject()).isPresent());
final Builder notDuplicateBobBuilder = Entity.builder(createBobEntity());
notDuplicateBobBuilder.setSubject(createRyaUri("Not Bob"));
// Modify a property for each type that is within tolerance
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_AGE, shortRyaType((short) 50)));
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_WEIGHT, floatRyaType(300.0f)));
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_HEIGHT, doubleRyaType(100.0)));
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_INCOME, intRyaType(60000)));
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_NUMBER_OF_CHILDREN, byteRyaType((byte) 5)));
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_LICENSE_NUMBER, longRyaType(9L)));
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_DATE_OF_BIRTH, dateRyaType(new DateTime(NOW.getTime() - 10000000L).minusYears(40))));
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_EXPIRATION_DATE, dateRyaType(new Date(NOW.getTime() - 10000000L))));
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_GLASSES, booleanRyaType(false)));
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_EMAIL_ADDRESS, uriRyaType(new URIImpl("mailto:bad.email.address@gmail.com"))));
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_ADDRESS, stringRyaType("123456789 Fake St. Washington, DC 20024")));
notDuplicateBobBuilder.setProperty(EMPLOYEE_TYPE_URI, new Property(HAS_EXTENSION, shortRyaType((short) 1000)));
final Entity notDuplicateBobEntity = notDuplicateBobBuilder.build();
// Data duplication detection is disabled so it will be created.
try {
entityStorage.create(notDuplicateBobEntity);
} catch (final EntityNearDuplicateException e) {
fail();
}
assertTrue(entityStorage.get(notDuplicateBobEntity.getSubject()).isPresent());
}
use of org.apache.rya.indexing.smarturi.duplication.conf.DuplicateDataConfig in project incubator-rya by apache.
the class DuplicateDataDetectorIT method testCreateEntityNearDuplicate.
@Test
public void testCreateEntityNearDuplicate() throws EntityStorageException, TypeStorageException, ObjectStorageException {
// Create the types the Entity uses.
final TypeStorage typeStorage = new MongoTypeStorage(super.getMongoClient(), RYA_INSTANCE_NAME);
final Type personType = createPersonType();
final Type employeeType = createEmployeeType();
typeStorage.create(personType);
typeStorage.create(employeeType);
final Optional<Type> storedPersonType = typeStorage.get(personType.getId());
final Optional<Type> storedEmployeeType = typeStorage.get(employeeType.getId());
assertTrue(storedPersonType.isPresent());
assertTrue(storedEmployeeType.isPresent());
// Create it.
final DuplicateDataConfig duplicateDataConfig = new DuplicateDataConfig(// boolean
new Tolerance(0.0, ToleranceType.DIFFERENCE), // byte
new Tolerance(0.0, ToleranceType.DIFFERENCE), // date
new Tolerance(500.0, ToleranceType.DIFFERENCE), // double
new Tolerance(0.0001, ToleranceType.PERCENTAGE), // float
new Tolerance(0.0001, ToleranceType.PERCENTAGE), // integer
new Tolerance(1.0, ToleranceType.DIFFERENCE), // long
new Tolerance(1.0, ToleranceType.DIFFERENCE), // short
new Tolerance(1.0, ToleranceType.DIFFERENCE), // string
new Tolerance(1.0, ToleranceType.DIFFERENCE), // uri
new Tolerance(1.0, ToleranceType.DIFFERENCE), new HashMap<String, List<String>>(), true);
final DuplicateDataDetector duplicateDataDetector = new DuplicateDataDetector(duplicateDataConfig);
final EntityStorage entityStorage = new MongoEntityStorage(super.getMongoClient(), RYA_INSTANCE_NAME, duplicateDataDetector);
final Entity bobEntity = createBobEntity();
entityStorage.create(bobEntity);
assertTrue(entityStorage.get(bobEntity.getSubject()).isPresent());
final Builder duplicateBobBuilder = Entity.builder(createBobEntity());
duplicateBobBuilder.setSubject(createRyaUri("Robert"));
// Modify a property for each type that is within tolerance
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_AGE, shortRyaType((short) 41)));
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_WEIGHT, floatRyaType(250.76f)));
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_HEIGHT, doubleRyaType(72.499)));
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_INCOME, intRyaType(50001)));
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_NUMBER_OF_CHILDREN, byteRyaType((byte) 2)));
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_LICENSE_NUMBER, longRyaType(123456789013L)));
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_DATE_OF_BIRTH, dateRyaType(new DateTime(NOW.getTime() - 1).minusYears(40))));
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_EXPIRATION_DATE, dateRyaType(new Date(NOW.getTime() - 1))));
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_GLASSES, booleanRyaType(true)));
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_EMAIL_ADDRESS, uriRyaType(new URIImpl("mailto:bob.smitch01@gmail.com"))));
duplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_ADDRESS, stringRyaType("124 Fake St. Washington, DC 20024")));
duplicateBobBuilder.setProperty(EMPLOYEE_TYPE_URI, new Property(HAS_EXTENSION, shortRyaType((short) 556)));
final Entity duplicateBobEntity = duplicateBobBuilder.build();
// Try to create another entity that's considered a duplicate.
// It will NOT be be created.
boolean hasDuplicate = false;
try {
entityStorage.create(duplicateBobEntity);
} catch (final EntityNearDuplicateException e) {
hasDuplicate = true;
}
assertTrue(hasDuplicate);
assertFalse(entityStorage.get(duplicateBobEntity.getSubject()).isPresent());
final Builder notDuplicateBobBuilder = Entity.builder(createBobEntity());
notDuplicateBobBuilder.setSubject(createRyaUri("Not Bob"));
// Modify a property for each type that is within tolerance
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_AGE, shortRyaType((short) 50)));
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_WEIGHT, floatRyaType(300.0f)));
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_HEIGHT, doubleRyaType(100.0)));
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_INCOME, intRyaType(60000)));
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_NUMBER_OF_CHILDREN, byteRyaType((byte) 5)));
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_LICENSE_NUMBER, longRyaType(9L)));
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_DATE_OF_BIRTH, dateRyaType(new DateTime(NOW.getTime() - 10000000L).minusYears(40))));
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_EXPIRATION_DATE, dateRyaType(new Date(NOW.getTime() - 10000000L))));
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_GLASSES, booleanRyaType(false)));
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_EMAIL_ADDRESS, uriRyaType(new URIImpl("mailto:bad.email.address@gmail.com"))));
notDuplicateBobBuilder.setProperty(PERSON_TYPE_URI, new Property(HAS_ADDRESS, stringRyaType("123456789 Fake St. Washington, DC 20024")));
notDuplicateBobBuilder.setProperty(EMPLOYEE_TYPE_URI, new Property(HAS_EXTENSION, shortRyaType((short) 1000)));
final Entity notDuplicateBobEntity = notDuplicateBobBuilder.build();
// It will be created.
try {
entityStorage.create(notDuplicateBobEntity);
} catch (final EntityNearDuplicateException e) {
fail();
}
assertTrue(entityStorage.get(notDuplicateBobEntity.getSubject()).isPresent());
}
use of org.apache.rya.indexing.smarturi.duplication.conf.DuplicateDataConfig in project incubator-rya by apache.
the class DuplicateDataDetectorIT method testReadConfigFile.
@Test
public void testReadConfigFile() throws SmartUriException, ConfigurationException {
final DuplicateDataConfig duplicateDataConfig = new DuplicateDataConfig();
assertNotNull(duplicateDataConfig.getBooleanTolerance());
assertNotNull(duplicateDataConfig.getByteTolerance());
assertNotNull(duplicateDataConfig.getDateTolerance());
assertNotNull(duplicateDataConfig.getDoubleTolerance());
assertNotNull(duplicateDataConfig.getFloatTolerance());
assertNotNull(duplicateDataConfig.getIntegerTolerance());
assertNotNull(duplicateDataConfig.getLongTolerance());
assertNotNull(duplicateDataConfig.getShortTolerance());
assertNotNull(duplicateDataConfig.getStringTolerance());
assertNotNull(duplicateDataConfig.getUriTolerance());
assertNotNull(duplicateDataConfig.getEquivalentTermsMap());
assertNotNull(duplicateDataConfig.isDetectionEnabled());
}
Aggregations