use of org.apache.rya.api.domain.RyaStatement in project incubator-rya by apache.
the class MergeToolTest method testEmptyParentNewChild.
/**
 * Creates one statement through {@code createRyaStatementUniqueAdd} against the child DAO,
 * runs the merge tool with a start time of {@code YESTERDAY}, and asserts that the
 * statement is found exactly once in the parent afterwards.
 * The SPO tables of both instances are printed before and after the merge.
 *
 * @throws Exception if statement creation, table printing, or the merge run fails.
 */
@Test
public void testEmptyParentNewChild() throws Exception {
    final String parentSpoTable = PARENT_TABLE_PREFIX + RdfCloudTripleStoreConstants.TBL_SPO_SUFFIX;
    final String childSpoTable = CHILD_TABLE_PREFIX + RdfCloudTripleStoreConstants.TBL_SPO_SUFFIX;

    final RyaStatement childOnlyStatement =
            createRyaStatementUniqueAdd("zs_MisP_NewC", "zp_MisP_NewC", "zo_MisP_NewC", null, null, childDao);

    // State before merging.
    AccumuloRyaUtils.printTable(parentSpoTable, parentConfig);
    AccumuloRyaUtils.printTable(childSpoTable, childConfig);

    mergeToolRun(YESTERDAY);

    // State after merging.
    AccumuloRyaUtils.printTable(parentSpoTable, parentConfig);
    AccumuloRyaUtils.printTable(childSpoTable, childConfig);

    assertStatementInParent("Missing Parent New Child should add Parent.", 1, childOnlyStatement);
}
use of org.apache.rya.api.domain.RyaStatement in project incubator-rya by apache.
the class MergeToolTest method testMergeTool.
/**
 * Populates the parent and child instances with diverging data, runs the merge tool with a
 * start time of {@code YESTERDAY}, and verifies the parent's resulting SPO table: the total
 * row count, the presence or absence of each individual statement, and that the statement
 * whose visibility differed can be queried with the parent's and the child's authorization
 * but not with an unrelated one.
 *
 * @throws Exception if any DAO, scanner, security, or merge operation fails.
 */
@Test
public void testMergeTool() throws Exception {
    // This statement was in both parent/child instances a month ago and is before the start time of yesterday
    // but it was left alone. It should remain in the parent after merging.
    final RyaStatement ryaStatementOutOfTimeRange = createRyaStatement("coach", "called", "timeout", LAST_MONTH);
    // This statement was in both parent/child instances a month ago but after the start time of yesterday
    // the parent deleted it and the child still has it. It should stay deleted in the parent after merging.
    final RyaStatement ryaStatementParentDeletedAfter = createRyaStatement("parent", "deleted", "after", LAST_MONTH);
    // This statement was added by the parent after the start time of yesterday and doesn't exist in the child.
    // It should stay in the parent after merging.
    final RyaStatement ryaStatementParentAddedAfter = createRyaStatement("parent", "added", "after", TODAY);
    // This statement was in both parent/child instances a month ago but after the start time of yesterday
    // the child deleted it and the parent still has it. It should be deleted from the parent after merging.
    final RyaStatement ryaStatementChildDeletedAfter = createRyaStatement("child", "deleted", "after", LAST_MONTH);
    // This statement was added by the child after the start time of yesterday and doesn't exist in the parent.
    // It should be added to the parent after merging.
    final RyaStatement ryaStatementChildAddedAfter = createRyaStatement("child", "added", "after", TODAY);
    // This statement was modified by the child after the start of yesterday (The timestamp changes after updating)
    // It should be updated in the parent to match the child.
    final RyaStatement ryaStatementUpdatedByChild = createRyaStatement("bob", "catches", "ball", LAST_MONTH);
    final RyaStatement ryaStatementUntouchedByChild = createRyaStatement("bill", "talks to", "john", LAST_MONTH);
    final RyaStatement ryaStatementDeletedByChild = createRyaStatement("susan", "eats", "burgers", LAST_MONTH);
    final RyaStatement ryaStatementAddedByChild = createRyaStatement("ronnie", "plays", "guitar", TODAY);
    // This statement was modified by the child to change the column visibility.
    // The parent should combine the child's visibility with its visibility.
    final RyaStatement ryaStatementVisibilityDifferent = createRyaStatement("I", "see", "you", LAST_MONTH);
    ryaStatementVisibilityDifferent.setColumnVisibility(PARENT_COLUMN_VISIBILITY.getExpression());

    // Setup initial parent instance with 7 rows
    // This is the state of the parent data (as it is today) before merging occurs which will use the specified start time of yesterday.
    // Merging should keep statement
    parentDao.add(ryaStatementOutOfTimeRange);
    // Merging should update statement
    parentDao.add(ryaStatementUpdatedByChild);
    // Merging should keep statement
    parentDao.add(ryaStatementUntouchedByChild);
    // Merging should delete statement
    parentDao.add(ryaStatementDeletedByChild);
    // Merging should update statement
    parentDao.add(ryaStatementVisibilityDifferent);
    // Merging should keep statement
    parentDao.add(ryaStatementParentAddedAfter);
    // Merging should delete statement
    parentDao.add(ryaStatementChildDeletedAfter);

    // Simulate the child coming back with a modified data set before the merging occurs.
    // The same statement objects are mutated here (after they were written to the parent) so the
    // child receives the changed object, timestamp, and visibility.
    // Seven statements are written to the child: the out-of-range one, the updated one, the
    // untouched one, the one with the changed visibility, the one the parent deleted, and two
    // that only the child has.
    ryaStatementUpdatedByChild.setObject(TestUtils.createRyaUri("football"));
    ryaStatementUpdatedByChild.setTimestamp(TODAY.getTime());
    ryaStatementVisibilityDifferent.setColumnVisibility(CHILD_COLUMN_VISIBILITY.getExpression());
    childDao.add(ryaStatementOutOfTimeRange);
    childDao.add(ryaStatementUpdatedByChild);
    childDao.add(ryaStatementUntouchedByChild);
    // Merging should add statement
    childDao.add(ryaStatementAddedByChild);
    childDao.add(ryaStatementVisibilityDifferent);
    childDao.add(ryaStatementParentDeletedAfter);
    // Merging should add statement
    childDao.add(ryaStatementChildAddedAfter);

    AccumuloRyaUtils.printTable(PARENT_TABLE_PREFIX + RdfCloudTripleStoreConstants.TBL_SPO_SUFFIX, parentConfig);
    AccumuloRyaUtils.printTable(CHILD_TABLE_PREFIX + RdfCloudTripleStoreConstants.TBL_SPO_SUFFIX, childConfig);

    log.info("Starting merge tool. Merging all data after the specified start time: " + YESTERDAY);
    mergeToolRun(YESTERDAY);

    for (final String tableSuffix : AccumuloInstanceDriver.TABLE_NAME_SUFFIXES) {
        AccumuloRyaUtils.printTable(PARENT_TABLE_PREFIX + tableSuffix, parentConfig);
    }

    // Count every entry in the parent's SPO table; the scanner is released even if iteration fails.
    final Scanner scanner = AccumuloRyaUtils.getScanner(PARENT_TABLE_PREFIX + RdfCloudTripleStoreConstants.TBL_SPO_SUFFIX, parentConfig);
    int count = 0;
    try {
        final Iterator<Entry<Key, Value>> iterator = scanner.iterator();
        while (iterator.hasNext()) {
            iterator.next();
            count++;
        }
    } finally {
        scanner.close();
    }
    // Make sure we have all of them in the parent.
    assertEquals(7, count);

    assertStatementInParent("Parent missing statement that untouched by the child", 1, ryaStatementUntouchedByChild);
    assertStatementInParent("Parent missing statement that was out of time range", 1, ryaStatementOutOfTimeRange);
    assertStatementInParent("Parent missing statement that was updated by the child", 1, ryaStatementUpdatedByChild);
    assertStatementInParent("Parent missing statement that was added by the child", 1, ryaStatementAddedByChild);
    assertStatementInParent("Parent has statement that the child deleted", 0, ryaStatementDeletedByChild);

    // Check that it can be queried with parent's visibility
    assertStatementInParent("Parent missing statement with parent visibility", 1, ryaStatementVisibilityDifferent);

    // Check that it can be queried with child's visibility
    parentConfig.set(RdfCloudTripleStoreConfiguration.CONF_QUERY_AUTH, CHILD_AUTH);
    final Authorizations newParentAuths = AccumuloRyaUtils.addUserAuths(accumuloDualInstanceDriver.getParentUser(), accumuloDualInstanceDriver.getParentSecOps(), CHILD_AUTH);
    accumuloDualInstanceDriver.getParentSecOps().changeUserAuthorizations(accumuloDualInstanceDriver.getParentUser(), newParentAuths);
    assertStatementInParent("Parent missing statement with child visibility", 1, ryaStatementVisibilityDifferent);

    // Check that it can NOT be queried with some other visibility
    parentConfig.set(RdfCloudTripleStoreConfiguration.CONF_QUERY_AUTH, "bad_auth");
    final CloseableIteration<RyaStatement, RyaDAOException> iter = parentDao.getQueryEngine().query(ryaStatementVisibilityDifferent, parentConfig);
    count = 0;
    try {
        while (iter.hasNext()) {
            iter.next();
            count++;
        }
    } catch (final Exception e) {
        // An exception caused by an AccumuloSecurityException is the expected outcome;
        // anything else is a genuine failure.
        if (!(e.getCause() instanceof AccumuloSecurityException)) {
            fail("Unexpected exception while querying with an unauthorized visibility: " + e);
        }
    } finally {
        // Close in finally so the iteration is released even when fail() throws above.
        iter.close();
    }
    assertEquals(0, count);

    // reset auth
    parentConfig.set(RdfCloudTripleStoreConfiguration.CONF_QUERY_AUTH, PARENT_AUTH);

    assertStatementInParent("Parent has statement it deleted later", 0, ryaStatementParentDeletedAfter);
    assertStatementInParent("Parent missing statement it added later", 1, ryaStatementParentAddedAfter);
    assertStatementInParent("Parent has statement child deleted later", 0, ryaStatementChildDeletedAfter);
    assertStatementInParent("Parent missing statement child added later", 1, ryaStatementChildAddedAfter);

    log.info("DONE");
}
use of org.apache.rya.api.domain.RyaStatement in project incubator-rya by apache.
the class CopyToolDemo method testCopyTool.
/**
 * Demo of the copy tool. Fills the parent with 80 statements whose timestamps lie before
 * {@code LAST_MONTH} and 20 whose timestamps lie between {@code YESTERDAY} and {@code TODAY},
 * runs the copy tool with a start time of {@code YESTERDAY}, wires the resulting child
 * instance into the dual-instance driver, and logs how many rows the child's SPO table holds.
 *
 * @throws Exception if any DAO, table, security, or copy operation fails.
 */
public void testCopyTool() throws Exception {
    log.info("");
    log.info("Setting up initial state of parent before copying to child...");
    log.info("Adding data to parent...");

    final int numRowsNotToCopy = 80;
    final int numRowsToCopy = 20;

    // Create Rya Statement before last month which won't be copied
    final Random random = new Random();
    for (int i = 1; i <= numRowsNotToCopy; i++) {
        final long randTimeBeforeLastMonth = DemoUtilities.randLong(0, LAST_MONTH.getTime());
        final String randVis = random.nextBoolean() ? PARENT_AUTH : "";
        final RyaStatement ryaStatementOutOfTimeRange = createRyaStatement("Nobody", "sees", "me " + i, new Date(randTimeBeforeLastMonth));
        // Encode with an explicit charset so the visibility bytes do not depend on the platform default.
        ryaStatementOutOfTimeRange.setColumnVisibility(randVis.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        parentDao.add(ryaStatementOutOfTimeRange);
    }

    // Statements timestamped between yesterday and today.
    for (int i = 1; i <= numRowsToCopy; i++) {
        final long randTimeAfterYesterdayAndBeforeToday = DemoUtilities.randLong(YESTERDAY.getTime(), TODAY.getTime());
        final String randVis = random.nextBoolean() ? PARENT_AUTH : "";
        final RyaStatement ryaStatementShouldCopy = createRyaStatement("bob", "copies", "susan " + i, new Date(randTimeAfterYesterdayAndBeforeToday));
        ryaStatementShouldCopy.setColumnVisibility(randVis.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        parentDao.add(ryaStatementShouldCopy);
    }

    if (USE_COPY_FILE_OUTPUT) {
        // Set up table splits
        final SortedSet<Text> splits = new TreeSet<>();
        for (char alphabet = 'a'; alphabet <= 'e'; alphabet++) {
            final Text letter = new Text(alphabet + "");
            splits.add(letter);
        }
        parentDao.getConnector().tableOperations().addSplits(PARENT_TABLE_PREFIX + RdfCloudTripleStoreConstants.TBL_SPO_SUFFIX, splits);
    }

    log.info("Added " + (numRowsNotToCopy + numRowsToCopy) + " rows to parent SPO table.");
    log.info("Parent SPO table output below:");
    DemoUtilities.promptEnterKey(IS_PROMPTING_ENABLED);
    AccumuloRyaUtils.printTablePretty(PARENT_TABLE_PREFIX + RdfCloudTripleStoreConstants.TBL_SPO_SUFFIX, parentConfig);
    // AccumuloRyaUtils.printTablePretty(CHILD_TABLE_PREFIX + RdfCloudTripleStoreConstants.TBL_SPO_SUFFIX, childConfig);
    log.info("");
    log.info("Total Rows in table: " + (numRowsNotToCopy + numRowsToCopy));
    log.info("Number of Rows NOT to copy (out of time range): " + numRowsNotToCopy);
    log.info("Number of Rows to copy (in time range): " + numRowsToCopy);
    log.info("");

    DemoUtilities.promptEnterKey(IS_PROMPTING_ENABLED);

    log.info("Starting copy tool. Copying all data after the specified start time: " + YESTERDAY);
    log.info("");

    copyToolRun(YESTERDAY);

    // Copy Tool made child instance so hook the tables and dao into the driver.
    final String childUser = accumuloDualInstanceDriver.getChildUser();
    final Connector childConnector = ConfigUtils.getConnector(childConfig);
    accumuloDualInstanceDriver.getChildAccumuloInstanceDriver().setConnector(childConnector);
    accumuloDualInstanceDriver.getChildAccumuloInstanceDriver().setUpTables();
    accumuloDualInstanceDriver.getChildAccumuloInstanceDriver().setUpDao();

    // Update child config to include changes made from copy process
    final SecurityOperations childSecOps = accumuloDualInstanceDriver.getChildSecOps();
    final Authorizations newChildAuths = AccumuloRyaUtils.addUserAuths(childUser, childSecOps, PARENT_AUTH);
    childSecOps.changeUserAuthorizations(childUser, newChildAuths);
    final String childAuthString = newChildAuths.toString();
    final List<String> duplicateKeys = MergeTool.DUPLICATE_KEY_MAP.get(MRUtils.AC_AUTH_PROP);
    childConfig.set(MRUtils.AC_AUTH_PROP, childAuthString);
    // The same authorization string is written under every key listed as a duplicate of AC_AUTH_PROP.
    for (final String key : duplicateKeys) {
        childConfig.set(key, childAuthString);
    }

    // AccumuloRyaUtils.printTablePretty(CHILD_TABLE_PREFIX + RdfCloudTripleStoreConstants.TBL_PO_SUFFIX, childConfig);
    // AccumuloRyaUtils.printTablePretty(CHILD_TABLE_PREFIX + RdfCloudTripleStoreConstants.TBL_OSP_SUFFIX, childConfig);
    AccumuloRyaUtils.printTablePretty(CHILD_TABLE_PREFIX + RdfCloudTripleStoreConstants.TBL_SPO_SUFFIX, childConfig);

    // Count the entries in the child's SPO table; the scanner is released even if iteration fails.
    final Scanner scanner = AccumuloRyaUtils.getScanner(CHILD_TABLE_PREFIX + RdfCloudTripleStoreConstants.TBL_SPO_SUFFIX, childConfig);
    int count = 0;
    try {
        final Iterator<Entry<Key, Value>> iterator = scanner.iterator();
        while (iterator.hasNext()) {
            iterator.next();
            count++;
        }
    } finally {
        scanner.close();
    }

    log.info("");
    log.info("Total rows copied: " + count);
    log.info("");

    log.info("Demo done");
}
use of org.apache.rya.api.domain.RyaStatement in project incubator-rya by apache.
the class BaseRuleMapper method map.
/**
 * Handles one table entry. When no parent layout is configured the entry is passed straight
 * to {@code copyRow}; otherwise it is deserialized into a {@link RyaStatement} and passed to
 * {@code copyStatement} if there is no condition or the condition accepts it. A counter is
 * incremented for each entry that is copied.
 *
 * @param key the key of the entry being processed.
 * @param value the value of the entry being processed; may be {@code null}.
 * @param context the mapper context, used for counters and handed on to the copy helpers.
 * @throws IOException if the entry cannot be deserialized or the filter condition cannot be
 * evaluated (the original exception is preserved as the cause).
 * @throws InterruptedException declared by the overridden method.
 */
@Override
protected void map(final Key key, final Value value, final Context context) throws IOException, InterruptedException {
    try {
        if (parentLayout == null) {
            // If there's no layout, copy the row directly
            copyRow(key, value, context);
            context.getCounter(Counters.DIRECT_ROWS_COPIED).increment(1);
        } else {
            // If there is a layout, deserialize the statement and insert it if it meets the condition.
            // The TripleRow is only needed on this path, so it is built here rather than for every entry.
            final TripleRow row = new TripleRow(
                    key.getRowData().toArray(),
                    key.getColumnFamilyData().toArray(),
                    key.getColumnQualifierData().toArray(),
                    key.getTimestamp(),
                    key.getColumnVisibilityData().toArray(),
                    value == null ? null : value.get());
            final RyaStatement rs = resolver.deserialize(parentLayout, row);
            if (condition == null || CopyRule.accept(RyaToRdfConversions.convertStatement(rs), condition, strategy)) {
                copyStatement(rs, context);
                context.getCounter(Counters.STATEMENTS_COPIED).increment(1);
            }
        }
    } catch (final TripleRowResolverException e) {
        throw new IOException("Error deserializing triple", e);
    } catch (final QueryEvaluationException e) {
        throw new IOException("Error evaluating the filter condition", e);
    }
}
use of org.apache.rya.api.domain.RyaStatement in project incubator-rya by apache.
the class MergeToolMapper method run.
/**
 * Runs the merge for this mapper. After {@code setup}, the current parent and child
 * statements are repeatedly handed to {@code compareKeys}; depending on the result the
 * parent and/or child side is advanced and keys are added or deleted, until
 * {@code compareKeys} reports {@code FINISHED}. {@code cleanup} is always invoked once
 * the comparison loop has been entered.
 *
 * @param context the mapper context; it is also stored in the {@code context} field.
 * @throws IOException declared by the overridden method.
 * @throws InterruptedException declared by the overridden method.
 */
@Override
public void run(final Context context) throws IOException, InterruptedException {
    setup(context);
    this.context = context;
    try {
        RyaStatement parentRyaStatement = nextParentRyaStatement();
        RyaStatement childRyaStatement = nextChildRyaStatement();
        CompareKeysResult compareKeysResult;

        // Keep comparing the current parent/child pair until the comparison reports completion.
        do {
            compareKeysResult = compareKeys(parentRyaStatement, childRyaStatement);

            // Each outcome says which side(s) to advance and whether a key is added or deleted.
            switch (compareKeysResult) {
                case ADVANCE_CHILD: {
                    childRyaStatement = nextChildRyaStatement();
                    break;
                }
                case ADVANCE_PARENT: {
                    parentRyaStatement = nextParentRyaStatement();
                    break;
                }
                case ADVANCE_CHILD_AND_ADD: {
                    // Move the child forward first, then add the statement it was pointing at.
                    final RyaStatement statementToAdd = childRyaStatement;
                    childRyaStatement = nextChildRyaStatement();
                    addKey(statementToAdd, context);
                    break;
                }
                case ADVANCE_PARENT_AND_DELETE: {
                    // Move the parent forward first, then delete the statement it was pointing at.
                    final RyaStatement statementToDelete = parentRyaStatement;
                    parentRyaStatement = nextParentRyaStatement();
                    deleteKey(statementToDelete, context);
                    break;
                }
                case ADVANCE_BOTH: {
                    final ColumnVisibility parentVisibility = new ColumnVisibility(parentRyaStatement.getColumnVisibility());
                    final ColumnVisibility childVisibility = new ColumnVisibility(childRyaStatement.getColumnVisibility());
                    final boolean needsCombinedVisibility =
                            !parentVisibility.equals(childVisibility)
                            && !childVisibility.equals(AccumuloRdfConstants.EMPTY_CV);
                    // Replace the parent's entry with one carrying the combined visibility when
                    // the two differ and the child's visibility is not empty.
                    if (needsCombinedVisibility) {
                        final ColumnVisibility combinedVisibility = combineColumnVisibilities(parentVisibility, childVisibility);
                        final RyaStatement replacement = updateRyaStatementColumnVisibility(parentRyaStatement, combinedVisibility);
                        deleteKey(parentRyaStatement, context);
                        addKey(replacement, context);
                    }
                    parentRyaStatement = nextParentRyaStatement();
                    childRyaStatement = nextChildRyaStatement();
                    break;
                }
                case FINISHED: {
                    log.info("Finished scanning parent and child tables");
                    break;
                }
                default: {
                    log.error("Unknown result: " + compareKeysResult);
                    break;
                }
            }
        } while (compareKeysResult != CompareKeysResult.FINISHED);
    } catch (MutationsRejectedException | TripleRowResolverException e) {
        // NOTE(review): the failure is only logged, so the method returns normally after a
        // rejected mutation or deserialization error — confirm this is the intended behaviour.
        log.error("Error encountered while merging", e);
    } finally {
        cleanup(context);
    }
}
Aggregations