use of org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage in project incubator-rya by apache.
the class InputIT method historicAndStreamConflict.
/**
* Simulates the case where a Triple is added to Rya, a new query that
* includes the triple as a historic match is inserted into Fluo, and then
* the same triple is streamed into Fluo. The query's results will already
* include the Triple because they were added while the query was being
* created. This case should not fail or effect the end results in any way.
*/
@Test
public void historicAndStreamConflict() throws Exception {
// A query that finds people who talk to Eve and work at Chipotle.
final String sparql = "SELECT ?x WHERE { " + "?x <http://talksTo> <http://Eve>. " + "?x <http://worksAt> <http://Chipotle>." + "}";
// Triples that are loaded into Rya before the PCJ is created.
final ValueFactory vf = new ValueFactoryImpl();
final Set<Statement> historicTriples = Sets.newHashSet(vf.createStatement(vf.createURI("http://Alice"), vf.createURI("http://talksTo"), vf.createURI("http://Eve")), vf.createStatement(vf.createURI("http://Alice"), vf.createURI("http://worksAt"), vf.createURI("http://Chipotle")));
// Triples that will be streamed into Fluo after the PCJ has been created.
final Set<RyaStatement> streamedTriples = Sets.newHashSet(new RyaStatement(new RyaURI("http://Alice"), new RyaURI("http://talksTo"), new RyaURI("http://Eve")), new RyaStatement(new RyaURI("http://Alice"), new RyaURI("http://worksAt"), new RyaURI("http://Chipotle")));
// The expected final result.
final Set<BindingSet> expected = new HashSet<>();
final MapBindingSet bs = new MapBindingSet();
bs.addBinding("x", vf.createURI("http://Alice"));
expected.add(bs);
// Load the historic data into Rya.
final SailRepositoryConnection ryaConn = super.getRyaSailRepository().getConnection();
for (final Statement triple : historicTriples) {
ryaConn.add(triple);
}
ryaConn.close();
// Create the PCJ table.
final Connector accumuloConn = super.getAccumuloConnector();
final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(accumuloConn, getRyaInstanceName());
final String pcjId = pcjStorage.createPcj(sparql);
try (FluoClient fluoClient = FluoFactory.newClient(super.getFluoConfiguration())) {
// Tell the Fluo app to maintain the PCJ.
new CreateFluoPcj().withRyaIntegration(pcjId, pcjStorage, fluoClient, accumuloConn, getRyaInstanceName());
// Ensure Alice is a match.
super.getMiniFluo().waitForObservers();
Set<BindingSet> results = new HashSet<>();
try (CloseableIterator<BindingSet> resultsIt = pcjStorage.listResults(pcjId)) {
while (resultsIt.hasNext()) {
results.add(resultsIt.next());
}
}
assertEquals(expected, results);
// Stream the same Alice triple into Fluo.
new InsertTriples().insert(fluoClient, streamedTriples, Optional.<String>absent());
// Verify the end results of the query is stiill only Alice.
super.getMiniFluo().waitForObservers();
results = new HashSet<>();
try (CloseableIterator<BindingSet> resultsIt = pcjStorage.listResults(pcjId)) {
while (resultsIt.hasNext()) {
results.add(resultsIt.next());
}
}
assertEquals(expected, results);
}
}
use of org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage in project incubator-rya by apache.
the class MongoBatchUpdatePCJIT method batchUpdate.
@Test
public void batchUpdate() throws Exception {
// Setup a Rya Client.
final MongoConnectionDetails connectionDetails = getConnectionDetails();
final RyaClient ryaClient = MongoRyaClientFactory.build(connectionDetails, getMongoClient());
// Install an instance of Rya on the mini accumulo cluster.
ryaClient.getInstall().install(conf.getRyaInstanceName(), InstallConfiguration.builder().setEnablePcjIndex(true).build());
// Load some statements into the Rya instance.
final ValueFactory vf = ValueFactoryImpl.getInstance();
final Collection<Statement> statements = new ArrayList<>();
statements.add(vf.createStatement(vf.createURI("urn:Alice"), vf.createURI("urn:likes"), vf.createURI("urn:icecream")));
statements.add(vf.createStatement(vf.createURI("urn:Bob"), vf.createURI("urn:likes"), vf.createURI("urn:icecream")));
statements.add(vf.createStatement(vf.createURI("urn:Charlie"), vf.createURI("urn:likes"), vf.createURI("urn:icecream")));
statements.add(vf.createStatement(vf.createURI("urn:David"), vf.createURI("urn:likes"), vf.createURI("urn:icecream")));
statements.add(vf.createStatement(vf.createURI("urn:Eve"), vf.createURI("urn:likes"), vf.createURI("urn:icecream")));
statements.add(vf.createStatement(vf.createURI("urn:Frank"), vf.createURI("urn:likes"), vf.createURI("urn:icecream")));
statements.add(vf.createStatement(vf.createURI("urn:George"), vf.createURI("urn:likes"), vf.createURI("urn:icecream")));
statements.add(vf.createStatement(vf.createURI("urn:Hillary"), vf.createURI("urn:likes"), vf.createURI("urn:icecream")));
statements.add(vf.createStatement(vf.createURI("urn:Alice"), vf.createURI("urn:hasEyeColor"), vf.createURI("urn:blue")));
statements.add(vf.createStatement(vf.createURI("urn:Bob"), vf.createURI("urn:hasEyeColor"), vf.createURI("urn:blue")));
statements.add(vf.createStatement(vf.createURI("urn:Charlie"), vf.createURI("urn:hasEyeColor"), vf.createURI("urn:blue")));
statements.add(vf.createStatement(vf.createURI("urn:David"), vf.createURI("urn:hasEyeColor"), vf.createURI("urn:blue")));
statements.add(vf.createStatement(vf.createURI("urn:Eve"), vf.createURI("urn:hasEyeColor"), vf.createURI("urn:blue")));
statements.add(vf.createStatement(vf.createURI("urn:Frank"), vf.createURI("urn:hasEyeColor"), vf.createURI("urn:blue")));
statements.add(vf.createStatement(vf.createURI("urn:George"), vf.createURI("urn:hasEyeColor"), vf.createURI("urn:green")));
statements.add(vf.createStatement(vf.createURI("urn:Hillary"), vf.createURI("urn:hasEyeColor"), vf.createURI("urn:brown")));
ryaClient.getLoadStatements().loadStatements(conf.getRyaInstanceName(), statements);
try (final PrecomputedJoinStorage pcjStorage = new MongoPcjStorage(getMongoClient(), conf.getRyaInstanceName())) {
// Create a PCJ for a SPARQL query.
final String sparql = "SELECT ?name WHERE { ?name <urn:likes> <urn:icecream> . ?name <urn:hasEyeColor> <urn:blue> . }";
final String pcjId = pcjStorage.createPcj(sparql);
// Run the test.
ryaClient.getBatchUpdatePCJ().batchUpdate(conf.getRyaInstanceName(), pcjId);
// Verify the correct results were loaded into the PCJ table.
final Set<BindingSet> expectedResults = new HashSet<>();
MapBindingSet bs = new MapBindingSet();
bs.addBinding("name", vf.createURI("urn:Alice"));
expectedResults.add(bs);
bs = new MapBindingSet();
bs.addBinding("name", vf.createURI("urn:Bob"));
expectedResults.add(bs);
bs = new MapBindingSet();
bs.addBinding("name", vf.createURI("urn:Charlie"));
expectedResults.add(bs);
bs = new MapBindingSet();
bs.addBinding("name", vf.createURI("urn:David"));
expectedResults.add(bs);
bs = new MapBindingSet();
bs.addBinding("name", vf.createURI("urn:Eve"));
expectedResults.add(bs);
bs = new MapBindingSet();
bs.addBinding("name", vf.createURI("urn:Frank"));
expectedResults.add(bs);
final Set<BindingSet> results = new HashSet<>();
try (CloseableIterator<BindingSet> resultsIt = pcjStorage.listResults(pcjId)) {
while (resultsIt.hasNext()) {
results.add(resultsIt.next());
}
}
assertEquals(expectedResults, results);
}
}
use of org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage in project incubator-rya by apache.
the class MongoPcjIndexSetProvider method getIndices.
@Override
protected List<ExternalTupleSet> getIndices() throws PcjIndexSetException {
try {
final StatefulMongoDBRdfConfiguration mongoConf = (StatefulMongoDBRdfConfiguration) conf;
final MongoClient client = mongoConf.getMongoClient();
final MongoPcjDocuments pcjDocs = new MongoPcjDocuments(client, mongoConf.getRyaInstanceName());
List<String> documents = null;
documents = mongoConf.getPcjTables();
// this maps associates pcj document name with pcj sparql query
final Map<String, String> indexDocuments = Maps.newLinkedHashMap();
try (final PrecomputedJoinStorage storage = new MongoPcjStorage(client, mongoConf.getRyaInstanceName())) {
final boolean docsProvided = documents != null && !documents.isEmpty();
if (docsProvided) {
// if tables provided, associate table name with sparql
for (final String doc : documents) {
indexDocuments.put(doc, storage.getPcjMetadata(doc).getSparql());
}
} else if (hasRyaDetails()) {
// If this is a newer install of Rya, and it has PCJ Details, then
// use those.
final List<String> ids = storage.listPcjs();
for (final String pcjId : ids) {
indexDocuments.put(pcjId, storage.getPcjMetadata(pcjId).getSparql());
}
} else {
// Otherwise figure it out by getting document IDs.
documents = pcjDocs.listPcjDocuments();
for (final String pcjId : documents) {
if (pcjId.startsWith("INDEX")) {
indexDocuments.put(pcjId, pcjDocs.getPcjMetadata(pcjId).getSparql());
}
}
}
}
final List<ExternalTupleSet> index = Lists.newArrayList();
if (indexDocuments.isEmpty()) {
log.info("No Index found");
} else {
for (final String pcjID : indexDocuments.keySet()) {
final String indexSparqlString = indexDocuments.get(pcjID);
index.add(new MongoPcjQueryNode(indexSparqlString, pcjID, pcjDocs));
}
}
return index;
} catch (final PCJStorageException | MalformedQueryException e) {
throw new PcjIndexSetException("Failed to get indicies for this PCJ index.", e);
}
}
use of org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage in project incubator-rya by apache.
the class RyaTableNames method getTableNames.
/**
* Get the the Accumulo table names that are used by an instance of Rya.
*
* @param ryaInstanceName - The name of the Rya instance. (not null)
* @param conn - A connector to the host Accumulo instance. (not null)
* @return The Accumulo table names that are used by the Rya instance.
* @throws NotInitializedException The instance's Rya Details have not been initialized.
* @throws RyaDetailsRepositoryException General problem with the Rya Details repository.
* @throws PCJStorageException General problem with the PCJ storage.
*/
public List<String> getTableNames(final String ryaInstanceName, final Connector conn) throws NotInitializedException, RyaDetailsRepositoryException, PCJStorageException {
// Build the list of tables that may be present within the Rya instance.
final List<String> tables = new ArrayList<>();
// Core Rya tables.
final TableLayoutStrategy coreTableNames = new TablePrefixLayoutStrategy(ryaInstanceName);
tables.add(coreTableNames.getSpo());
tables.add(coreTableNames.getPo());
tables.add(coreTableNames.getOsp());
tables.add(coreTableNames.getEval());
tables.add(coreTableNames.getNs());
tables.add(coreTableNames.getProspects());
tables.add(coreTableNames.getSelectivity());
// Rya Details table.
tables.add(AccumuloRyaInstanceDetailsRepository.makeTableName(ryaInstanceName));
// Secondary Indexer Tables.
final RyaDetailsRepository detailsRepo = new AccumuloRyaInstanceDetailsRepository(conn, ryaInstanceName);
final RyaDetails details = detailsRepo.getRyaInstanceDetails();
if (details.getEntityCentricIndexDetails().isEnabled()) {
tables.add(EntityCentricIndex.makeTableName(ryaInstanceName));
}
if (details.getFreeTextIndexDetails().isEnabled()) {
tables.addAll(AccumuloFreeTextIndexer.makeTableNames(ryaInstanceName));
}
if (details.getTemporalIndexDetails().isEnabled()) {
tables.add(AccumuloTemporalIndexer.makeTableName(ryaInstanceName));
}
if (details.getPCJIndexDetails().isEnabled()) {
try (final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(conn, ryaInstanceName)) {
final List<String> pcjIds = pcjStorage.listPcjs();
final PcjTableNameFactory tableNameFactory = new PcjTableNameFactory();
for (final String pcjId : pcjIds) {
tables.add(tableNameFactory.makeTableName(ryaInstanceName, pcjId));
}
}
}
// Verify they actually exist. If any don't, remove them from the list.
final TableOperations tableOps = conn.tableOperations();
final Iterator<String> tablesIt = tables.iterator();
while (tablesIt.hasNext()) {
final String table = tablesIt.next();
if (!tableOps.exists(table)) {
tablesIt.remove();
}
}
return tables;
}
use of org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage in project incubator-rya by apache.
the class MongoBatchUpdatePCJ method updatePCJResults.
private void updatePCJResults(final String ryaInstanceName, final String pcjId, final MongoClient client) throws InstanceDoesNotExistException, PCJDoesNotExistException, RyaClientException {
// Things that have to be closed before we exit.
Sail sail = null;
SailConnection sailConn = null;
try (final PrecomputedJoinStorage pcjStorage = new MongoPcjStorage(client, ryaInstanceName)) {
// Create an instance of Sail backed by the Rya instance.
sail = connectToRya(ryaInstanceName);
final SailRepository sailRepo = new SailRepository(sail);
final SailRepositoryConnection sailRepoConn = sailRepo.getConnection();
// Purge the old results from the PCJ.
try {
pcjStorage.purge(pcjId);
} catch (final PCJStorageException e) {
throw new RyaClientException("Could not batch update PCJ with ID '" + pcjId + "' because the old " + "results could not be purged from it.", e);
}
// Parse the PCJ's SPARQL query.
final PcjMetadata metadata = pcjStorage.getPcjMetadata(pcjId);
final String sparql = metadata.getSparql();
sailConn = sail.getConnection();
final TupleQuery tupleQuery = sailRepoConn.prepareTupleQuery(QueryLanguage.SPARQL, sparql);
// Execute the query.
final List<VisibilityBindingSet> batch = new ArrayList<>(1000);
tupleQuery.evaluate(new TupleQueryResultHandlerBase() {
@Override
public void handleSolution(final BindingSet bindingSet) throws TupleQueryResultHandlerException {
final VisibilityBindingSet result = new VisibilityBindingSet(bindingSet, "");
log.warn("Visibility information on the binding set is lost during a batch update." + " This can create data leaks.");
batch.add(result);
if (batch.size() == 1000) {
try {
pcjStorage.addResults(pcjId, batch);
} catch (final PCJStorageException e) {
throw new TupleQueryResultHandlerException("Fail to batch load new results into the PCJ with ID '" + pcjId + "'.", e);
}
batch.clear();
}
}
});
if (!batch.isEmpty()) {
pcjStorage.addResults(pcjId, batch);
batch.clear();
}
} catch (final MalformedQueryException | PCJStorageException | SailException | QueryEvaluationException | RepositoryException | TupleQueryResultHandlerException e) {
throw new RyaClientException("Fail to batch load new results into the PCJ with ID '" + pcjId + "'.", e);
} finally {
if (sailConn != null) {
try {
sailConn.close();
} catch (final SailException e) {
log.warn(e.getMessage(), e);
}
}
if (sail != null) {
try {
sail.shutDown();
} catch (final SailException e) {
log.warn(e.getMessage(), e);
}
}
}
}
Aggregations