Use of org.apache.rya.indexing.pcj.storage.PcjMetadata in the Apache incubator-rya project.
Class MongoBatchUpdatePCJ, method updatePCJResults.
/**
 * Rebuilds the stored results of a single PCJ from scratch.
 * <p>
 * The existing results are purged, the PCJ's SPARQL query is re-evaluated
 * against the Rya instance, and the solutions are written back to the PCJ
 * storage in batches of 1000. Every result is stored with an empty
 * visibility expression, so the original visibility information is lost.
 *
 * @param ryaInstanceName - The name of the Rya instance that is queried and that hosts the PCJ.
 * @param pcjId - Identifies the PCJ whose results are rebuilt.
 * @param client - The Mongo client used to build the PCJ storage.
 * @throws InstanceDoesNotExistException Declared by this method; presumably raised while
 *   connecting to the Rya instance (TODO confirm against {@code connectToRya}).
 * @throws PCJDoesNotExistException Declared by this method; the point where it is raised is
 *   not visible in this method (TODO confirm).
 * @throws RyaClientException The old results could not be purged, or the new results could
 *   not be computed or stored.
 */
private void updatePCJResults(final String ryaInstanceName, final String pcjId, final MongoClient client) throws InstanceDoesNotExistException, PCJDoesNotExistException, RyaClientException {
    // Number of results buffered before they are flushed to the PCJ storage.
    final int batchSize = 1000;

    // Things that have to be closed before we exit.
    Sail sail = null;
    SailConnection sailConn = null;
    SailRepositoryConnection sailRepoConn = null;

    try (final PrecomputedJoinStorage pcjStorage = new MongoPcjStorage(client, ryaInstanceName)) {
        // Create an instance of Sail backed by the Rya instance.
        sail = connectToRya(ryaInstanceName);
        final SailRepository sailRepo = new SailRepository(sail);
        // Held in a variable declared outside the try so that it can be closed in the finally block.
        sailRepoConn = sailRepo.getConnection();

        // Purge the old results from the PCJ.
        try {
            pcjStorage.purge(pcjId);
        } catch (final PCJStorageException e) {
            throw new RyaClientException("Could not batch update PCJ with ID '" + pcjId + "' because the old " + "results could not be purged from it.", e);
        }

        // Parse the PCJ's SPARQL query.
        final PcjMetadata metadata = pcjStorage.getPcjMetadata(pcjId);
        final String sparql = metadata.getSparql();
        sailConn = sail.getConnection();
        final TupleQuery tupleQuery = sailRepoConn.prepareTupleQuery(QueryLanguage.SPARQL, sparql);

        // Execute the query, flushing the buffer every time it fills up.
        final List<VisibilityBindingSet> batch = new ArrayList<>(batchSize);
        tupleQuery.evaluate(new TupleQueryResultHandlerBase() {
            @Override
            public void handleSolution(final BindingSet bindingSet) throws TupleQueryResultHandlerException {
                final VisibilityBindingSet result = new VisibilityBindingSet(bindingSet, "");
                log.warn("Visibility information on the binding set is lost during a batch update." + " This can create data leaks.");
                batch.add(result);

                if (batch.size() == batchSize) {
                    try {
                        pcjStorage.addResults(pcjId, batch);
                    } catch (final PCJStorageException e) {
                        throw new TupleQueryResultHandlerException("Fail to batch load new results into the PCJ with ID '" + pcjId + "'.", e);
                    }
                    batch.clear();
                }
            }
        });

        // Flush whatever is left over from the last, partially filled batch.
        if (!batch.isEmpty()) {
            pcjStorage.addResults(pcjId, batch);
            batch.clear();
        }
    } catch (final MalformedQueryException | PCJStorageException | SailException | QueryEvaluationException | RepositoryException | TupleQueryResultHandlerException e) {
        throw new RyaClientException("Fail to batch load new results into the PCJ with ID '" + pcjId + "'.", e);
    } finally {
        // Close failures are only logged so they never mask the primary outcome of the update.
        if (sailRepoConn != null) {
            try {
                sailRepoConn.close();
            } catch (final RepositoryException e) {
                log.warn(e.getMessage(), e);
            }
        }

        if (sailConn != null) {
            try {
                sailConn.close();
            } catch (final SailException e) {
                log.warn(e.getMessage(), e);
            }
        }

        if (sail != null) {
            try {
                sail.shutDown();
            } catch (final SailException e) {
                log.warn(e.getMessage(), e);
            }
        }
    }
}
Use of org.apache.rya.indexing.pcj.storage.PcjMetadata in the Apache incubator-rya project.
Class CreateFluoPcj, method withRyaIntegration.
/**
 * Registers an already existing PCJ with the Fluo PCJ Updater application so
 * that the application keeps it up to date.
 * <p>
 * Rya is scanned for Statement Pattern matches, which are inserted into the
 * Fluo application. From then on the application maintains the intermediate
 * results as new triples arrive and exports new query results to the
 * {@code pcjId} within the provided {@code pcjStorage}. A PCJ table must
 * already exist for the query that corresponds to {@code pcjId}. The SPARQL
 * query is read from the PCJ's stored metadata and results are exported using
 * the Rya {@link ExportStrategy}.
 *
 * @param pcjId - Identifies the PCJ that will be updated by the Fluo app. (not null)
 * @param pcjStorage - Provides access to the PCJ index. (not null)
 * @param fluo - A connection to the Fluo application that updates the PCJ index. (not null)
 * @param accumulo - Accumulo connector used to connect to Accumulo. (not null)
 * @param ryaInstance - Name of the Rya instance to connect to. (not null)
 * @return FluoQuery containing the metadata for the newly registered SPARQL query
 * @throws MalformedQueryException The SPARQL query stored for the {@code pcjId} is malformed.
 * @throws PcjException The PCJ Metadata for {@code pcjId} could not be read from {@code pcjStorage}.
 * @throws RyaDAOException Historic PCJ results could not be loaded because of a problem with Rya.
 * @throws UnsupportedQueryException Declared by this method; the exact conditions are not
 *   visible here (presumably the stored query cannot be handled by the Fluo application).
 */
public FluoQuery withRyaIntegration(final String pcjId, final PrecomputedJoinStorage pcjStorage, final FluoClient fluo, final Connector accumulo, final String ryaInstance) throws MalformedQueryException, PcjException, RyaDAOException, UnsupportedQueryException {
    // Reject missing arguments before any storage access happens.
    requireNonNull(pcjId);
    requireNonNull(pcjStorage);
    requireNonNull(fluo);
    requireNonNull(accumulo);
    requireNonNull(ryaInstance);

    // The query text comes from the PCJ's stored metadata rather than from the caller.
    final String storedSparql = pcjStorage.getPcjMetadata(pcjId).getSparql();

    // Delegate to the SPARQL based overload, exporting with the Rya strategy only.
    return withRyaIntegration(
            pcjId,
            storedSparql,
            Sets.newHashSet(ExportStrategy.RYA),
            fluo,
            accumulo,
            ryaInstance);
}
Use of org.apache.rya.indexing.pcj.storage.PcjMetadata in the Apache incubator-rya project.
Class PcjTablesIT, method createPcjTable.
/**
 * Verifies that a freshly created PCJ table starts out with the expected
 * metadata: the SPARQL query it was created with, a cardinality of zero and
 * the variable orders it was created with.
 * <p>
 * The method being tested is {@link PcjTables#createPcjTable(Connector, String, Set, String)}
 */
@Test
public void createPcjTable() throws PcjException, AccumuloException, AccumuloSecurityException {
    final String query =
            "SELECT ?name ?age " +
            "{" +
            "FILTER(?age < 30) ." +
            "?name <http://hasAge> ?age." +
            "?name <http://playsSport> \"Soccer\" " +
            "}";

    final Connector connector = cluster.getConnector();

    // Create a PCJ table in the Mini Accumulo.
    final String tableName = new PcjTableNameFactory().makeTableName(getRyaInstanceName(), "testPcj");
    final Set<VariableOrder> variableOrders = new ShiftVarOrderFactory().makeVarOrders(new VariableOrder("name;age"));
    final PcjTables pcjTables = new PcjTables();
    pcjTables.createPcjTable(connector, tableName, variableOrders, query);

    // Read the metadata back from the table that was just created.
    final PcjMetadata stored = pcjTables.getPcjMetadata(connector, tableName);

    // A new table has no results yet, so its cardinality must be zero.
    assertEquals(new PcjMetadata(query, 0L, variableOrders), stored);
}
Use of org.apache.rya.indexing.pcj.storage.PcjMetadata in the Apache incubator-rya project.
Class PcjTablesIT, method createAndPopulatePcj.
/**
 * Verifies the combined operation that creates a new PCJ table, scans Rya for
 * matching results and stores those results in the table.
 * <p>
 * The method being tested is: {@link PcjTables#createAndPopulatePcj(RepositoryConnection, Connector, String, String, String[], Optional)}
 */
@Test
public void createAndPopulatePcj() throws RepositoryException, PcjException, TableNotFoundException, BindingSetConversionException, AccumuloException, AccumuloSecurityException {
    // Terms shared by the loaded statements and the expected results.
    final URIImpl hasAge = new URIImpl("http://hasAge");
    final URIImpl playsSport = new URIImpl("http://playsSport");
    final URIImpl aliceUri = new URIImpl("http://Alice");
    final URIImpl bobUri = new URIImpl("http://Bob");
    final URIImpl charlieUri = new URIImpl("http://Charlie");
    final URIImpl eveUri = new URIImpl("http://Eve");
    final NumericLiteralImpl aliceAge = new NumericLiteralImpl(14, XMLSchema.INTEGER);
    final NumericLiteralImpl bobAge = new NumericLiteralImpl(16, XMLSchema.INTEGER);
    final NumericLiteralImpl charlieAge = new NumericLiteralImpl(12, XMLSchema.INTEGER);
    final NumericLiteralImpl eveAge = new NumericLiteralImpl(43, XMLSchema.INTEGER);

    // Load some Triples into Rya. Eve is 43, so the query's age filter excludes her.
    final Set<Statement> statements = new HashSet<>();
    statements.add(new StatementImpl(aliceUri, hasAge, aliceAge));
    statements.add(new StatementImpl(aliceUri, playsSport, new LiteralImpl("Soccer")));
    statements.add(new StatementImpl(bobUri, hasAge, bobAge));
    statements.add(new StatementImpl(bobUri, playsSport, new LiteralImpl("Soccer")));
    statements.add(new StatementImpl(charlieUri, hasAge, charlieAge));
    statements.add(new StatementImpl(charlieUri, playsSport, new LiteralImpl("Soccer")));
    statements.add(new StatementImpl(eveUri, hasAge, eveAge));
    statements.add(new StatementImpl(eveUri, playsSport, new LiteralImpl("Soccer")));

    for (final Statement statement : statements) {
        ryaConn.add(statement);
    }

    // Create a PCJ table that will include those triples in its results.
    final String query =
            "SELECT ?name ?age " +
            "{" +
            "FILTER(?age < 30) ." +
            "?name <http://hasAge> ?age." +
            "?name <http://playsSport> \"Soccer\" " +
            "}";

    final Connector connector = cluster.getConnector();
    final String tableName = new PcjTableNameFactory().makeTableName(getRyaInstanceName(), "testPcj");

    // Create and populate the PCJ table.
    final PcjTables pcjTables = new PcjTables();
    pcjTables.createAndPopulatePcj(ryaConn, connector, tableName, query, new String[] { "name", "age" }, Optional.<PcjVarOrderFactory>absent());

    // Make sure the cardinality was updated: three of the four people pass the filter.
    final PcjMetadata stored = pcjTables.getPcjMetadata(connector, tableName);
    assertEquals(3, stored.getCardinality());

    // Scan Accumulo for the stored results.
    final Multimap<String, BindingSet> actual = loadPcjResults(connector, tableName);

    // Build the binding sets that are expected under every variable order.
    final MapBindingSet aliceBs = new MapBindingSet();
    aliceBs.addBinding("name", aliceUri);
    aliceBs.addBinding("age", aliceAge);

    final MapBindingSet bobBs = new MapBindingSet();
    bobBs.addBinding("name", bobUri);
    bobBs.addBinding("age", bobAge);

    final MapBindingSet charlieBs = new MapBindingSet();
    charlieBs.addBinding("name", charlieUri);
    charlieBs.addBinding("age", charlieAge);

    final Set<BindingSet> matches = Sets.<BindingSet>newHashSet(aliceBs, bobBs, charlieBs);

    final Multimap<String, BindingSet> expected = HashMultimap.create();
    expected.putAll("name;age", matches);
    expected.putAll("age;name", matches);

    assertEquals(expected, actual);
}
Use of org.apache.rya.indexing.pcj.storage.PcjMetadata in the Apache incubator-rya project.
Class AccumuloPcjStorageIT, method addResults.
/**
 * Verifies that adding results to a PCJ through the storage updates the
 * PCJ's metadata: after two binding sets are added, the stored metadata must
 * report the original SPARQL query, a cardinality of two and the variable
 * orders derived from that query.
 */
@Test
public void addResults() throws AccumuloException, AccumuloSecurityException, PCJStorageException, MalformedQueryException {
    // Setup the PCJ storage that will be tested against.
    final Connector accumuloConn = super.getClusterInstance().getConnector();
    final String instanceName = super.getRyaInstanceName();

    try (final PrecomputedJoinStorage storage = new AccumuloPcjStorage(accumuloConn, instanceName)) {
        // Create a PCJ.
        final String query = "SELECT * WHERE { ?a <http://isA> ?b }";
        final String id = storage.createPcj(query);

        // Add some binding sets to it, each with an empty visibility expression.
        final Set<VisibilityBindingSet> toStore = new HashSet<>();

        final MapBindingSet alice = new MapBindingSet();
        alice.addBinding("a", new URIImpl("http://Alice"));
        alice.addBinding("b", new URIImpl("http://Person"));
        toStore.add(new VisibilityBindingSet(alice, ""));

        final MapBindingSet charlie = new MapBindingSet();
        charlie.addBinding("a", new URIImpl("http://Charlie"));
        charlie.addBinding("b", new URIImpl("http://Comedian"));
        toStore.add(new VisibilityBindingSet(charlie, ""));

        storage.addResults(id, toStore);

        // Make sure the PCJ metadata was updated.
        final PcjMetadata stored = storage.getPcjMetadata(id);

        // Two results were added, so the expected cardinality is two.
        final Set<VariableOrder> expectedOrders = new ShiftVarOrderFactory().makeVarOrders(query);
        assertEquals(new PcjMetadata(query, 2L, expectedOrders), stored);
    }
}
Aggregations