Use of org.apache.rya.indexing.pcj.storage.PcjMetadata in project incubator-rya by Apache.
Class PcjDocumentsIntegrationTest, method createAndPopulatePcj.
/**
 * Ensure the method that creates a new PCJ, scans Rya for matches, and
 * stores them in the PCJ works against the Mongo backed storage.
 * <p>
 * The method being tested is: {@link MongoPcjDocuments#createAndPopulatePcj}
 * <p>
 * Four people are loaded into Rya. The query only keeps those younger than 30,
 * so Alice, Bob, and Charlie are expected while Eve (age 43) is filtered out.
 */
@Test
public void createAndPopulatePcj() throws Exception {
    final MongoDBRyaDAO dao = new MongoDBRyaDAO();
    dao.setConf(new StatefulMongoDBRdfConfiguration(conf, getMongoClient()));
    dao.init();
    final RdfCloudTripleStore ryaStore = new RdfCloudTripleStore();
    ryaStore.setRyaDAO(dao);
    ryaStore.initialize();
    final SailRepositoryConnection ryaConn = new RyaSailRepository(ryaStore).getConnection();
    ryaConn.begin();
    try {
        // Load some Triples into Rya.
        final Set<Statement> triples = new HashSet<>();
        triples.add(new StatementImpl(new URIImpl("http://Alice"), new URIImpl("http://hasAge"), new NumericLiteralImpl(14, XMLSchema.INTEGER)));
        triples.add(new StatementImpl(new URIImpl("http://Alice"), new URIImpl("http://playsSport"), new LiteralImpl("Soccer")));
        triples.add(new StatementImpl(new URIImpl("http://Bob"), new URIImpl("http://hasAge"), new NumericLiteralImpl(16, XMLSchema.INTEGER)));
        triples.add(new StatementImpl(new URIImpl("http://Bob"), new URIImpl("http://playsSport"), new LiteralImpl("Soccer")));
        triples.add(new StatementImpl(new URIImpl("http://Charlie"), new URIImpl("http://hasAge"), new NumericLiteralImpl(12, XMLSchema.INTEGER)));
        triples.add(new StatementImpl(new URIImpl("http://Charlie"), new URIImpl("http://playsSport"), new LiteralImpl("Soccer")));
        triples.add(new StatementImpl(new URIImpl("http://Eve"), new URIImpl("http://hasAge"), new NumericLiteralImpl(43, XMLSchema.INTEGER)));
        triples.add(new StatementImpl(new URIImpl("http://Eve"), new URIImpl("http://playsSport"), new LiteralImpl("Soccer")));
        for (final Statement triple : triples) {
            ryaConn.add(triple);
        }

        // Define a PCJ whose query matches every soccer player younger than 30.
        final String sparql = "SELECT ?name ?age " + "{" + "FILTER(?age < 30) ." + "?name <http://hasAge> ?age." + "?name <http://playsSport> \"Soccer\" " + "}";
        final String pcjTableName = "testPcj";

        // Create and populate the PCJ.
        final MongoPcjDocuments pcjs = new MongoPcjDocuments(getMongoClient(), conf.getRyaInstanceName());
        pcjs.createAndPopulatePcj(ryaConn, pcjTableName, sparql);

        // Make sure the cardinality was updated.
        final PcjMetadata metadata = pcjs.getPcjMetadata(pcjTableName);
        assertEquals(3, metadata.getCardinality());

        // Load the results that were stored for the PCJ.
        final Collection<BindingSet> fetchedResults = loadPcjResults(pcjTableName);

        // Ensure the expected results match those that were stored.
        final MapBindingSet alice = new MapBindingSet();
        alice.addBinding("name", new URIImpl("http://Alice"));
        alice.addBinding("age", new NumericLiteralImpl(14, XMLSchema.INTEGER));
        final MapBindingSet bob = new MapBindingSet();
        bob.addBinding("name", new URIImpl("http://Bob"));
        bob.addBinding("age", new NumericLiteralImpl(16, XMLSchema.INTEGER));
        final MapBindingSet charlie = new MapBindingSet();
        charlie.addBinding("name", new URIImpl("http://Charlie"));
        charlie.addBinding("age", new NumericLiteralImpl(12, XMLSchema.INTEGER));
        final Set<BindingSet> expected = Sets.<BindingSet>newHashSet(alice, bob, charlie);
        assertEquals(expected, fetchedResults);
    } finally {
        // Shut the store down even when closing the connection fails.
        try {
            ryaConn.close();
        } finally {
            ryaStore.shutDown();
        }
    }
}
Use of org.apache.rya.indexing.pcj.storage.PcjMetadata in project incubator-rya by Apache.
Class MongoPcjDocuments, method populatePcj.
/**
 * Scan Rya for results that solve the PCJ's query and store them in the PCJ
 * document.
 * <p>
 * This method assumes the PCJ document has already been created.
 *
 * @param pcjId - The Id of the PCJ that will receive the results. (not null)
 * @param ryaConn - A connection to the Rya store that will be queried to find results. (not null)
 * @throws PCJStorageException If results could not be written to the PCJ results document,
 *   the PCJ results document does not exist, or the query that is being executed was malformed.
 */
public void populatePcj(final String pcjId, final RepositoryConnection ryaConn) throws PCJStorageException {
    checkNotNull(pcjId);
    checkNotNull(ryaConn);

    // Number of results buffered in memory before they are flushed to the PCJ results document.
    final int batchSize = 1000;

    try {
        // Fetch the query that needs to be executed from the PCJ metadata document.
        final PcjMetadata pcjMetadata = getPcjMetadata(pcjId);
        final String sparql = pcjMetadata.getSparql();

        // Query Rya for results to the SPARQL query.
        final TupleQuery query = ryaConn.prepareTupleQuery(QueryLanguage.SPARQL, sparql);
        final TupleQueryResult results = query.evaluate();
        try {
            // Load the results into the PCJ results document one batch at a time.
            final Set<VisibilityBindingSet> batch = new HashSet<>(batchSize);
            while (results.hasNext()) {
                batch.add(new VisibilityBindingSet(results.next()));
                if (batch.size() == batchSize) {
                    addResults(pcjId, batch);
                    batch.clear();
                }
            }

            // Flush whatever is left over from the final, partially filled batch.
            if (!batch.isEmpty()) {
                addResults(pcjId, batch);
            }
        } finally {
            // Always release the query result, even when storing a batch fails.
            results.close();
        }
    } catch (RepositoryException | MalformedQueryException | QueryEvaluationException e) {
        throw new PCJStorageException("Could not populate a PCJ document with Rya results for the pcj with Id: " + pcjId, e);
    }
}
Use of org.apache.rya.indexing.pcj.storage.PcjMetadata in project incubator-rya by Apache.
Class PcjTables, method getPcjMetadata.
/**
 * Fetch the {@link PcjMetadata} from an Accumulo table.
 * <p>
 * This method assumes the PCJ table has already been created.
 *
 * @param accumuloConn - A connection to the Accumulo that hosts the PCJ table. (not null)
 * @param pcjTableName - The name of the table that will be searched. (not null)
 * @return The PCJ Metadata that has been stored in the PCJ Table.
 * @throws PCJStorageException The PCJ Table does not exist, it does not contain any
 *   entries, or its SPARQL query or cardinality metadata entry is missing.
 */
public PcjMetadata getPcjMetadata(final Connector accumuloConn, final String pcjTableName) throws PCJStorageException {
    checkNotNull(accumuloConn);
    checkNotNull(pcjTableName);

    Scanner scanner = null;
    try {
        // Create an Accumulo scanner that iterates through the table's entries.
        scanner = accumuloConn.createScanner(pcjTableName, new Authorizations());
        final Iterator<Entry<Key, Value>> entries = scanner.iterator();

        // No metadata has been stored in the table yet.
        if (!entries.hasNext()) {
            throw new PCJStorageException("Could not find any PCJ metadata in the table named: " + pcjTableName);
        }

        // Fetch the metadata from the entries. Assuming they all have the same cardinality and sparql query.
        String sparql = null;
        Long cardinality = null;
        final Set<VariableOrder> varOrders = new HashSet<>();
        while (entries.hasNext()) {
            final Entry<Key, Value> entry = entries.next();
            final Text columnQualifier = entry.getKey().getColumnQualifier();
            final byte[] value = entry.getValue().get();

            if (columnQualifier.equals(PCJ_METADATA_SPARQL_QUERY)) {
                sparql = stringLexicoder.decode(value);
            } else if (columnQualifier.equals(PCJ_METADATA_CARDINALITY)) {
                cardinality = longLexicoder.decode(value);
            } else if (columnQualifier.equals(PCJ_METADATA_VARIABLE_ORDERS)) {
                for (final String varOrderStr : listLexicoder.decode(value)) {
                    varOrders.add(new VariableOrder(varOrderStr));
                }
            }
        }

        // Report incomplete metadata through the declared exception rather than letting
        // a null value surface as a NullPointerException further down.
        if (sparql == null || cardinality == null) {
            throw new PCJStorageException("The PCJ metadata is incomplete in the table named: " + pcjTableName);
        }

        return new PcjMetadata(sparql, cardinality, varOrders);
    } catch (final TableNotFoundException e) {
        throw new PCJStorageException("Could not fetch the PCJ metadata because the PCJ table does not exist: " + pcjTableName, e);
    } finally {
        if (scanner != null) {
            scanner.close();
        }
    }
}
Use of org.apache.rya.indexing.pcj.storage.PcjMetadata in project incubator-rya by Apache.
Class PcjTables, method createPcjTable.
/**
 * Create a new PCJ table within an Accumulo instance for a SPARQL query.
 * Nothing is done when a table with the provided name already exists.
 * <p>
 * For example, calling the function like this:
 * <pre>
 * PcjTables.createPcjTable(
 *     accumuloConn,
 *
 *     "foo_INDEX_query1234",
 *
 *     Sets.newHashSet(
 *         new VariableOrder("city;worker;customer"),
 *         new VariableOrder("worker;customer;city") ,
 *         new VariableOrder("customer;city;worker")),
 *
 *     "SELECT ?customer ?worker ?city { " +
 *            "?customer &lt;http://talksTo&gt; ?worker. " +
 *            "?worker &lt;http://livesIn&gt; ?city. " +
 *            "?worker &lt;http://worksAt&gt; &lt;http://Home&gt;. " +
 *     "}");
 * </pre>
 * Will result in an Accumulo table named "foo_INDEX_query1234" with the following entries:
 * <table border="1" style="width:100%">
 *   <tr> <th>Row ID</th> <th>Column</th> <th>Value</th> </tr>
 *   <tr> <td>pcjMetadata</td> <td>metadata:sparql</td> <td> ... UTF-8 bytes encoding the query string ... </td> </tr>
 *   <tr> <td>pcjMetadata</td> <td>metadata:cardinality</td> <td> The query's cardinality </td> </tr>
 *   <tr> <td>pcjMetadata</td> <td>metadata:variableOrders</td> <td> The variable orders the results are written to </td> </tr>
 * </table>
 *
 * @param accumuloConn - A connection to the Accumulo that hosts the PCJ table. (not null)
 * @param pcjTableName - The name of the table that will be created. (not null)
 * @param varOrders - The variable orders the results within the table will be written to. (not null)
 * @param sparql - The query this table's results solves. (not null)
 * @throws PCJStorageException Could not create a new PCJ table either because Accumulo
 *   would not let us create it or the PCJ metadata was not able to be written to it.
 */
public void createPcjTable(final Connector accumuloConn, final String pcjTableName, final Set<VariableOrder> varOrders, final String sparql) throws PCJStorageException {
    checkNotNull(accumuloConn);
    checkNotNull(pcjTableName);
    checkNotNull(varOrders);
    checkNotNull(sparql);

    final TableOperations tableOps = accumuloConn.tableOperations();
    if (tableOps.exists(pcjTableName)) {
        // The table is already there, so there is nothing to create.
        return;
    }

    BatchWriter writer = null;
    try {
        // Create the new table in Accumulo.
        tableOps.create(pcjTableName);

        // Write the PCJ Metadata to the newly created table. A new PCJ starts with a cardinality of 0.
        final PcjMetadata pcjMetadata = new PcjMetadata(sparql, 0L, varOrders);
        final List<Mutation> mutations = makeWriteMetadataMutations(pcjMetadata);

        writer = accumuloConn.createBatchWriter(pcjTableName, new BatchWriterConfig());
        writer.addMutations(mutations);
    } catch (final TableExistsException e) {
        log.warn("Something else just created the Rya PCJ export table named '" + pcjTableName + "'. This is unexpected, but we will continue as normal.");
    } catch (AccumuloException | AccumuloSecurityException | TableNotFoundException e) {
        throw new PCJStorageException("Could not create a new PCJ named: " + pcjTableName, e);
    } finally {
        if (writer != null) {
            try {
                // Closing the writer flushes the buffered mutations, so a rejection here
                // means the metadata was not written and must be reported to the caller.
                writer.close();
            } catch (final MutationsRejectedException e) {
                throw new PCJStorageException("Could not write the PCJ metadata to the PCJ table named: " + pcjTableName, e);
            }
        }
    }
}
Use of org.apache.rya.indexing.pcj.storage.PcjMetadata in project incubator-rya by Apache.
Class PcjTables, method writeResults.
/**
 * Writes a collection of binding sets into an existing PCJ table.
 * <p>
 * The table's metadata is fetched first because each binding set is turned into
 * mutations using the variable orders recorded there.
 *
 * @param accumuloConn - A connection to the Accumulo that hosts the PCJ table. (not null)
 * @param pcjTableName - The name of the PCJ table that will receive the results. (not null)
 * @param results - Binding sets that will be written to the PCJ table. (not null)
 * @throws PCJStorageException The provided PCJ table doesn't exist, is missing the
 *   PCJ metadata, or the result could not be written to it.
 */
private void writeResults(final Connector accumuloConn, final String pcjTableName, final Collection<VisibilityBindingSet> results) throws PCJStorageException {
    checkNotNull(accumuloConn);
    checkNotNull(pcjTableName);
    checkNotNull(results);

    // The metadata holds the variable orders used to format every result.
    final PcjMetadata pcjMetadata = getPcjMetadata(accumuloConn, pcjTableName);

    BatchWriter resultWriter = null;
    try {
        resultWriter = accumuloConn.createBatchWriter(pcjTableName, new BatchWriterConfig());

        // Queue the mutations of one binding set at a time.
        for (final VisibilityBindingSet bindingSet : results) {
            resultWriter.addMutations(makeWriteResultMutations(pcjMetadata.getVarOrders(), bindingSet));
        }
    } catch (TableNotFoundException | MutationsRejectedException e) {
        throw new PCJStorageException("Could not add results to the PCJ table named: " + pcjTableName, e);
    } finally {
        // A writer only exists when createBatchWriter succeeded.
        if (resultWriter != null) {
            try {
                resultWriter.close();
            } catch (final MutationsRejectedException e) {
                throw new PCJStorageException("Could not add results to a PCJ table because some of the mutations were rejected.", e);
            }
        }
    }
}
Aggregations