Search in sources :

Example 16 with PcjMetadata

use of org.apache.rya.indexing.pcj.storage.PcjMetadata in project incubator-rya by apache.

the class PcjDocumentsIntegrationTest method createAndPopulatePcj.

/**
 * Ensure the method that creates a new PCJ table, scans Rya for matches, and
 * stores them in the PCJ table works.
 * <p>
 * The method being tested is: {@link PcjTables#createAndPopulatePcj(RepositoryConnection, Connector, String, String, String[], Optional)}
 */
@Test
public void createAndPopulatePcj() throws Exception {
    final MongoDBRyaDAO dao = new MongoDBRyaDAO();
    dao.setConf(new StatefulMongoDBRdfConfiguration(conf, getMongoClient()));
    dao.init();
    final RdfCloudTripleStore ryaStore = new RdfCloudTripleStore();
    ryaStore.setRyaDAO(dao);
    ryaStore.initialize();
    final SailRepositoryConnection ryaConn = new RyaSailRepository(ryaStore).getConnection();
    ryaConn.begin();
    try {
        // Load some Triples into Rya.
        final Set<Statement> triples = new HashSet<>();
        triples.add(new StatementImpl(new URIImpl("http://Alice"), new URIImpl("http://hasAge"), new NumericLiteralImpl(14, XMLSchema.INTEGER)));
        triples.add(new StatementImpl(new URIImpl("http://Alice"), new URIImpl("http://playsSport"), new LiteralImpl("Soccer")));
        triples.add(new StatementImpl(new URIImpl("http://Bob"), new URIImpl("http://hasAge"), new NumericLiteralImpl(16, XMLSchema.INTEGER)));
        triples.add(new StatementImpl(new URIImpl("http://Bob"), new URIImpl("http://playsSport"), new LiteralImpl("Soccer")));
        triples.add(new StatementImpl(new URIImpl("http://Charlie"), new URIImpl("http://hasAge"), new NumericLiteralImpl(12, XMLSchema.INTEGER)));
        triples.add(new StatementImpl(new URIImpl("http://Charlie"), new URIImpl("http://playsSport"), new LiteralImpl("Soccer")));
        triples.add(new StatementImpl(new URIImpl("http://Eve"), new URIImpl("http://hasAge"), new NumericLiteralImpl(43, XMLSchema.INTEGER)));
        triples.add(new StatementImpl(new URIImpl("http://Eve"), new URIImpl("http://playsSport"), new LiteralImpl("Soccer")));
        for (final Statement triple : triples) {
            ryaConn.add(triple);
        }
        // Create a PCJ table that will include those triples in its results.
        final String sparql = "SELECT ?name ?age " + "{" + "FILTER(?age < 30) ." + "?name <http://hasAge> ?age." + "?name <http://playsSport> \"Soccer\" " + "}";
        final String pcjTableName = "testPcj";
        // Create and populate the PCJ table.
        final MongoPcjDocuments pcjs = new MongoPcjDocuments(getMongoClient(), conf.getRyaInstanceName());
        pcjs.createAndPopulatePcj(ryaConn, pcjTableName, sparql);
        // Make sure the cardinality was updated.
        final PcjMetadata metadata = pcjs.getPcjMetadata(pcjTableName);
        assertEquals(3, metadata.getCardinality());
        // Scan Accumulo for the stored results.
        final Collection<BindingSet> fetchedResults = loadPcjResults(pcjTableName);
        // Ensure the expected results match those that were stored.
        final MapBindingSet alice = new MapBindingSet();
        alice.addBinding("name", new URIImpl("http://Alice"));
        alice.addBinding("age", new NumericLiteralImpl(14, XMLSchema.INTEGER));
        final MapBindingSet bob = new MapBindingSet();
        bob.addBinding("name", new URIImpl("http://Bob"));
        bob.addBinding("age", new NumericLiteralImpl(16, XMLSchema.INTEGER));
        final MapBindingSet charlie = new MapBindingSet();
        charlie.addBinding("name", new URIImpl("http://Charlie"));
        charlie.addBinding("age", new NumericLiteralImpl(12, XMLSchema.INTEGER));
        final Set<BindingSet> expected = Sets.<BindingSet>newHashSet(alice, bob, charlie);
        assertEquals(expected, fetchedResults);
    } finally {
        ryaConn.close();
        ryaStore.shutDown();
    }
}
Also used : RdfCloudTripleStore(org.apache.rya.rdftriplestore.RdfCloudTripleStore) MapBindingSet(org.openrdf.query.impl.MapBindingSet) VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) BindingSet(org.openrdf.query.BindingSet) StatefulMongoDBRdfConfiguration(org.apache.rya.mongodb.StatefulMongoDBRdfConfiguration) Statement(org.openrdf.model.Statement) RyaSailRepository(org.apache.rya.rdftriplestore.RyaSailRepository) URIImpl(org.openrdf.model.impl.URIImpl) SailRepositoryConnection(org.openrdf.repository.sail.SailRepositoryConnection) MongoDBRyaDAO(org.apache.rya.mongodb.MongoDBRyaDAO) LiteralImpl(org.openrdf.model.impl.LiteralImpl) NumericLiteralImpl(org.openrdf.model.impl.NumericLiteralImpl) NumericLiteralImpl(org.openrdf.model.impl.NumericLiteralImpl) StatementImpl(org.openrdf.model.impl.StatementImpl) PcjMetadata(org.apache.rya.indexing.pcj.storage.PcjMetadata) MapBindingSet(org.openrdf.query.impl.MapBindingSet) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 17 with PcjMetadata

use of org.apache.rya.indexing.pcj.storage.PcjMetadata in project incubator-rya by apache.

the class MongoPcjDocuments method populatePcj.

/**
 * Scan Rya for results that solve the PCJ's query and store them in the PCJ
 * document.
 * <p>
 * This method assumes the PCJ document has already been created.
 *
 * @param pcjId - The Id of the PCJ that will receive the results. (not null)
 * @param ryaConn - A connection to the Rya store that will be queried to find results. (not null)
 * @throws PCJStorageException If results could not be written to the PCJ results document,
 *  the PCJ results document does not exist, or the query that is being execute was malformed.
 */
public void populatePcj(final String pcjId, final RepositoryConnection ryaConn) throws PCJStorageException {
    checkNotNull(pcjId);
    checkNotNull(ryaConn);
    try {
        // Fetch the query that needs to be executed from the PCJ metadata document.
        final PcjMetadata pcjMetadata = getPcjMetadata(pcjId);
        final String sparql = pcjMetadata.getSparql();
        // Query Rya for results to the SPARQL query.
        final TupleQuery query = ryaConn.prepareTupleQuery(QueryLanguage.SPARQL, sparql);
        final TupleQueryResult results = query.evaluate();
        // Load batches of 1000 of them at a time into the PCJ results document.
        final Set<VisibilityBindingSet> batch = new HashSet<>(1000);
        while (results.hasNext()) {
            final VisibilityBindingSet bs = new VisibilityBindingSet(results.next());
            batch.add(bs);
            if (batch.size() == 1000) {
                addResults(pcjId, batch);
                batch.clear();
            }
        }
        if (!batch.isEmpty()) {
            addResults(pcjId, batch);
        }
    } catch (RepositoryException | MalformedQueryException | QueryEvaluationException e) {
        throw new PCJStorageException("Could not populate a PCJ document with Rya results for the pcj with Id: " + pcjId, e);
    }
}
Also used : VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) QueryEvaluationException(org.openrdf.query.QueryEvaluationException) MalformedQueryException(org.openrdf.query.MalformedQueryException) PcjMetadata(org.apache.rya.indexing.pcj.storage.PcjMetadata) TupleQuery(org.openrdf.query.TupleQuery) RepositoryException(org.openrdf.repository.RepositoryException) PCJStorageException(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage.PCJStorageException) TupleQueryResult(org.openrdf.query.TupleQueryResult) HashSet(java.util.HashSet)

Example 18 with PcjMetadata

use of org.apache.rya.indexing.pcj.storage.PcjMetadata in project incubator-rya by apache.

the class PcjTables method getPcjMetadata.

/**
 * Fetch the {@link PCJMetadata} from an Accumulo table.
 * <p>
 * This method assumes the PCJ table has already been created.
 *
 * @param accumuloConn - A connection to the Accumulo that hosts the PCJ table. (not null)
 * @param pcjTableName - The name of the table that will be search. (not null)
 * @return The PCJ Metadata that has been stolred in the in the PCJ Table.
 * @throws PCJStorageException The PCJ Table does not exist.
 */
public PcjMetadata getPcjMetadata(final Connector accumuloConn, final String pcjTableName) throws PCJStorageException {
    checkNotNull(accumuloConn);
    checkNotNull(pcjTableName);
    Scanner scanner = null;
    try {
        // Create an Accumulo scanner that iterates through the metadata entries.
        scanner = accumuloConn.createScanner(pcjTableName, new Authorizations());
        final Iterator<Entry<Key, Value>> entries = scanner.iterator();
        // No metadata has been stored in the table yet.
        if (!entries.hasNext()) {
            throw new PCJStorageException("Could not find any PCJ metadata in the table named: " + pcjTableName);
        }
        // Fetch the metadata from the entries. Assuming they all have the same cardinality and sparql query.
        String sparql = null;
        Long cardinality = null;
        final Set<VariableOrder> varOrders = new HashSet<>();
        while (entries.hasNext()) {
            final Entry<Key, Value> entry = entries.next();
            final Text columnQualifier = entry.getKey().getColumnQualifier();
            final byte[] value = entry.getValue().get();
            if (columnQualifier.equals(PCJ_METADATA_SPARQL_QUERY)) {
                sparql = stringLexicoder.decode(value);
            } else if (columnQualifier.equals(PCJ_METADATA_CARDINALITY)) {
                cardinality = longLexicoder.decode(value);
            } else if (columnQualifier.equals(PCJ_METADATA_VARIABLE_ORDERS)) {
                for (final String varOrderStr : listLexicoder.decode(value)) {
                    varOrders.add(new VariableOrder(varOrderStr));
                }
            }
        }
        return new PcjMetadata(sparql, cardinality, varOrders);
    } catch (final TableNotFoundException e) {
        throw new PCJStorageException("Could not add results to a PCJ because the PCJ table does not exist.", e);
    } finally {
        if (scanner != null) {
            scanner.close();
        }
    }
}
Also used : Scanner(org.apache.accumulo.core.client.Scanner) Authorizations(org.apache.accumulo.core.security.Authorizations) Text(org.apache.hadoop.io.Text) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) Entry(java.util.Map.Entry) Value(org.apache.accumulo.core.data.Value) PcjMetadata(org.apache.rya.indexing.pcj.storage.PcjMetadata) PCJStorageException(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage.PCJStorageException) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet)

Example 19 with PcjMetadata

use of org.apache.rya.indexing.pcj.storage.PcjMetadata in project incubator-rya by apache.

the class PcjTables method createPcjTable.

/**
 * Create a new PCJ table within an Accumulo instance for a SPARQL query.
 * For example, calling the function like this:
 * <pre>
 * PcjTables.createPcjTable(
 *     accumuloConn,
 *
 *     "foo_INDEX_query1234",
 *
 *     Sets.newHashSet(
 *         new VariableOrder("city;worker;customer"),
 *         new VariableOrder("worker;customer;city") ,
 *         new VariableOrder("customer;city;worker")),
 *
 *     "SELECT ?customer ?worker ?city { " +
 *            "?customer &lt;http://talksTo> ?worker. " +
 *            "?worker &lt;http://livesIn> ?city. " +
 *            "?worker &lt;http://worksAt> &lt;http://Home>. " +
 *     "}");
 * </pre>
 * </p>
 * Will result in an Accumulo table named "foo_INDEX_query1234" with the following entries:
 * <table border="1" style="width:100%">
 *   <tr> <th>Row ID</td>  <th>Column</td>  <th>Value</td> </tr>
 *   <tr> <td>pcjMetadata</td> <td>metadata:sparql</td> <td> ... UTF-8 bytes encoding the query string ... </td> </tr>
 *   <tr> <td>pcjMetadata</td> <td>metadata:cardinality</td> <td> The query's cardinality </td> </tr>
 *   <tr> <td>pcjMetadata</td> <td>metadata:variableOrders</td> <td> The variable orders the results are written to </td> </tr>
 * </table>
 *
 * @param accumuloConn - A connection to the Accumulo that hosts the PCJ table. (not null)
 * @param pcjTableName - The name of the table that will be created. (not null)
 * @param varOrders - The variable orders the results within the table will be written to. (not null)
 * @param sparql - The query this table's results solves. (not null)
 * @throws PCJStorageException Could not create a new PCJ table either because Accumulo
 *   would not let us create it or the PCJ metadata was not able to be written to it.
 */
public void createPcjTable(final Connector accumuloConn, final String pcjTableName, final Set<VariableOrder> varOrders, final String sparql) throws PCJStorageException {
    checkNotNull(accumuloConn);
    checkNotNull(pcjTableName);
    checkNotNull(varOrders);
    checkNotNull(sparql);
    final TableOperations tableOps = accumuloConn.tableOperations();
    if (!tableOps.exists(pcjTableName)) {
        BatchWriter writer = null;
        try {
            // Create the new table in Accumulo.
            tableOps.create(pcjTableName);
            // Write the PCJ Metadata to the newly created table.
            final PcjMetadata pcjMetadata = new PcjMetadata(sparql, 0L, varOrders);
            final List<Mutation> mutations = makeWriteMetadataMutations(pcjMetadata);
            writer = accumuloConn.createBatchWriter(pcjTableName, new BatchWriterConfig());
            writer.addMutations(mutations);
        } catch (final TableExistsException e) {
            log.warn("Something else just created the Rya PCJ export table named '" + pcjTableName + "'. This is unexpected, but we will continue as normal.");
        } catch (AccumuloException | AccumuloSecurityException | TableNotFoundException e) {
            throw new PCJStorageException("Could not create a new PCJ named: " + pcjTableName, e);
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (final MutationsRejectedException e) {
                    log.error("Mutations rejected while creating the PCJ table.", e);
                }
            }
        }
    }
}
Also used : AccumuloException(org.apache.accumulo.core.client.AccumuloException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) TableOperations(org.apache.accumulo.core.client.admin.TableOperations) TableExistsException(org.apache.accumulo.core.client.TableExistsException) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) PcjMetadata(org.apache.rya.indexing.pcj.storage.PcjMetadata) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Mutation(org.apache.accumulo.core.data.Mutation) ConditionalMutation(org.apache.accumulo.core.data.ConditionalMutation) PCJStorageException(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage.PCJStorageException) MutationsRejectedException(org.apache.accumulo.core.client.MutationsRejectedException)

Example 20 with PcjMetadata

use of org.apache.rya.indexing.pcj.storage.PcjMetadata in project incubator-rya by apache.

the class PcjTables method writeResults.

/**
 * Add a collection of results to a specific PCJ table.
 *
 * @param accumuloConn - A connection to the Accumulo that hosts the PCJ table. (not null)
 * @param pcjTableName - The name of the PCJ table that will receive the results. (not null)
 * @param results - Binding sets that will be written to the PCJ table. (not null)
 * @throws PCJStorageException The provided PCJ table doesn't exist, is missing the
 *   PCJ metadata, or the result could not be written to it.
 */
private void writeResults(final Connector accumuloConn, final String pcjTableName, final Collection<VisibilityBindingSet> results) throws PCJStorageException {
    checkNotNull(accumuloConn);
    checkNotNull(pcjTableName);
    checkNotNull(results);
    // Fetch the variable orders from the PCJ table.
    final PcjMetadata metadata = getPcjMetadata(accumuloConn, pcjTableName);
    // Write each result formatted using each of the variable orders.
    BatchWriter writer = null;
    try {
        writer = accumuloConn.createBatchWriter(pcjTableName, new BatchWriterConfig());
        for (final VisibilityBindingSet result : results) {
            final Set<Mutation> addResultMutations = makeWriteResultMutations(metadata.getVarOrders(), result);
            writer.addMutations(addResultMutations);
        }
    } catch (TableNotFoundException | MutationsRejectedException e) {
        throw new PCJStorageException("Could not add results to the PCJ table named: " + pcjTableName, e);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (final MutationsRejectedException e) {
                throw new PCJStorageException("Could not add results to a PCJ table because some of the mutations were rejected.", e);
            }
        }
    }
}
Also used : TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) PcjMetadata(org.apache.rya.indexing.pcj.storage.PcjMetadata) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Mutation(org.apache.accumulo.core.data.Mutation) ConditionalMutation(org.apache.accumulo.core.data.ConditionalMutation) PCJStorageException(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage.PCJStorageException) MutationsRejectedException(org.apache.accumulo.core.client.MutationsRejectedException)

Aggregations

PcjMetadata (org.apache.rya.indexing.pcj.storage.PcjMetadata)47 Test (org.junit.Test)30 VisibilityBindingSet (org.apache.rya.api.model.VisibilityBindingSet)22 HashSet (java.util.HashSet)17 VariableOrder (org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder)16 URIImpl (org.openrdf.model.impl.URIImpl)15 MapBindingSet (org.openrdf.query.impl.MapBindingSet)15 PrecomputedJoinStorage (org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage)14 BindingSet (org.openrdf.query.BindingSet)14 Connector (org.apache.accumulo.core.client.Connector)12 PCJStorageException (org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage.PCJStorageException)11 NumericLiteralImpl (org.openrdf.model.impl.NumericLiteralImpl)10 AccumuloPcjStorage (org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage)9 ShiftVarOrderFactory (org.apache.rya.indexing.pcj.storage.accumulo.ShiftVarOrderFactory)8 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)6 Statement (org.openrdf.model.Statement)6 LiteralImpl (org.openrdf.model.impl.LiteralImpl)6 StatementImpl (org.openrdf.model.impl.StatementImpl)6 MalformedQueryException (org.openrdf.query.MalformedQueryException)6 QueryEvaluationException (org.openrdf.query.QueryEvaluationException)6