Search in sources :

Example 1 with PcjMetadata

use of org.apache.rya.indexing.pcj.storage.PcjMetadata in project incubator-rya by apache.

the class MongoPcjStorageIT method purge.

@Test
public void purge() throws Exception {
    try (final PrecomputedJoinStorage pcjStorage = new MongoPcjStorage(getMongoClient(), conf.getRyaInstanceName())) {
        final MongoRyaInstanceDetailsRepository detailsRepo = new MongoRyaInstanceDetailsRepository(getMongoClient(), conf.getRyaInstanceName());
        detailsRepo.initialize(RyaDetails.builder().setRyaInstanceName(conf.getRyaInstanceName()).setRyaVersion("test").setEntityCentricIndexDetails(new EntityCentricIndexDetails(false)).setTemporalIndexDetails(new TemporalIndexDetails(false)).setFreeTextDetails(new FreeTextIndexDetails(false)).setProspectorDetails(new ProspectorDetails(Optional.absent())).setJoinSelectivityDetails(new JoinSelectivityDetails(Optional.absent())).setPCJIndexDetails(PCJIndexDetails.builder().setEnabled(true)).build());
        // Create a PCJ.
        final String sparql = "SELECT * WHERE { ?a <http://isA> ?b }";
        final String pcjId = pcjStorage.createPcj(sparql);
        // Add some binding sets to it.
        final Set<VisibilityBindingSet> expectedResults = new HashSet<>();
        final MapBindingSet aliceBS = new MapBindingSet();
        aliceBS.addBinding("a", new URIImpl("http://Alice"));
        aliceBS.addBinding("b", new URIImpl("http://Person"));
        expectedResults.add(new VisibilityBindingSet(aliceBS, ""));
        final MapBindingSet charlieBS = new MapBindingSet();
        charlieBS.addBinding("a", new URIImpl("http://Charlie"));
        charlieBS.addBinding("b", new URIImpl("http://Comedian"));
        expectedResults.add(new VisibilityBindingSet(charlieBS, ""));
        pcjStorage.addResults(pcjId, expectedResults);
        // Purge the PCJ.
        pcjStorage.purge(pcjId);
        // List the results that were stored.
        final Set<BindingSet> results = new HashSet<>();
        try (CloseableIterator<BindingSet> resultsIt = pcjStorage.listResults(pcjId)) {
            while (resultsIt.hasNext()) {
                results.add(resultsIt.next());
            }
        }
        assertTrue(results.isEmpty());
        // Make sure the PCJ metadata was updated.
        final PcjMetadata metadata = pcjStorage.getPcjMetadata(pcjId);
        final Set<VariableOrder> varOrders = new ShiftVarOrderFactory().makeVarOrders(sparql);
        final PcjMetadata expectedMetadata = new PcjMetadata(sparql, 0L, varOrders);
        assertEquals(expectedMetadata, metadata);
    }
}
Also used : ProspectorDetails(org.apache.rya.api.instance.RyaDetails.ProspectorDetails) MapBindingSet(org.openrdf.query.impl.MapBindingSet) VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) BindingSet(org.openrdf.query.BindingSet) VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) VariableOrder(org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder) MongoRyaInstanceDetailsRepository(org.apache.rya.mongodb.instance.MongoRyaInstanceDetailsRepository) ShiftVarOrderFactory(org.apache.rya.indexing.pcj.storage.accumulo.ShiftVarOrderFactory) URIImpl(org.openrdf.model.impl.URIImpl) JoinSelectivityDetails(org.apache.rya.api.instance.RyaDetails.JoinSelectivityDetails) EntityCentricIndexDetails(org.apache.rya.api.instance.RyaDetails.EntityCentricIndexDetails) TemporalIndexDetails(org.apache.rya.api.instance.RyaDetails.TemporalIndexDetails) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) FreeTextIndexDetails(org.apache.rya.api.instance.RyaDetails.FreeTextIndexDetails) PcjMetadata(org.apache.rya.indexing.pcj.storage.PcjMetadata) MapBindingSet(org.openrdf.query.impl.MapBindingSet) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 2 with PcjMetadata

use of org.apache.rya.indexing.pcj.storage.PcjMetadata in project incubator-rya by apache.

the class MongoPcjStorageIT method getPcjMetadata.

@Test
public void getPcjMetadata() throws Exception {
    try (final PrecomputedJoinStorage pcjStorage = new MongoPcjStorage(getMongoClient(), conf.getRyaInstanceName())) {
        final MongoRyaInstanceDetailsRepository detailsRepo = new MongoRyaInstanceDetailsRepository(getMongoClient(), conf.getRyaInstanceName());
        detailsRepo.initialize(RyaDetails.builder().setRyaInstanceName(conf.getRyaInstanceName()).setRyaVersion("test").setEntityCentricIndexDetails(new EntityCentricIndexDetails(false)).setTemporalIndexDetails(new TemporalIndexDetails(false)).setFreeTextDetails(new FreeTextIndexDetails(false)).setProspectorDetails(new ProspectorDetails(Optional.absent())).setJoinSelectivityDetails(new JoinSelectivityDetails(Optional.absent())).setPCJIndexDetails(PCJIndexDetails.builder().setEnabled(true)).build());
        // Create a PCJ.
        final String sparql = "SELECT * WHERE { ?a <http://isA> ?b }";
        final String pcjId = pcjStorage.createPcj(sparql);
        // Fetch the PCJ's metadata.
        final PcjMetadata metadata = pcjStorage.getPcjMetadata(pcjId);
        // Ensure it has the expected values.
        final Set<VariableOrder> varOrders = new ShiftVarOrderFactory().makeVarOrders(sparql);
        final PcjMetadata expectedMetadata = new PcjMetadata(sparql, 0L, varOrders);
        assertEquals(expectedMetadata, metadata);
    }
}
Also used : ProspectorDetails(org.apache.rya.api.instance.RyaDetails.ProspectorDetails) VariableOrder(org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder) MongoRyaInstanceDetailsRepository(org.apache.rya.mongodb.instance.MongoRyaInstanceDetailsRepository) ShiftVarOrderFactory(org.apache.rya.indexing.pcj.storage.accumulo.ShiftVarOrderFactory) JoinSelectivityDetails(org.apache.rya.api.instance.RyaDetails.JoinSelectivityDetails) EntityCentricIndexDetails(org.apache.rya.api.instance.RyaDetails.EntityCentricIndexDetails) TemporalIndexDetails(org.apache.rya.api.instance.RyaDetails.TemporalIndexDetails) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) FreeTextIndexDetails(org.apache.rya.api.instance.RyaDetails.FreeTextIndexDetails) PcjMetadata(org.apache.rya.indexing.pcj.storage.PcjMetadata) Test(org.junit.Test)

Example 3 with PcjMetadata

use of org.apache.rya.indexing.pcj.storage.PcjMetadata in project incubator-rya by apache.

the class PcjDocumentsWithMockTest method populatePcj.

@Test
public void populatePcj() throws Exception {
    final RdfCloudTripleStore ryaStore = new RdfCloudTripleStore();
    final MongoDBRyaDAO dao = new MongoDBRyaDAO();
    dao.setConf(new StatefulMongoDBRdfConfiguration(conf, getMongoClient()));
    dao.init();
    ryaStore.setRyaDAO(dao);
    ryaStore.initialize();
    final SailRepositoryConnection ryaConn = new RyaSailRepository(ryaStore).getConnection();
    try {
        // Load some Triples into Rya.
        final Set<Statement> triples = new HashSet<>();
        triples.add(new StatementImpl(new URIImpl("http://Alice"), new URIImpl("http://hasAge"), new NumericLiteralImpl(14, XMLSchema.INTEGER)));
        triples.add(new StatementImpl(new URIImpl("http://Alice"), new URIImpl("http://playsSport"), new LiteralImpl("Soccer")));
        triples.add(new StatementImpl(new URIImpl("http://Bob"), new URIImpl("http://hasAge"), new NumericLiteralImpl(16, XMLSchema.INTEGER)));
        triples.add(new StatementImpl(new URIImpl("http://Bob"), new URIImpl("http://playsSport"), new LiteralImpl("Soccer")));
        triples.add(new StatementImpl(new URIImpl("http://Charlie"), new URIImpl("http://hasAge"), new NumericLiteralImpl(12, XMLSchema.INTEGER)));
        triples.add(new StatementImpl(new URIImpl("http://Charlie"), new URIImpl("http://playsSport"), new LiteralImpl("Soccer")));
        triples.add(new StatementImpl(new URIImpl("http://Eve"), new URIImpl("http://hasAge"), new NumericLiteralImpl(43, XMLSchema.INTEGER)));
        triples.add(new StatementImpl(new URIImpl("http://Eve"), new URIImpl("http://playsSport"), new LiteralImpl("Soccer")));
        for (final Statement triple : triples) {
            ryaConn.add(triple);
        }
        // Create a PCJ table that will include those triples in its results.
        final String sparql = "SELECT ?name ?age " + "{" + "?name <http://hasAge> ?age." + "?name <http://playsSport> \"Soccer\" " + "}";
        final String pcjTableName = new PcjTableNameFactory().makeTableName(conf.getRyaInstanceName(), "testPcj");
        final MongoPcjDocuments pcjs = new MongoPcjDocuments(getMongoClient(), conf.getRyaInstanceName());
        pcjs.createAndPopulatePcj(ryaConn, pcjTableName, sparql);
        // Make sure the cardinality was updated.
        final PcjMetadata metadata = pcjs.getPcjMetadata(pcjTableName);
        assertEquals(4, metadata.getCardinality());
    } finally {
        ryaConn.close();
        ryaStore.shutDown();
    }
}
Also used : RdfCloudTripleStore(org.apache.rya.rdftriplestore.RdfCloudTripleStore) StatefulMongoDBRdfConfiguration(org.apache.rya.mongodb.StatefulMongoDBRdfConfiguration) Statement(org.openrdf.model.Statement) RyaSailRepository(org.apache.rya.rdftriplestore.RyaSailRepository) URIImpl(org.openrdf.model.impl.URIImpl) PcjTableNameFactory(org.apache.rya.indexing.pcj.storage.accumulo.PcjTableNameFactory) SailRepositoryConnection(org.openrdf.repository.sail.SailRepositoryConnection) MongoDBRyaDAO(org.apache.rya.mongodb.MongoDBRyaDAO) NumericLiteralImpl(org.openrdf.model.impl.NumericLiteralImpl) LiteralImpl(org.openrdf.model.impl.LiteralImpl) NumericLiteralImpl(org.openrdf.model.impl.NumericLiteralImpl) StatementImpl(org.openrdf.model.impl.StatementImpl) PcjMetadata(org.apache.rya.indexing.pcj.storage.PcjMetadata) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 4 with PcjMetadata

use of org.apache.rya.indexing.pcj.storage.PcjMetadata in project incubator-rya by apache.

the class PcjIntegrationTestingUtil method populatePcj.

/**
 * Scan Rya for results that solve the PCJ's query and store them in the PCJ
 * table.
 * <p>
 * This method assumes the PCJ table has already been created.
 *
 * @param mongoClient - A connection to the mongoDB that hosts the PCJ table. (not null)
 * @param pcjTableName - The name of the PCJ table that will receive the results. (not null)
 * @param ryaConn - A connection to the Rya store that will be queried to find results. (not null)
 * @throws PcjException
 *             If results could not be written to the PCJ table, the PCJ
 *             table does not exist, or the query that is being execute was
 *             malformed.
 */
public static void populatePcj(final MongoPcjDocuments pcj, final String pcjTableName, final RepositoryConnection ryaConn) throws PcjException {
    checkNotNull(pcj);
    checkNotNull(pcjTableName);
    checkNotNull(ryaConn);
    try {
        // Fetch the query that needs to be executed from the PCJ table.
        final PcjMetadata pcjMetadata = pcj.getPcjMetadata(pcjTableName);
        final String sparql = pcjMetadata.getSparql();
        // Query Rya for results to the SPARQL query.
        final TupleQuery query = ryaConn.prepareTupleQuery(QueryLanguage.SPARQL, sparql);
        final TupleQueryResult results = query.evaluate();
        // Load batches of 1000 of them at a time into the PCJ table
        final Set<BindingSet> batch = new HashSet<>(1000);
        while (results.hasNext()) {
            batch.add(results.next());
            if (batch.size() == 1000) {
                writeResults(pcj, pcjTableName, batch);
                batch.clear();
            }
        }
        if (!batch.isEmpty()) {
            writeResults(pcj, pcjTableName, batch);
        }
    } catch (RepositoryException | MalformedQueryException | QueryEvaluationException e) {
        throw new PcjException("Could not populate a PCJ table with Rya results for the table named: " + pcjTableName, e);
    }
}
Also used : VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) BindingSet(org.openrdf.query.BindingSet) QueryEvaluationException(org.openrdf.query.QueryEvaluationException) PcjException(org.apache.rya.indexing.pcj.storage.PcjException) MalformedQueryException(org.openrdf.query.MalformedQueryException) PcjMetadata(org.apache.rya.indexing.pcj.storage.PcjMetadata) TupleQuery(org.openrdf.query.TupleQuery) RepositoryException(org.openrdf.repository.RepositoryException) TupleQueryResult(org.openrdf.query.TupleQueryResult) HashSet(java.util.HashSet)

Example 5 with PcjMetadata

use of org.apache.rya.indexing.pcj.storage.PcjMetadata in project incubator-rya by apache.

the class AccumuloBatchUpdatePCJ method updatePCJResults.

private void updatePCJResults(final String ryaInstanceName, final String pcjId) throws InstanceDoesNotExistException, PCJDoesNotExistException, RyaClientException {
    // Things that have to be closed before we exit.
    Sail sail = null;
    SailConnection sailConn = null;
    CloseableIteration<? extends BindingSet, QueryEvaluationException> results = null;
    try (final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(super.getConnector(), ryaInstanceName)) {
        // Create an instance of Sail backed by the Rya instance.
        sail = connectToRya(ryaInstanceName);
        // Purge the old results from the PCJ.
        try {
            pcjStorage.purge(pcjId);
        } catch (final PCJStorageException e) {
            throw new RyaClientException("Could not batch update PCJ with ID '" + pcjId + "' because the old " + "results could not be purged from it.", e);
        }
        // Parse the PCJ's SPARQL query.
        final PcjMetadata metadata = pcjStorage.getPcjMetadata(pcjId);
        final String sparql = metadata.getSparql();
        final SPARQLParser parser = new SPARQLParser();
        final ParsedQuery parsedQuery = parser.parseQuery(sparql, null);
        // Execute the query.
        sailConn = sail.getConnection();
        results = sailConn.evaluate(parsedQuery.getTupleExpr(), null, null, false);
        // Load the results into the PCJ table.
        final List<VisibilityBindingSet> batch = new ArrayList<>(1000);
        while (results.hasNext()) {
            final VisibilityBindingSet result = new VisibilityBindingSet(results.next(), "");
            batch.add(result);
            if (batch.size() == 1000) {
                pcjStorage.addResults(pcjId, batch);
                batch.clear();
            }
        }
        if (!batch.isEmpty()) {
            pcjStorage.addResults(pcjId, batch);
            batch.clear();
        }
    } catch (final MalformedQueryException | PCJStorageException | SailException | QueryEvaluationException e) {
        throw new RyaClientException("Fail to batch load new results into the PCJ with ID '" + pcjId + "'.", e);
    } finally {
        if (results != null) {
            try {
                results.close();
            } catch (final QueryEvaluationException e) {
                log.warn(e.getMessage(), e);
            }
        }
        if (sailConn != null) {
            try {
                sailConn.close();
            } catch (final SailException e) {
                log.warn(e.getMessage(), e);
            }
        }
        if (sail != null) {
            try {
                sail.shutDown();
            } catch (final SailException e) {
                log.warn(e.getMessage(), e);
            }
        }
    }
}
Also used : SPARQLParser(org.openrdf.query.parser.sparql.SPARQLParser) RyaClientException(org.apache.rya.api.client.RyaClientException) VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) ParsedQuery(org.openrdf.query.parser.ParsedQuery) ArrayList(java.util.ArrayList) SailException(org.openrdf.sail.SailException) SailConnection(org.openrdf.sail.SailConnection) QueryEvaluationException(org.openrdf.query.QueryEvaluationException) Sail(org.openrdf.sail.Sail) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) MalformedQueryException(org.openrdf.query.MalformedQueryException) PcjMetadata(org.apache.rya.indexing.pcj.storage.PcjMetadata) PCJStorageException(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage.PCJStorageException)

Aggregations

PcjMetadata (org.apache.rya.indexing.pcj.storage.PcjMetadata)47 Test (org.junit.Test)30 VisibilityBindingSet (org.apache.rya.api.model.VisibilityBindingSet)22 HashSet (java.util.HashSet)17 VariableOrder (org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder)16 URIImpl (org.openrdf.model.impl.URIImpl)15 MapBindingSet (org.openrdf.query.impl.MapBindingSet)15 PrecomputedJoinStorage (org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage)14 BindingSet (org.openrdf.query.BindingSet)14 Connector (org.apache.accumulo.core.client.Connector)12 PCJStorageException (org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage.PCJStorageException)11 NumericLiteralImpl (org.openrdf.model.impl.NumericLiteralImpl)10 AccumuloPcjStorage (org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage)9 ShiftVarOrderFactory (org.apache.rya.indexing.pcj.storage.accumulo.ShiftVarOrderFactory)8 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)6 Statement (org.openrdf.model.Statement)6 LiteralImpl (org.openrdf.model.impl.LiteralImpl)6 StatementImpl (org.openrdf.model.impl.StatementImpl)6 MalformedQueryException (org.openrdf.query.MalformedQueryException)6 QueryEvaluationException (org.openrdf.query.QueryEvaluationException)6