Search in sources :

Example 36 with AccumuloPcjStorage

use of org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage in project incubator-rya by apache.

the class InputIT method historicResults.

/**
 * Ensure historic matches are included in the result.
 * <p>
 * Triples are written to Rya before the PCJ is created, the Fluo application
 * is then told to maintain the PCJ, and the stored PCJ results are compared
 * against the expected binding sets.
 *
 * @throws Exception if loading the triples, creating the PCJ, or reading the
 *   PCJ results fails.
 */
@Test
public void historicResults() throws Exception {
    // A query that finds people who talk to Eve and work at Chipotle.
    final String sparql = "SELECT ?x WHERE { " + "?x <http://talksTo> <http://Eve>. " + "?x <http://worksAt> <http://Chipotle>." + "}";

    // Triples that are loaded into Rya before the PCJ is created.
    final ValueFactory vf = new ValueFactoryImpl();
    final Set<Statement> historicTriples = Sets.newHashSet(
            vf.createStatement(vf.createURI("http://Alice"), vf.createURI("http://talksTo"), vf.createURI("http://Eve")),
            vf.createStatement(vf.createURI("http://Bob"), vf.createURI("http://talksTo"), vf.createURI("http://Eve")),
            vf.createStatement(vf.createURI("http://Charlie"), vf.createURI("http://talksTo"), vf.createURI("http://Eve")),
            vf.createStatement(vf.createURI("http://Eve"), vf.createURI("http://helps"), vf.createURI("http://Kevin")),
            vf.createStatement(vf.createURI("http://Bob"), vf.createURI("http://worksAt"), vf.createURI("http://Chipotle")),
            vf.createStatement(vf.createURI("http://Charlie"), vf.createURI("http://worksAt"), vf.createURI("http://Chipotle")),
            vf.createStatement(vf.createURI("http://Eve"), vf.createURI("http://worksAt"), vf.createURI("http://Chipotle")),
            vf.createStatement(vf.createURI("http://David"), vf.createURI("http://worksAt"), vf.createURI("http://Chipotle")));

    // The expected results of the SPARQL query once the PCJ has been computed.
    // Only Bob and Charlie both talk to Eve and work at Chipotle.
    final Set<BindingSet> expected = new HashSet<>();
    MapBindingSet bs = new MapBindingSet();
    bs.addBinding("x", vf.createURI("http://Bob"));
    expected.add(bs);
    bs = new MapBindingSet();
    bs.addBinding("x", vf.createURI("http://Charlie"));
    expected.add(bs);

    // Load the historic data into Rya. The connection is released in a finally
    // block so that a failed insert does not leak it.
    final SailRepositoryConnection ryaConn = super.getRyaSailRepository().getConnection();
    try {
        for (final Statement triple : historicTriples) {
            ryaConn.add(triple);
        }
    } finally {
        ryaConn.close();
    }

    // Create the PCJ table. The storage is closed once the test is finished with it.
    final Connector accumuloConn = super.getAccumuloConnector();
    try (final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(accumuloConn, getRyaInstanceName())) {
        final String pcjId = pcjStorage.createPcj(sparql);

        try (FluoClient fluoClient = FluoFactory.newClient(super.getFluoConfiguration())) {
            // Tell the Fluo app to maintain the PCJ.
            new CreateFluoPcj().withRyaIntegration(pcjId, pcjStorage, fluoClient, accumuloConn, getRyaInstanceName());

            // Wait for the observers to finish before reading the results.
            super.getMiniFluo().waitForObservers();

            // Verify the end results of the query match the expected results.
            final Set<BindingSet> results = new HashSet<>();
            try (CloseableIterator<BindingSet> resultsIt = pcjStorage.listResults(pcjId)) {
                while (resultsIt.hasNext()) {
                    results.add(resultsIt.next());
                }
            }
            assertEquals(expected, results);
        }
    }
}
Also used : MapBindingSet(org.openrdf.query.impl.MapBindingSet) BindingSet(org.openrdf.query.BindingSet) Connector(org.apache.accumulo.core.client.Connector) FluoClient(org.apache.fluo.api.client.FluoClient) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) Statement(org.openrdf.model.Statement) RyaStatement(org.apache.rya.api.domain.RyaStatement) ValueFactoryImpl(org.openrdf.model.impl.ValueFactoryImpl) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) ValueFactory(org.openrdf.model.ValueFactory) SailRepositoryConnection(org.openrdf.repository.sail.SailRepositoryConnection) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) MapBindingSet(org.openrdf.query.impl.MapBindingSet) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 37 with AccumuloPcjStorage

use of org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage in project incubator-rya by apache.

the class InputIT method historicAndStreamConflict.

/**
 * Simulates the case where a Triple is added to Rya, a new query that
 * includes the triple as a historic match is inserted into Fluo, and then
 * the same triple is streamed into Fluo. The query's results will already
 * include the Triple because they were added while the query was being
 * created. This case should not fail or affect the end results in any way.
 *
 * @throws Exception if loading the triples, creating the PCJ, streaming the
 *   triples, or reading the PCJ results fails.
 */
@Test
public void historicAndStreamConflict() throws Exception {
    // A query that finds people who talk to Eve and work at Chipotle.
    final String sparql = "SELECT ?x WHERE { " + "?x <http://talksTo> <http://Eve>. " + "?x <http://worksAt> <http://Chipotle>." + "}";

    // Triples that are loaded into Rya before the PCJ is created.
    final ValueFactory vf = new ValueFactoryImpl();
    final Set<Statement> historicTriples = Sets.newHashSet(
            vf.createStatement(vf.createURI("http://Alice"), vf.createURI("http://talksTo"), vf.createURI("http://Eve")),
            vf.createStatement(vf.createURI("http://Alice"), vf.createURI("http://worksAt"), vf.createURI("http://Chipotle")));

    // Triples that will be streamed into Fluo after the PCJ has been created.
    // These are the same two triples as the historic ones above.
    final Set<RyaStatement> streamedTriples = Sets.newHashSet(
            new RyaStatement(new RyaURI("http://Alice"), new RyaURI("http://talksTo"), new RyaURI("http://Eve")),
            new RyaStatement(new RyaURI("http://Alice"), new RyaURI("http://worksAt"), new RyaURI("http://Chipotle")));

    // The expected final result.
    final Set<BindingSet> expected = new HashSet<>();
    final MapBindingSet bs = new MapBindingSet();
    bs.addBinding("x", vf.createURI("http://Alice"));
    expected.add(bs);

    // Load the historic data into Rya. The connection is released in a finally
    // block so that a failed insert does not leak it.
    final SailRepositoryConnection ryaConn = super.getRyaSailRepository().getConnection();
    try {
        for (final Statement triple : historicTriples) {
            ryaConn.add(triple);
        }
    } finally {
        ryaConn.close();
    }

    // Create the PCJ table. The storage is closed once the test is finished with it.
    final Connector accumuloConn = super.getAccumuloConnector();
    try (final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(accumuloConn, getRyaInstanceName())) {
        final String pcjId = pcjStorage.createPcj(sparql);

        try (FluoClient fluoClient = FluoFactory.newClient(super.getFluoConfiguration())) {
            // Tell the Fluo app to maintain the PCJ.
            new CreateFluoPcj().withRyaIntegration(pcjId, pcjStorage, fluoClient, accumuloConn, getRyaInstanceName());

            // Ensure Alice is a match.
            super.getMiniFluo().waitForObservers();
            Set<BindingSet> results = new HashSet<>();
            try (CloseableIterator<BindingSet> resultsIt = pcjStorage.listResults(pcjId)) {
                while (resultsIt.hasNext()) {
                    results.add(resultsIt.next());
                }
            }
            assertEquals(expected, results);

            // Stream the same Alice triples into Fluo.
            new InsertTriples().insert(fluoClient, streamedTriples, Optional.<String>absent());

            // Verify the end result of the query is still only Alice.
            super.getMiniFluo().waitForObservers();
            results = new HashSet<>();
            try (CloseableIterator<BindingSet> resultsIt = pcjStorage.listResults(pcjId)) {
                while (resultsIt.hasNext()) {
                    results.add(resultsIt.next());
                }
            }
            assertEquals(expected, results);
        }
    }
}
Also used : MapBindingSet(org.openrdf.query.impl.MapBindingSet) BindingSet(org.openrdf.query.BindingSet) Connector(org.apache.accumulo.core.client.Connector) FluoClient(org.apache.fluo.api.client.FluoClient) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) InsertTriples(org.apache.rya.indexing.pcj.fluo.api.InsertTriples) Statement(org.openrdf.model.Statement) RyaStatement(org.apache.rya.api.domain.RyaStatement) ValueFactoryImpl(org.openrdf.model.impl.ValueFactoryImpl) RyaStatement(org.apache.rya.api.domain.RyaStatement) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) ValueFactory(org.openrdf.model.ValueFactory) SailRepositoryConnection(org.openrdf.repository.sail.SailRepositoryConnection) RyaURI(org.apache.rya.api.domain.RyaURI) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) MapBindingSet(org.openrdf.query.impl.MapBindingSet) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 38 with AccumuloPcjStorage

use of org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage in project incubator-rya by apache.

the class AccumuloIndexSetColumnVisibilityTest method init.

/**
 * Prepares the fixtures shared by the tests in this class: starts the mini
 * Accumulo instance, grants the "root" user the "U" and "USA" authorizations,
 * initializes the Rya Details, creates a PCJ, and stores two binding sets in
 * it under the "U|USA" visibility expression.
 */
@BeforeClass
public static void init() throws AccumuloException, AccumuloSecurityException, PCJStorageException, IOException, InterruptedException, TableNotFoundException, AlreadyInitializedException, RyaDetailsRepositoryException {
    // Bring up the mini Accumulo instance and remember how to reach it.
    accumulo = startMiniAccumulo();
    // NOTE(review): the result of this call is discarded and the value is
    // fetched again two lines below; it looks redundant - confirm before removing.
    accumulo.getZooKeepers();
    instance = accumulo.getInstanceName();
    zooKeepers = accumulo.getZooKeepers();
    conf = getConf();

    // The root user needs both authorizations used by the stored results.
    accCon.securityOperations().changeUserAuthorizations("root", new Authorizations("U", "USA"));

    // Initialize the Rya Details for the Rya instance.
    initRyaDetails();

    // Create the PCJ that the results are stored in.
    final String sparql = "SELECT ?name ?age "
            + "{"
            + "?name <http://hasAge> ?age ."
            + "?name <http://playsSport> \"Soccer\" "
            + "}";
    storage = new AccumuloPcjStorage(accCon, ryaInstanceName);
    pcjId = storage.createPcj(sparql);

    // First result: Alice, age 14.
    pcjBs1 = new QueryBindingSet();
    pcjBs1.addBinding("age", new NumericLiteralImpl(14, XMLSchema.INTEGER));
    pcjBs1.addBinding("name", new URIImpl("http://Alice"));

    // Second result: Bob, age 16.
    pcjBs2 = new QueryBindingSet();
    pcjBs2.addBinding("age", new NumericLiteralImpl(16, XMLSchema.INTEGER));
    pcjBs2.addBinding("name", new URIImpl("http://Bob"));

    // Attach the visibility expression to each result and store them in the PCJ.
    final String visibility = "U|USA";
    final Set<VisibilityBindingSet> visibilityResults = new HashSet<>();
    final Set<BindingSet> bindingSets = Sets.<BindingSet>newHashSet(pcjBs1, pcjBs2);
    for (final BindingSet bindingSet : bindingSets) {
        visibilityResults.add(new VisibilityBindingSet(bindingSet, visibility));
    }
    storage.addResults(pcjId, visibilityResults);
}
Also used : QueryBindingSet(org.openrdf.query.algebra.evaluation.QueryBindingSet) VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) BindingSet(org.openrdf.query.BindingSet) Authorizations(org.apache.accumulo.core.security.Authorizations) VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) NumericLiteralImpl(org.openrdf.model.impl.NumericLiteralImpl) URIImpl(org.openrdf.model.impl.URIImpl) QueryBindingSet(org.openrdf.query.algebra.evaluation.QueryBindingSet) HashSet(java.util.HashSet) BeforeClass(org.junit.BeforeClass)

Example 39 with AccumuloPcjStorage

use of org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage in project incubator-rya by apache.

the class RyaTableNames method getTableNames.

/**
 * Lists the Accumulo tables that belong to an instance of Rya and that
 * currently exist in Accumulo.
 * <p>
 * The candidate names are the core Rya tables, the Rya Details table, the
 * tables of every secondary index that the instance's details report as
 * enabled, and one table per PCJ when the PCJ index is enabled. Candidates
 * that do not exist in Accumulo are left out of the returned list.
 *
 * @param ryaInstanceName - The name of the Rya instance. (not null)
 * @param conn - A connector to the host Accumulo instance. (not null)
 * @return The Accumulo table names that are used by the Rya instance.
 * @throws NotInitializedException The instance's Rya Details have not been initialized.
 * @throws RyaDetailsRepositoryException General problem with the Rya Details repository.
 * @throws PCJStorageException General problem with the PCJ storage.
 */
public List<String> getTableNames(final String ryaInstanceName, final Connector conn) throws NotInitializedException, RyaDetailsRepositoryException, PCJStorageException {
    // Every table the instance might own; existence is checked at the end.
    final List<String> candidates = new ArrayList<>();

    // Core Rya tables.
    final TableLayoutStrategy layout = new TablePrefixLayoutStrategy(ryaInstanceName);
    candidates.add(layout.getSpo());
    candidates.add(layout.getPo());
    candidates.add(layout.getOsp());
    candidates.add(layout.getEval());
    candidates.add(layout.getNs());
    candidates.add(layout.getProspects());
    candidates.add(layout.getSelectivity());

    // Rya Details table.
    candidates.add(AccumuloRyaInstanceDetailsRepository.makeTableName(ryaInstanceName));

    // Secondary indexer tables, included only for the indexers that are enabled.
    final RyaDetailsRepository detailsRepository = new AccumuloRyaInstanceDetailsRepository(conn, ryaInstanceName);
    final RyaDetails ryaDetails = detailsRepository.getRyaInstanceDetails();

    if (ryaDetails.getEntityCentricIndexDetails().isEnabled()) {
        candidates.add(EntityCentricIndex.makeTableName(ryaInstanceName));
    }

    if (ryaDetails.getFreeTextIndexDetails().isEnabled()) {
        candidates.addAll(AccumuloFreeTextIndexer.makeTableNames(ryaInstanceName));
    }

    if (ryaDetails.getTemporalIndexDetails().isEnabled()) {
        candidates.add(AccumuloTemporalIndexer.makeTableName(ryaInstanceName));
    }

    if (ryaDetails.getPCJIndexDetails().isEnabled()) {
        // Each PCJ known to the storage has its own table.
        try (final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(conn, ryaInstanceName)) {
            final PcjTableNameFactory pcjNames = new PcjTableNameFactory();
            for (final String pcjId : pcjStorage.listPcjs()) {
                candidates.add(pcjNames.makeTableName(ryaInstanceName, pcjId));
            }
        }
    }

    // Keep only the candidates that actually exist, preserving their order.
    final TableOperations tableOps = conn.tableOperations();
    final List<String> existing = new ArrayList<>();
    for (final String candidate : candidates) {
        if (tableOps.exists(candidate)) {
            existing.add(candidate);
        }
    }
    return existing;
}
Also used : TableLayoutStrategy(org.apache.rya.api.layout.TableLayoutStrategy) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) ArrayList(java.util.ArrayList) RyaDetails(org.apache.rya.api.instance.RyaDetails) PcjTableNameFactory(org.apache.rya.indexing.pcj.storage.accumulo.PcjTableNameFactory) AccumuloRyaInstanceDetailsRepository(org.apache.rya.accumulo.instance.AccumuloRyaInstanceDetailsRepository) TableOperations(org.apache.accumulo.core.client.admin.TableOperations) TablePrefixLayoutStrategy(org.apache.rya.api.layout.TablePrefixLayoutStrategy) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) RyaDetailsRepository(org.apache.rya.api.instance.RyaDetailsRepository)

Example 40 with AccumuloPcjStorage

use of org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage in project incubator-rya by apache.

the class AccumuloPcjStorageSupplier method get.

/**
 * Builds an {@link AccumuloPcjStorage} from the supplied configuration and
 * Accumulo connector.
 * <p>
 * The configuration must already be available, its PCJ storage type must be
 * set to {@code ACCUMULO}, and the Accumulo connector must already be
 * available; otherwise one of the precondition checks below fails.
 *
 * @return A new storage bound to the supplied connector and the Rya instance
 *   name read from the configuration.
 */
@Override
public AccumuloPcjStorage get() {
    // The application's configuration has to be available first.
    final Configuration appConfig = configSupplier.get();
    checkNotNull(appConfig, "Could not create a AccumuloPcjStorage because the application's configuration has not been provided yet.");

    // This supplier only applies when the configured PCJ storage type is ACCUMULO.
    final PrecomputedJoinIndexerConfig pcjIndexerConfig = new PrecomputedJoinIndexerConfig(appConfig);
    final Optional<PrecomputedJoinStorageType> configuredType = pcjIndexerConfig.getPcjStorageType();
    final boolean isAccumuloStorage = configuredType.isPresent() && (configuredType.get() == PrecomputedJoinStorageType.ACCUMULO);
    final String wrongTypeMessage = "This supplier requires the '" + PrecomputedJoinIndexerConfig.PCJ_STORAGE_TYPE + "' value be set to '" + PrecomputedJoinStorageType.ACCUMULO + "'.";
    checkArgument(isAccumuloStorage, wrongTypeMessage);

    // The Accumulo connector has to be available as well.
    final Connector connector = accumuloSupplier.get();
    checkNotNull(connector, "The Accumulo Connector must be set before initializing the AccumuloPcjStorage.");

    // The Rya instance name comes from the same configuration.
    final AccumuloPcjStorageConfig storageConfig = new AccumuloPcjStorageConfig(appConfig);
    return new AccumuloPcjStorage(connector, storageConfig.getRyaInstanceName());
}
Also used : PrecomputedJoinStorageType(org.apache.rya.indexing.external.PrecomputedJoinIndexerConfig.PrecomputedJoinStorageType) Connector(org.apache.accumulo.core.client.Connector) Configuration(org.apache.hadoop.conf.Configuration) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) PrecomputedJoinIndexerConfig(org.apache.rya.indexing.external.PrecomputedJoinIndexerConfig)

Aggregations

AccumuloPcjStorage (org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage): 46
PrecomputedJoinStorage (org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage): 44
Test (org.junit.Test): 32
Connector (org.apache.accumulo.core.client.Connector): 26
FluoClient (org.apache.fluo.api.client.FluoClient): 21
CreateFluoPcj (org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj): 20
BindingSet (org.openrdf.query.BindingSet): 20
MapBindingSet (org.openrdf.query.impl.MapBindingSet): 18
HashSet (java.util.HashSet): 15
RyaStatement (org.apache.rya.api.domain.RyaStatement): 15
ValueFactory (org.openrdf.model.ValueFactory): 13
RyaURI (org.apache.rya.api.domain.RyaURI): 12
InsertTriples (org.apache.rya.indexing.pcj.fluo.api.InsertTriples): 11
PcjMetadata (org.apache.rya.indexing.pcj.storage.PcjMetadata): 10
ValueFactoryImpl (org.openrdf.model.impl.ValueFactoryImpl): 9
FluoClientImpl (org.apache.fluo.core.client.FluoClientImpl): 7
VisibilityBindingSet (org.apache.rya.api.model.VisibilityBindingSet): 7
SailRepositoryConnection (org.openrdf.repository.sail.SailRepositoryConnection): 7
PCJDetails (org.apache.rya.api.instance.RyaDetails.PCJIndexDetails.PCJDetails): 6
MalformedQueryException (org.openrdf.query.MalformedQueryException): 6