Search in sources :

Example 1 with PcjTables

use of org.apache.rya.indexing.pcj.storage.accumulo.PcjTables in project incubator-rya by apache.

the class AccumuloIndexSetProvider method getIndices.

/**
 * Builds one {@link AccumuloIndexSet} per PCJ table that can be discovered
 * for the configured Rya instance.
 * <p>
 * The table-name-to-SPARQL mapping is resolved using the first of these
 * strategies that applies:
 * <ol>
 *   <li>the PCJ tables explicitly listed in the configuration, when there are any;</li>
 *   <li>the PCJs reported by the PCJ storage, when {@code hasRyaDetails} says the
 *       instance has them;</li>
 *   <li>a scan of every Accumulo table whose name starts with the table prefix
 *       followed by {@code INDEX}.</li>
 * </ol>
 *
 * @return the index sets, in the order their tables were discovered; empty
 *   when no PCJ table was found
 * @throws PcjIndexSetException if the PCJ metadata could not be read or an
 *   index set could not be created
 */
@Override
protected List<ExternalTupleSet> getIndices() throws PcjIndexSetException {
    requireNonNull(conf);
    try {
        final String tablePrefix = requireNonNull(conf.get(RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX));
        final Connector conn = requireNonNull(ConfigUtils.getConnector(conf));

        // Only a Rya triple store configuration can carry an explicit list of PCJ tables.
        final List<String> configuredTables = (conf instanceof RdfCloudTripleStoreConfiguration)
                ? ((RdfCloudTripleStoreConfiguration) conf).getPcjTables()
                : null;

        // PCJ table name -> SPARQL query of that PCJ, kept in discovery order.
        final Map<String, String> sparqlByTable = Maps.newLinkedHashMap();
        try (final PrecomputedJoinStorage storage = new AccumuloPcjStorage(conn, tablePrefix)) {
            final PcjTableNameFactory nameFactory = new PcjTableNameFactory();
            if (configuredTables != null && !configuredTables.isEmpty()) {
                // Explicit tables win: look each one up by the PCJ id encoded in its name.
                for (final String tableName : configuredTables) {
                    final String pcjId = nameFactory.getPcjId(tableName);
                    sparqlByTable.put(tableName, storage.getPcjMetadata(pcjId).getSparql());
                }
            } else if (hasRyaDetails(tablePrefix, conn)) {
                // A newer Rya install that has PCJ details: ask the storage which PCJs exist.
                for (final String pcjId : storage.listPcjs()) {
                    final String tableName = nameFactory.makeTableName(tablePrefix, pcjId);
                    sparqlByTable.put(tableName, storage.getPcjMetadata(pcjId).getSparql());
                }
            } else {
                // Otherwise fall back to scanning the table names themselves.
                final PcjTables pcjTables = new PcjTables();
                final String indexTablePrefix = tablePrefix + "INDEX";
                for (final String tableName : conn.tableOperations().list()) {
                    if (tableName.startsWith(indexTablePrefix)) {
                        sparqlByTable.put(tableName, pcjTables.getPcjMetadata(conn, tableName).getSparql());
                    }
                }
            }
        }

        // Turn every (table, sparql) pair into an AccumuloIndexSet.
        final List<ExternalTupleSet> indexSets = Lists.newArrayList();
        if (sparqlByTable.isEmpty()) {
            log.info("No Index found");
            return indexSets;
        }
        for (final Map.Entry<String, String> entry : sparqlByTable.entrySet()) {
            indexSets.add(new AccumuloIndexSet(entry.getValue(), conf, entry.getKey()));
        }
        return indexSets;
    } catch (final PCJStorageException | AccumuloException | AccumuloSecurityException | MalformedQueryException | SailException | QueryEvaluationException | TableNotFoundException e) {
        throw new PcjIndexSetException("Failed to retrieve the indicies.", e);
    }
}
Also used : Connector(org.apache.accumulo.core.client.Connector) AccumuloException(org.apache.accumulo.core.client.AccumuloException) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) AccumuloIndexSet(org.apache.rya.indexing.external.tupleSet.AccumuloIndexSet) PcjTableNameFactory(org.apache.rya.indexing.pcj.storage.accumulo.PcjTableNameFactory) SailException(org.openrdf.sail.SailException) ExternalTupleSet(org.apache.rya.indexing.external.tupleSet.ExternalTupleSet) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) QueryEvaluationException(org.openrdf.query.QueryEvaluationException) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) MalformedQueryException(org.openrdf.query.MalformedQueryException) List(java.util.List) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) PcjTables(org.apache.rya.indexing.pcj.storage.accumulo.PcjTables) PCJStorageException(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage.PCJStorageException) RdfCloudTripleStoreConfiguration(org.apache.rya.api.RdfCloudTripleStoreConfiguration)

Example 2 with PcjTables

use of org.apache.rya.indexing.pcj.storage.accumulo.PcjTables in project incubator-rya by apache.

the class RyaDirectExample method createPCJ.

// private static void testDeleteGeoData(final SailRepositoryConnection conn)
// throws Exception {
// // Delete all stored points
// final String sparqlDelete = "PREFIX geo: <http://www.opengis.net/ont/geosparql#>  "//
// + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/>  "//
// + "DELETE {\n" //
// + "  ?feature a geo:Feature . "//
// + "  ?feature geo:hasGeometry ?point . "//
// + "  ?point a geo:Point . "//
// + "  ?point geo:asWKT ?wkt . "//
// + "}\n" + "WHERE { \n" + "  ?feature a geo:Feature . "//
// + "  ?feature geo:hasGeometry ?point . "//
// + "  ?point a geo:Point . "//
// + "  ?point geo:asWKT ?wkt . "//
// + "}";//
// 
// final Update deleteUpdate = conn.prepareUpdate(QueryLanguage.SPARQL,
// sparqlDelete);
// deleteUpdate.execute();
// 
// String queryString;
// TupleQuery tupleQuery;
// CountingResultHandler tupleHandler;
// 
// // Find all stored points
// queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#>  "//
// + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/>  "//
// + "SELECT ?feature ?point ?wkt " //
// + "{" //
// + "  ?feature a geo:Feature . "//
// + "  ?feature geo:hasGeometry ?point . "//
// + "  ?point a geo:Point . "//
// + "  ?point geo:asWKT ?wkt . "//
// + "}";//
// tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString);
// tupleHandler = new CountingResultHandler();
// tupleQuery.evaluate(tupleHandler);
// log.info("Result count : " + tupleHandler.getCount());
// Validate.isTrue(tupleHandler.getCount() == 0);
// }
/**
 * Loads a few sample statements into a Rya repository and then creates and
 * populates two PCJ tables ({@code INDEX_1} and {@code INDEX_2} under
 * {@code RYA_TABLE_PREFIX}) from two SPARQL queries over those statements.
 * <p>
 * The repository is built from a copy of {@code conf} with
 * {@link ConfigUtils#USE_PCJ} set to {@code "false"}; the PCJ tables are
 * written through a {@link MockInstance} connector.
 *
 * @param conf - The configuration the Rya Sail is created from.
 */
private static void createPCJ(final Configuration conf) throws RepositoryException, AccumuloException, AccumuloSecurityException, TableExistsException, PcjException, InferenceEngineException, NumberFormatException, UnknownHostException, SailException, TableNotFoundException {
    final Configuration config = new AccumuloRdfConfiguration(conf);
    config.set(ConfigUtils.USE_PCJ, "false");

    SailRepository repository = null;
    SailRepositoryConnection conn = null;
    try {
        repository = new SailRepository(RyaSailFactory.getInstance(config));
        conn = repository.getConnection();

        // The two queries differ only in the direction of the type pattern.
        final String queryString1 = "SELECT ?e ?c ?l ?o "
                + "{"
                + "  ?c a ?e . "
                + "  ?e <http://www.w3.org/2000/01/rdf-schema#label> ?l . "
                + "  ?e <uri:talksTo> ?o . "
                + "}";
        final String queryString2 = "SELECT ?e ?c ?l ?o "
                + "{"
                + "  ?e a ?c . "
                + "  ?e <http://www.w3.org/2000/01/rdf-schema#label> ?l . "
                + "  ?e <uri:talksTo> ?o . "
                + "}";

        // Sample data the PCJ queries will be evaluated against.
        final URI person = new URIImpl("urn:people:alice");
        final URI feature = new URIImpl("urn:feature");
        final URI sub = new URIImpl("uri:entity");
        final URI subclass = new URIImpl("uri:class");
        final URI obj = new URIImpl("uri:obj");
        final URI talksTo = new URIImpl("uri:talksTo");

        conn.add(person, RDF.TYPE, sub);
        conn.add(feature, RDF.TYPE, sub);
        conn.add(sub, RDF.TYPE, subclass);
        conn.add(sub, RDFS.LABEL, new LiteralImpl("label"));
        conn.add(sub, talksTo, obj);

        final String tablename1 = RYA_TABLE_PREFIX + "INDEX_1";
        final String tablename2 = RYA_TABLE_PREFIX + "INDEX_2";

        final PasswordToken emptyPassword = new PasswordToken("".getBytes(StandardCharsets.UTF_8));
        final Connector accCon = new MockInstance(INSTANCE).getConnector("root", emptyPassword);

        // No variable order factory is supplied for either table.
        final Optional<PcjVarOrderFactory> noVarOrderFactory = Optional.<PcjVarOrderFactory>absent();
        new PcjTables().createAndPopulatePcj(conn, accCon, tablename1, queryString1, new String[] { "e", "c", "l", "o" }, noVarOrderFactory);
        new PcjTables().createAndPopulatePcj(conn, accCon, tablename2, queryString2, new String[] { "e", "c", "l", "o" }, noVarOrderFactory);
    } catch (final RyaDAOException e) {
        throw new Error("While creating PCJ tables.", e);
    } finally {
        // Release the connection before the repository that produced it.
        closeQuietly(conn);
        closeQuietly(repository);
    }
}
Also used : Connector(org.apache.accumulo.core.client.Connector) Configuration(org.apache.hadoop.conf.Configuration) AccumuloIndexingConfiguration(org.apache.rya.indexing.accumulo.AccumuloIndexingConfiguration) AccumuloRdfConfiguration(org.apache.rya.accumulo.AccumuloRdfConfiguration) SailRepository(org.openrdf.repository.sail.SailRepository) URIImpl(org.openrdf.model.impl.URIImpl) SailRepositoryConnection(org.openrdf.repository.sail.SailRepositoryConnection) AccumuloRdfConfiguration(org.apache.rya.accumulo.AccumuloRdfConfiguration) URI(org.openrdf.model.URI) LiteralImpl(org.openrdf.model.impl.LiteralImpl) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) MockInstance(org.apache.accumulo.core.client.mock.MockInstance) Sail(org.openrdf.sail.Sail) RyaDAOException(org.apache.rya.api.persist.RyaDAOException) PcjTables(org.apache.rya.indexing.pcj.storage.accumulo.PcjTables)

Example 3 with PcjTables

use of org.apache.rya.indexing.pcj.storage.accumulo.PcjTables in project incubator-rya by apache.

the class PcjIntegrationTestingUtil method populatePcj.

/**
 * Scan Rya for results that solve the PCJ's query and store them in the PCJ
 * table.
 * <p>
 * This method assumes the PCJ table has already been created. The SPARQL
 * query is read from the PCJ table's metadata, evaluated against Rya, and
 * the resulting binding sets are written to the table in batches. The query
 * result is always closed before this method returns.
 *
 * @param accumuloConn
 *            - A connection to the Accumulo that hosts the PCJ table. (not
 *            null)
 * @param pcjTableName
 *            - The name of the PCJ table that will receive the results.
 *            (not null)
 * @param ryaConn
 *            - A connection to the Rya store that will be queried to find
 *            results. (not null)
 * @throws PcjException
 *             If results could not be written to the PCJ table, the PCJ
 *             table does not exist, or the query that is being executed was
 *             malformed.
 */
public static void populatePcj(final Connector accumuloConn, final String pcjTableName, final RepositoryConnection ryaConn) throws PcjException {
    checkNotNull(accumuloConn);
    checkNotNull(pcjTableName);
    checkNotNull(ryaConn);
    // Number of binding sets buffered before they are flushed to the PCJ table.
    final int batchSize = 1000;
    try {
        // Fetch the query that needs to be executed from the PCJ table.
        final PcjMetadata pcjMetadata = new PcjTables().getPcjMetadata(accumuloConn, pcjTableName);
        final String sparql = pcjMetadata.getSparql();
        // Query Rya for results to the SPARQL query.
        final TupleQuery query = ryaConn.prepareTupleQuery(QueryLanguage.SPARQL, sparql);
        final TupleQueryResult results = query.evaluate();
        try {
            // Load the results into the PCJ table one batch at a time.
            final Set<BindingSet> batch = new HashSet<>(batchSize);
            while (results.hasNext()) {
                batch.add(results.next());
                if (batch.size() == batchSize) {
                    addResults(accumuloConn, pcjTableName, batch);
                    batch.clear();
                }
            }
            // Flush whatever is left over from the final, partial batch.
            if (!batch.isEmpty()) {
                addResults(accumuloConn, pcjTableName, batch);
            }
        } finally {
            // Release the query result even when iterating or writing failed;
            // the original code leaked it.
            results.close();
        }
    } catch (RepositoryException | MalformedQueryException | QueryEvaluationException e) {
        throw new PcjException("Could not populate a PCJ table with Rya results for the table named: " + pcjTableName, e);
    }
}
Also used : VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) BindingSet(org.openrdf.query.BindingSet) PcjException(org.apache.rya.indexing.pcj.storage.PcjException) TupleQuery(org.openrdf.query.TupleQuery) RepositoryException(org.openrdf.repository.RepositoryException) QueryEvaluationException(org.openrdf.query.QueryEvaluationException) MalformedQueryException(org.openrdf.query.MalformedQueryException) PcjMetadata(org.apache.rya.indexing.pcj.storage.PcjMetadata) PcjTables(org.apache.rya.indexing.pcj.storage.accumulo.PcjTables) TupleQueryResult(org.openrdf.query.TupleQueryResult) HashSet(java.util.HashSet)

Example 4 with PcjTables

use of org.apache.rya.indexing.pcj.storage.accumulo.PcjTables in project incubator-rya by apache.

the class PcjIntegrationTestingUtil method writeResults.

/**
 * Writes a collection of binding sets to a specific PCJ table.
 * <p>
 * The table's metadata is read first; each binding set is then turned into
 * mutations using the variable orders from that metadata and handed to a
 * batch writer, which is closed before the method returns.
 *
 * @param accumuloConn
 *            - A connection to the Accumulo that hosts the PCJ table. (not
 *            null)
 * @param pcjTableName
 *            - The name of the PCJ table that will receive the results.
 *            (not null)
 * @param results
 *            - Binding sets that will be written to the PCJ table. (not
 *            null)
 * @throws PcjException
 *             The provided PCJ table doesn't exist, is missing the PCJ
 *             metadata, or the result could not be written to it.
 */
private static void writeResults(final Connector accumuloConn, final String pcjTableName, final Collection<BindingSet> results) throws PcjException {
    checkNotNull(accumuloConn);
    checkNotNull(pcjTableName);
    checkNotNull(results);

    // The metadata supplies the variable orders every result is written under.
    final PcjMetadata pcjMetadata = new PcjTables().getPcjMetadata(accumuloConn, pcjTableName);

    BatchWriter batchWriter = null;
    try {
        batchWriter = accumuloConn.createBatchWriter(pcjTableName, new BatchWriterConfig());
        for (final BindingSet bindingSet : results) {
            // One set of mutations per binding set, covering the variable orders.
            batchWriter.addMutations(makeWriteResultMutations(pcjMetadata.getVarOrders(), bindingSet));
        }
    } catch (TableNotFoundException | MutationsRejectedException e) {
        throw new PcjException("Could not add results to the PCJ table named: " + pcjTableName, e);
    } finally {
        // The writer is only non-null once it was successfully created.
        if (batchWriter != null) {
            try {
                batchWriter.close();
            } catch (final MutationsRejectedException e) {
                throw new PcjException("Could not add results to a PCJ table because some of the mutations were rejected.", e);
            }
        }
    }
}
Also used : VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) BindingSet(org.openrdf.query.BindingSet) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) PcjException(org.apache.rya.indexing.pcj.storage.PcjException) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) PcjMetadata(org.apache.rya.indexing.pcj.storage.PcjMetadata) PcjTables(org.apache.rya.indexing.pcj.storage.accumulo.PcjTables) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Mutation(org.apache.accumulo.core.data.Mutation) MutationsRejectedException(org.apache.accumulo.core.client.MutationsRejectedException)

Example 5 with PcjTables

use of org.apache.rya.indexing.pcj.storage.accumulo.PcjTables in project incubator-rya by apache.

the class PcjIntegrationTestingUtil method createAndPopulatePcj.

// ****************************Creation and Population of PcjTables Accumulo***************************
/**
 * Creates a new PCJ table in Accumulo, then fills it with the results that
 * the SPARQL query currently has in an instance of Rya.
 * <p>
 * The operation is not atomic: if a later step fails, the partially
 * created PCJ table is left in Accumulo.
 *
 * @param ryaConn - Connects to the Rya that will be scanned. (not null)
 * @param accumuloConn - Connects to the accumulo that hosts the PCJ results. (not null)
 * @param pcjTableName - The name of the PCJ table that will be created. (not null)
 * @param sparql - The SPARQL query whose results will be loaded into the table. (not null)
 * @param resultVariables - The variables that are included in the query's resulting binding sets. (not null)
 * @param pcjVarOrderFactory - An optional factory that decides which variable orders
 *   the results are stored in. When absent, a {@link ShiftVarOrderFactory}
 *   is used instead. (not null)
 * @throws PcjException The PCJ table could not be created or the values from
 *   Rya were not able to be loaded into it.
 */
public static void createAndPopulatePcj(final RepositoryConnection ryaConn, final Connector accumuloConn, final String pcjTableName, final String sparql, final String[] resultVariables, final Optional<PcjVarOrderFactory> pcjVarOrderFactory) throws PcjException {
    checkNotNull(ryaConn);
    checkNotNull(accumuloConn);
    checkNotNull(pcjTableName);
    checkNotNull(sparql);
    checkNotNull(resultVariables);
    checkNotNull(pcjVarOrderFactory);

    final PcjTables pcjTables = new PcjTables();

    // Derive the variable orders from the result variables, using the
    // supplied factory or the shift-based one when none was given.
    final Set<VariableOrder> variableOrders = pcjVarOrderFactory
            .or(new ShiftVarOrderFactory())
            .makeVarOrders(new VariableOrder(resultVariables));

    // Step 1: create the PCJ table in Accumulo.
    pcjTables.createPcjTable(accumuloConn, pcjTableName, variableOrders, sparql);

    // Step 2: load the matches Rya already holds into the new table.
    populatePcj(accumuloConn, pcjTableName, ryaConn);
}
Also used : PcjVarOrderFactory(org.apache.rya.indexing.pcj.storage.accumulo.PcjVarOrderFactory) VariableOrder(org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder) ShiftVarOrderFactory(org.apache.rya.indexing.pcj.storage.accumulo.ShiftVarOrderFactory) PcjTables(org.apache.rya.indexing.pcj.storage.accumulo.PcjTables)

Aggregations

PcjTables (org.apache.rya.indexing.pcj.storage.accumulo.PcjTables)5 Connector (org.apache.accumulo.core.client.Connector)2 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)2 VisibilityBindingSet (org.apache.rya.api.model.VisibilityBindingSet)2 PcjException (org.apache.rya.indexing.pcj.storage.PcjException)2 PcjMetadata (org.apache.rya.indexing.pcj.storage.PcjMetadata)2 BindingSet (org.openrdf.query.BindingSet)2 MalformedQueryException (org.openrdf.query.MalformedQueryException)2 QueryEvaluationException (org.openrdf.query.QueryEvaluationException)2 HashSet (java.util.HashSet)1 List (java.util.List)1 AccumuloException (org.apache.accumulo.core.client.AccumuloException)1 AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException)1 BatchWriter (org.apache.accumulo.core.client.BatchWriter)1 BatchWriterConfig (org.apache.accumulo.core.client.BatchWriterConfig)1 MutationsRejectedException (org.apache.accumulo.core.client.MutationsRejectedException)1 MockInstance (org.apache.accumulo.core.client.mock.MockInstance)1 PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken)1 Mutation (org.apache.accumulo.core.data.Mutation)1 Configuration (org.apache.hadoop.conf.Configuration)1