use of org.apache.rya.indexing.pcj.storage.accumulo.PcjTables in project incubator-rya by apache.
the class AccumuloIndexSetProvider method getIndices.
/**
 * Builds one {@link AccumuloIndexSet} per PCJ table known to this provider.
 * <p>
 * The PCJ tables are discovered in the following order of preference:
 * <ol>
 *   <li>the table names listed in the configuration, when it is an
 *       {@link RdfCloudTripleStoreConfiguration} that lists any;</li>
 *   <li>the PCJ ids reported by the {@link PrecomputedJoinStorage}, when
 *       {@code hasRyaDetails} reports true for this table prefix;</li>
 *   <li>otherwise, every Accumulo table whose name starts with the table
 *       prefix followed by {@code INDEX}.</li>
 * </ol>
 *
 * @return the index sets, in discovery order; empty when no PCJ table was found
 * @throws PcjIndexSetException if the PCJ metadata could not be read or an
 *         index set could not be constructed
 */
@Override
protected List<ExternalTupleSet> getIndices() throws PcjIndexSetException {
    requireNonNull(conf);
    try {
        final String prefix = requireNonNull(conf.get(RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX));
        final Connector connector = requireNonNull(ConfigUtils.getConnector(conf));

        // Only a Rya triple store configuration can carry an explicit list of PCJ tables.
        final List<String> configuredTables = conf instanceof RdfCloudTripleStoreConfiguration
                ? ((RdfCloudTripleStoreConfiguration) conf).getPcjTables()
                : null;

        // PCJ table name -> SPARQL query of that PCJ, kept in discovery order.
        final Map<String, String> sparqlByTable = Maps.newLinkedHashMap();
        try (final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(connector, prefix)) {
            final PcjTableNameFactory nameFactory = new PcjTableNameFactory();
            if (configuredTables != null && !configuredTables.isEmpty()) {
                // Explicit table names win: resolve each one back to its PCJ id.
                for (final String tableName : configuredTables) {
                    final String pcjId = nameFactory.getPcjId(tableName);
                    sparqlByTable.put(tableName, pcjStorage.getPcjMetadata(pcjId).getSparql());
                }
            } else if (hasRyaDetails(prefix, connector)) {
                // Newer installs of Rya keep PCJ details; ask the storage for its PCJs.
                for (final String pcjId : pcjStorage.listPcjs()) {
                    final String tableName = nameFactory.makeTableName(prefix, pcjId);
                    sparqlByTable.put(tableName, pcjStorage.getPcjMetadata(pcjId).getSparql());
                }
            } else {
                // Last resort: scan the Accumulo table list for names that look like PCJ tables.
                final PcjTables legacyTables = new PcjTables();
                final String indexPrefix = prefix + "INDEX";
                for (final String tableName : connector.tableOperations().list()) {
                    if (tableName.startsWith(indexPrefix)) {
                        sparqlByTable.put(tableName, legacyTables.getPcjMetadata(connector, tableName).getSparql());
                    }
                }
            }
        }

        final List<ExternalTupleSet> indexSets = Lists.newArrayList();
        if (sparqlByTable.isEmpty()) {
            log.info("No Index found");
            return indexSets;
        }

        // Turn every (table, sparql) pair into an index set bound to this configuration.
        for (final Map.Entry<String, String> entry : sparqlByTable.entrySet()) {
            indexSets.add(new AccumuloIndexSet(entry.getValue(), conf, entry.getKey()));
        }
        return indexSets;
    } catch (final PCJStorageException | AccumuloException | AccumuloSecurityException | MalformedQueryException | SailException | QueryEvaluationException | TableNotFoundException e) {
        throw new PcjIndexSetException("Failed to retrieve the indicies.", e);
    }
}
use of org.apache.rya.indexing.pcj.storage.accumulo.PcjTables in project incubator-rya by apache.
the class RyaDirectExample method createPCJ.
// private static void testDeleteGeoData(final SailRepositoryConnection conn)
// throws Exception {
// // Delete all stored points
// final String sparqlDelete = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "//
// + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "//
// + "DELETE {\n" //
// + " ?feature a geo:Feature . "//
// + " ?feature geo:hasGeometry ?point . "//
// + " ?point a geo:Point . "//
// + " ?point geo:asWKT ?wkt . "//
// + "}\n" + "WHERE { \n" + " ?feature a geo:Feature . "//
// + " ?feature geo:hasGeometry ?point . "//
// + " ?point a geo:Point . "//
// + " ?point geo:asWKT ?wkt . "//
// + "}";//
//
// final Update deleteUpdate = conn.prepareUpdate(QueryLanguage.SPARQL,
// sparqlDelete);
// deleteUpdate.execute();
//
// String queryString;
// TupleQuery tupleQuery;
// CountingResultHandler tupleHandler;
//
// // Find all stored points
// queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "//
// + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "//
// + "SELECT ?feature ?point ?wkt " //
// + "{" //
// + " ?feature a geo:Feature . "//
// + " ?feature geo:hasGeometry ?point . "//
// + " ?point a geo:Point . "//
// + " ?point geo:asWKT ?wkt . "//
// + "}";//
// tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString);
// tupleHandler = new CountingResultHandler();
// tupleQuery.evaluate(tupleHandler);
// log.info("Result count : " + tupleHandler.getCount());
// Validate.isTrue(tupleHandler.getCount() == 0);
// }
/**
 * Loads a handful of example statements into Rya and then creates and
 * populates two PCJ tables ({@code INDEX_1} and {@code INDEX_2} under
 * {@code RYA_TABLE_PREFIX}) from them.
 * <p>
 * The Sail is built from a copy of {@code conf} in which
 * {@link ConfigUtils#USE_PCJ} is set to {@code "false"}. The connection and
 * repository are closed quietly before returning.
 *
 * @param conf the configuration the Rya Sail is derived from
 */
private static void createPCJ(final Configuration conf) throws RepositoryException, AccumuloException, AccumuloSecurityException, TableExistsException, PcjException, InferenceEngineException, NumberFormatException, UnknownHostException, SailException, TableNotFoundException {
    final Configuration pcjDisabledConf = new AccumuloRdfConfiguration(conf);
    pcjDisabledConf.set(ConfigUtils.USE_PCJ, "false");

    SailRepository repository = null;
    SailRepositoryConnection conn = null;
    try {
        final Sail sail = RyaSailFactory.getInstance(pcjDisabledConf);
        repository = new SailRepository(sail);
        conn = repository.getConnection();

        // The two queries differ only in the direction of the rdf:type pattern.
        final String typeOfClassQuery =
                "SELECT ?e ?c ?l ?o "
                + "{"
                + " ?c a ?e . "
                + " ?e <http://www.w3.org/2000/01/rdf-schema#label> ?l . "
                + " ?e <uri:talksTo> ?o . "
                + "}";
        final String classOfEntityQuery =
                "SELECT ?e ?c ?l ?o "
                + "{"
                + " ?e a ?c . "
                + " ?e <http://www.w3.org/2000/01/rdf-schema#label> ?l . "
                + " ?e <uri:talksTo> ?o . "
                + "}";

        // Example data the PCJ queries are evaluated against.
        final URI alice = new URIImpl("urn:people:alice");
        final URI featureUri = new URIImpl("urn:feature");
        final URI entity = new URIImpl("uri:entity");
        final URI entityClass = new URIImpl("uri:class");
        final URI target = new URIImpl("uri:obj");
        final URI talksToPredicate = new URIImpl("uri:talksTo");

        conn.add(alice, RDF.TYPE, entity);
        conn.add(featureUri, RDF.TYPE, entity);
        conn.add(entity, RDF.TYPE, entityClass);
        conn.add(entity, RDFS.LABEL, new LiteralImpl("label"));
        conn.add(entity, talksToPredicate, target);

        final String firstPcjTable = RYA_TABLE_PREFIX + "INDEX_1";
        final String secondPcjTable = RYA_TABLE_PREFIX + "INDEX_2";

        // The PCJ tables are written through a mock Accumulo instance, as root with an empty password.
        final MockInstance mockInstance = new MockInstance(INSTANCE);
        final PasswordToken emptyPassword = new PasswordToken("".getBytes(StandardCharsets.UTF_8));
        final Connector accumuloConnector = mockInstance.getConnector("root", emptyPassword);

        new PcjTables().createAndPopulatePcj(conn, accumuloConnector, firstPcjTable, typeOfClassQuery,
                new String[] { "e", "c", "l", "o" }, Optional.<PcjVarOrderFactory>absent());
        new PcjTables().createAndPopulatePcj(conn, accumuloConnector, secondPcjTable, classOfEntityQuery,
                new String[] { "e", "c", "l", "o" }, Optional.<PcjVarOrderFactory>absent());
    } catch (final RyaDAOException e) {
        throw new Error("While creating PCJ tables.", e);
    } finally {
        closeQuietly(conn);
        closeQuietly(repository);
    }
}
use of org.apache.rya.indexing.pcj.storage.accumulo.PcjTables in project incubator-rya by apache.
the class PcjIntegrationTestingUtil method populatePcj.
/**
 * Scan Rya for results that solve the PCJ's query and store them in the PCJ
 * table.
 * <p>
 * This method assumes the PCJ table has already been created.
 * <p>
 * The query result obtained from Rya is closed before this method returns,
 * whether or not loading succeeded.
 *
 * @param accumuloConn
 *            - A connection to the Accumulo that hosts the PCJ table. (not
 *            null)
 * @param pcjTableName
 *            - The name of the PCJ table that will receive the results.
 *            (not null)
 * @param ryaConn
 *            - A connection to the Rya store that will be queried to find
 *            results. (not null)
 * @throws PcjException
 *             If results could not be written to the PCJ table, the PCJ
 *             table does not exist, or the query that is being executed was
 *             malformed or could not be evaluated.
 */
public static void populatePcj(final Connector accumuloConn, final String pcjTableName, final RepositoryConnection ryaConn) throws PcjException {
    checkNotNull(accumuloConn);
    checkNotNull(pcjTableName);
    checkNotNull(ryaConn);
    try {
        // Fetch the query that needs to be executed from the PCJ table.
        final PcjMetadata pcjMetadata = new PcjTables().getPcjMetadata(accumuloConn, pcjTableName);
        final String sparql = pcjMetadata.getSparql();
        // Query Rya for results to the SPARQL query.
        final TupleQuery query = ryaConn.prepareTupleQuery(QueryLanguage.SPARQL, sparql);
        final TupleQueryResult results = query.evaluate();
        try {
            // Load batches of 1000 of them at a time into the PCJ table
            final Set<BindingSet> batch = new HashSet<>(1000);
            while (results.hasNext()) {
                batch.add(results.next());
                if (batch.size() == 1000) {
                    addResults(accumuloConn, pcjTableName, batch);
                    batch.clear();
                }
            }
            // Flush whatever is left over from the last, partially filled batch.
            if (!batch.isEmpty()) {
                addResults(accumuloConn, pcjTableName, batch);
            }
        } finally {
            // Release the resources held by the query result; previously it was never closed.
            results.close();
        }
    } catch (RepositoryException | MalformedQueryException | QueryEvaluationException e) {
        throw new PcjException("Could not populate a PCJ table with Rya results for the table named: " + pcjTableName, e);
    }
}
use of org.apache.rya.indexing.pcj.storage.accumulo.PcjTables in project incubator-rya by apache.
the class PcjIntegrationTestingUtil method writeResults.
/**
 * Add a collection of results to a specific PCJ table.
 * <p>
 * Every result is written once for each variable order stored in the
 * table's PCJ metadata. The batch writer is always closed; if both the
 * write and the close fail, the write failure is thrown and the close
 * failure is attached to it as a suppressed exception.
 *
 * @param accumuloConn
 *            - A connection to the Accumulo that hosts the PCJ table. (not
 *            null)
 * @param pcjTableName
 *            - The name of the PCJ table that will receive the results.
 *            (not null)
 * @param results
 *            - Binding sets that will be written to the PCJ table. (not
 *            null)
 * @throws PcjException
 *             The provided PCJ table doesn't exist, is missing the PCJ
 *             metadata, or the result could not be written to it.
 */
private static void writeResults(final Connector accumuloConn, final String pcjTableName, final Collection<BindingSet> results) throws PcjException {
    checkNotNull(accumuloConn);
    checkNotNull(pcjTableName);
    checkNotNull(results);
    // Fetch the variable orders from the PCJ table.
    final PcjMetadata metadata = new PcjTables().getPcjMetadata(accumuloConn, pcjTableName);
    // Write each result formatted using each of the variable orders.
    PcjException writeFailure = null;
    BatchWriter writer = null;
    try {
        writer = accumuloConn.createBatchWriter(pcjTableName, new BatchWriterConfig());
        for (final BindingSet result : results) {
            final Set<Mutation> addResultMutations = makeWriteResultMutations(metadata.getVarOrders(), result);
            writer.addMutations(addResultMutations);
        }
    } catch (TableNotFoundException | MutationsRejectedException e) {
        // Remember the failure so that closing the writer below cannot replace it.
        writeFailure = new PcjException("Could not add results to the PCJ table named: " + pcjTableName, e);
        throw writeFailure;
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (final MutationsRejectedException e) {
                final PcjException closeFailure = new PcjException("Could not add results to a PCJ table because some of the mutations were rejected.", e);
                if (writeFailure == null) {
                    throw closeFailure;
                }
                // A write failure is already propagating; keep it as the primary
                // exception and record the close failure alongside it.
                writeFailure.addSuppressed(closeFailure);
            }
        }
    }
}
use of org.apache.rya.indexing.pcj.storage.accumulo.PcjTables in project incubator-rya by apache.
the class PcjIntegrationTestingUtil method createAndPopulatePcj.
// ****************************Creation and Population of PcjTables Accumulo***************************
/**
 * Creates a new PCJ table in Accumulo and then fills it with the results
 * that the PCJ's query currently produces when run against Rya.
 * <p>
 * The two steps are not rolled back: if population fails after the table
 * has been created, the partially created PCJ table is left in Accumulo.
 *
 * @param ryaConn - Connects to the Rya that will be scanned. (not null)
 * @param accumuloConn - Connects to the Accumulo that hosts the PCJ results. (not null)
 * @param pcjTableName - The name of the PCJ table that will be created. (not null)
 * @param sparql - The SPARQL query whose results will be loaded into the table. (not null)
 * @param resultVariables - The variables that are included in the query's resulting binding sets. (not null)
 * @param pcjVarOrderFactory - An optional factory that decides which variable orders
 *   the results will be stored in. When absent, a {@link ShiftVarOrderFactory}
 *   is used instead. (not null)
 * @throws PcjException The PCJ table could not be created or the values from
 *   Rya were not able to be loaded into it.
 */
public static void createAndPopulatePcj(final RepositoryConnection ryaConn, final Connector accumuloConn, final String pcjTableName, final String sparql, final String[] resultVariables, final Optional<PcjVarOrderFactory> pcjVarOrderFactory) throws PcjException {
    checkNotNull(ryaConn);
    checkNotNull(accumuloConn);
    checkNotNull(pcjTableName);
    checkNotNull(sparql);
    checkNotNull(resultVariables);
    checkNotNull(pcjVarOrderFactory);

    // Derive every variable order the table will store from the query's result variables.
    final PcjVarOrderFactory orderFactory = pcjVarOrderFactory.or(new ShiftVarOrderFactory());
    final VariableOrder baseOrder = new VariableOrder(resultVariables);
    final Set<VariableOrder> tableOrders = orderFactory.makeVarOrders(baseOrder);

    // Step 1: create the (empty) PCJ table together with its metadata.
    new PcjTables().createPcjTable(accumuloConn, pcjTableName, tableOrders, sparql);

    // Step 2: load the matches that already exist in Rya.
    populatePcj(accumuloConn, pcjTableName, ryaConn);
}
Aggregations