Search in sources :

Example 71 with VariableOrder

use of org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder in project incubator-rya by apache.

the class MongoPcjDocuments method getPcjMetadata.

/**
 * Looks up the {@link PcjMetadata} that is stored for a PCJ.
 *
 * @param pcjId - The Id of the PCJ whose metadata document is fetched from MongoDB. (not null)
 * @return The {@link PcjMetadata} rebuilt from the stored SPARQL, cardinality
 *   and variable orders.
 * @throws PCJStorageException No metadata document was found for the PCJ.
 */
public PcjMetadata getPcjMetadata(final String pcjId) throws PCJStorageException {
    requireNonNull(pcjId);

    // The lookup is by metadata ID, so only the first match is needed.
    final Document query = new Document(PCJ_METADATA_ID, makeMetadataID(pcjId));
    final Document metadataDoc = pcjCollection.find(query).first();
    if (metadataDoc == null) {
        throw new PCJStorageException("The PCJ: " + pcjId + " does not exist.");
    }

    final String storedSparql = metadataDoc.getString(SPARQL_FIELD);
    // A missing cardinality field is read as zero.
    final int storedCardinality = metadataDoc.getInteger(CARDINALITY_FIELD, 0);

    // Each stored list of variable names becomes one VariableOrder.
    final List<List<String>> storedOrders = (List<List<String>>) metadataDoc.get(VAR_ORDER_FIELD);
    final Set<VariableOrder> orders = new HashSet<>();
    for (final List<String> names : storedOrders) {
        final VariableOrder order = new VariableOrder(names);
        orders.add(order);
    }

    return new PcjMetadata(storedSparql, storedCardinality, orders);
}
Also used : VariableOrder(org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder) ArrayList(java.util.ArrayList) List(java.util.List) PcjMetadata(org.apache.rya.indexing.pcj.storage.PcjMetadata) PCJStorageException(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage.PCJStorageException) Document(org.bson.Document) HashSet(java.util.HashSet)

Example 72 with VariableOrder

use of org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder in project incubator-rya by apache.

the class JoinBatchInformationTypeAdapter method serialize.

/**
 * Writes a {@link JoinBatchInformation} out as a flat {@link JsonObject}.
 * <p>
 * The column is stored as its family and qualifier joined by a NUL character,
 * and the span as its start and end rows joined the same way. The binding
 * set's names, joined with ";", serve both as the variable order used to
 * convert the binding set and as the stored "updateVarOrder" value.
 *
 * @param batch - The batch information to serialize.
 * @param typeOfSrc - Not used by this implementation.
 * @param context - Not used by this implementation.
 * @return The {@link JsonObject} holding the batch's fields.
 */
@Override
public JsonElement serialize(final JoinBatchInformation batch, final Type typeOfSrc, final JsonSerializationContext context) {
    final JsonObject json = new JsonObject();

    // Members are added in a fixed order; keep it stable.
    json.add("class", new JsonPrimitive(batch.getClass().getName()));
    json.add("batchSize", new JsonPrimitive(batch.getBatchSize()));
    json.add("task", new JsonPrimitive(batch.getTask().name()));

    final Column col = batch.getColumn();
    final String columnText = col.getsFamily() + "\u0000" + col.getsQualifier();
    json.add("column", new JsonPrimitive(columnText));

    final Span range = batch.getSpan();
    final String spanText = range.getStart().getsRow() + "\u0000" + range.getEnd().getsRow();
    json.add("span", new JsonPrimitive(spanText));
    json.add("startInc", new JsonPrimitive(range.isStartInclusive()));
    json.add("endInc", new JsonPrimitive(range.isEndInclusive()));

    json.add("side", new JsonPrimitive(batch.getSide().name()));
    json.add("joinType", new JsonPrimitive(batch.getJoinType().name()));

    // The binding names double as the variable order for the conversion.
    final String joinedNames = Joiner.on(";").join(batch.getBs().getBindingNames());
    final VariableOrder bindingOrder = new VariableOrder(joinedNames);
    json.add("bindingSet", new JsonPrimitive(converter.convert(batch.getBs(), bindingOrder)));
    json.add("updateVarOrder", new JsonPrimitive(joinedNames));

    return json;
}
Also used : JsonPrimitive(com.google.gson.JsonPrimitive) RowColumn(org.apache.fluo.api.data.RowColumn) Column(org.apache.fluo.api.data.Column) VariableOrder(org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder) JsonObject(com.google.gson.JsonObject) Span(org.apache.fluo.api.data.Span)

Example 73 with VariableOrder

use of org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder in project incubator-rya by apache.

the class MongoPcjStorageIT method addResults.

/**
 * Creates a PCJ, adds two binding sets to it, and verifies that the PCJ's
 * metadata reports a cardinality of two along with the expected variable orders.
 */
@Test
public void addResults() throws Exception {
    final String ryaInstanceName = conf.getRyaInstanceName();
    try (final PrecomputedJoinStorage storage = new MongoPcjStorage(getMongoClient(), ryaInstanceName)) {
        // Initialize the Rya instance details with PCJ indexing enabled.
        final MongoRyaInstanceDetailsRepository repo = new MongoRyaInstanceDetailsRepository(getMongoClient(), ryaInstanceName);
        repo.initialize(RyaDetails.builder()
                .setRyaInstanceName(ryaInstanceName)
                .setRyaVersion("test")
                .setEntityCentricIndexDetails(new EntityCentricIndexDetails(false))
                .setTemporalIndexDetails(new TemporalIndexDetails(false))
                .setFreeTextDetails(new FreeTextIndexDetails(false))
                .setProspectorDetails(new ProspectorDetails(Optional.absent()))
                .setJoinSelectivityDetails(new JoinSelectivityDetails(Optional.absent()))
                .setPCJIndexDetails(PCJIndexDetails.builder().setEnabled(true))
                .build());

        // Create the PCJ under test.
        final String sparql = "SELECT * WHERE { ?a <http://isA> ?b }";
        final String pcjId = storage.createPcj(sparql);

        // Build the two binding sets that will be stored.
        final MapBindingSet alice = new MapBindingSet();
        alice.addBinding("a", new URIImpl("http://Alice"));
        alice.addBinding("b", new URIImpl("http://Person"));

        final MapBindingSet charlie = new MapBindingSet();
        charlie.addBinding("a", new URIImpl("http://Charlie"));
        charlie.addBinding("b", new URIImpl("http://Comedian"));

        final Set<VisibilityBindingSet> bindingSets = new HashSet<>();
        bindingSets.add(new VisibilityBindingSet(alice, ""));
        bindingSets.add(new VisibilityBindingSet(charlie, ""));
        storage.addResults(pcjId, bindingSets);

        // The stored metadata must now reflect both results.
        final PcjMetadata actual = storage.getPcjMetadata(pcjId);
        final Set<VariableOrder> expectedOrders = new ShiftVarOrderFactory().makeVarOrders(sparql);
        final PcjMetadata expected = new PcjMetadata(sparql, 2L, expectedOrders);
        assertEquals(expected, actual);
    }
}
Also used : ProspectorDetails(org.apache.rya.api.instance.RyaDetails.ProspectorDetails) VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) VariableOrder(org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder) MongoRyaInstanceDetailsRepository(org.apache.rya.mongodb.instance.MongoRyaInstanceDetailsRepository) ShiftVarOrderFactory(org.apache.rya.indexing.pcj.storage.accumulo.ShiftVarOrderFactory) URIImpl(org.openrdf.model.impl.URIImpl) JoinSelectivityDetails(org.apache.rya.api.instance.RyaDetails.JoinSelectivityDetails) EntityCentricIndexDetails(org.apache.rya.api.instance.RyaDetails.EntityCentricIndexDetails) TemporalIndexDetails(org.apache.rya.api.instance.RyaDetails.TemporalIndexDetails) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) FreeTextIndexDetails(org.apache.rya.api.instance.RyaDetails.FreeTextIndexDetails) PcjMetadata(org.apache.rya.indexing.pcj.storage.PcjMetadata) MapBindingSet(org.openrdf.query.impl.MapBindingSet) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 74 with VariableOrder

use of org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder in project incubator-rya by apache.

the class PcjIntegrationTestingUtil method makeWriteResultMutations.

/**
 * Builds one {@link Mutation} per {@link VariableOrder} so that a new
 * {@link BindingSet} can be written to a PCJ table under every order.
 *
 * @param varOrders
 *            - The variable orders the result will be written under. (not
 *            null)
 * @param result
 *            - The new PCJ result. (not null)
 * @return The mutations that write the result, one for each variable order.
 * @throws PcjException
 *             The binding set could not be serialized for one of the
 *             variable orders.
 */
private static Set<Mutation> makeWriteResultMutations(final Set<VariableOrder> varOrders, final BindingSet result) throws PcjException {
    checkNotNull(varOrders);
    checkNotNull(result);

    final Set<Mutation> writes = new HashSet<>();
    try {
        for (final VariableOrder order : varOrders) {
            // The row ID is the result serialized in this variable order.
            final byte[] rowId = converter.convert(result, order);
            final Mutation write = new Mutation(rowId);
            // The column family is the variable order itself; the qualifier
            // and value are left empty.
            write.put(order.toString(), "", "");
            writes.add(write);
        }
    } catch (final BindingSetConversionException e) {
        // A single failed conversion aborts the whole batch.
        throw new PcjException("Could not serialize a result.", e);
    }
    return writes;
}
Also used : VariableOrder(org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder) PcjException(org.apache.rya.indexing.pcj.storage.PcjException) Mutation(org.apache.accumulo.core.data.Mutation) BindingSetConversionException(org.apache.rya.indexing.pcj.storage.accumulo.BindingSetConverter.BindingSetConversionException) HashSet(java.util.HashSet)

Example 75 with VariableOrder

use of org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder in project incubator-rya by apache.

the class PcjIntegrationTestingUtil method createAndPopulatePcj.

// ****************************Creation and Population of PcjTables Accumulo***************************
/**
 * Creates a new PCJ table in Accumulo and then fills it with the historic
 * matches found by scanning an instance of Rya.
 * <p>
 * Nothing is rolled back on failure: if any step fails along the way, the
 * partially created PCJ table is left in Accumulo.
 *
 * @param ryaConn - Connects to the Rya that will be scanned. (not null)
 * @param accumuloConn - Connects to the Accumulo that hosts the PCJ results. (not null)
 * @param pcjTableName - The name of the PCJ table that will be created. (not null)
 * @param sparql - The SPARQL query whose results will be loaded into the table. (not null)
 * @param resultVariables - The variables that are included in the query's resulting binding sets. (not null)
 * @param pcjVarOrderFactory - An optional factory that produces the variable orders
 *   the results will be stored in. When it is absent, a {@link ShiftVarOrderFactory}
 *   is used instead. (not null)
 * @throws PcjException The PCJ table could not be created or the values from
 *   Rya could not be loaded into it.
 */
public static void createAndPopulatePcj(final RepositoryConnection ryaConn, final Connector accumuloConn, final String pcjTableName, final String sparql, final String[] resultVariables, final Optional<PcjVarOrderFactory> pcjVarOrderFactory) throws PcjException {
    checkNotNull(ryaConn);
    checkNotNull(accumuloConn);
    checkNotNull(pcjTableName);
    checkNotNull(sparql);
    checkNotNull(resultVariables);
    checkNotNull(pcjVarOrderFactory);

    final PcjTables pcjTables = new PcjTables();

    // Derive the table's variable orders, using the shifting factory when the
    // caller did not supply one.
    final PcjVarOrderFactory factory = pcjVarOrderFactory.or(new ShiftVarOrderFactory());
    final VariableOrder baseOrder = new VariableOrder(resultVariables);
    final Set<VariableOrder> tableOrders = factory.makeVarOrders(baseOrder);

    // Create the table first, then load the historic matches from Rya into it.
    pcjTables.createPcjTable(accumuloConn, pcjTableName, tableOrders, sparql);
    populatePcj(accumuloConn, pcjTableName, ryaConn);
}
Also used : PcjVarOrderFactory(org.apache.rya.indexing.pcj.storage.accumulo.PcjVarOrderFactory) VariableOrder(org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder) ShiftVarOrderFactory(org.apache.rya.indexing.pcj.storage.accumulo.ShiftVarOrderFactory) PcjTables(org.apache.rya.indexing.pcj.storage.accumulo.PcjTables)

Aggregations

VariableOrder (org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder)79 Test (org.junit.Test)47 HashSet (java.util.HashSet)18 MapBindingSet (org.openrdf.query.impl.MapBindingSet)18 VisibilityBindingSet (org.apache.rya.api.model.VisibilityBindingSet)17 PcjMetadata (org.apache.rya.indexing.pcj.storage.PcjMetadata)16 QueryBindingSet (org.openrdf.query.algebra.evaluation.QueryBindingSet)15 Bytes (org.apache.fluo.api.data.Bytes)14 BindingSet (org.openrdf.query.BindingSet)14 Column (org.apache.fluo.api.data.Column)13 FluoClient (org.apache.fluo.api.client.FluoClient)12 URIImpl (org.openrdf.model.impl.URIImpl)12 Transaction (org.apache.fluo.api.client.Transaction)11 Snapshot (org.apache.fluo.api.client.Snapshot)10 ShiftVarOrderFactory (org.apache.rya.indexing.pcj.storage.accumulo.ShiftVarOrderFactory)10 AccumuloPcjSerializer (org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjSerializer)9 PrecomputedJoinStorage (org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage)8 Connector (org.apache.accumulo.core.client.Connector)5 AccumuloPcjStorage (org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage)5 RowColumn (org.apache.fluo.api.data.RowColumn)4