Search in sources :

Example 26 with TABLE_LAYOUT

use of org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT in project incubator-rya by apache.

the class AccumuloQueryRuleset method getRules.

/**
 * Finds the rules in this ruleset that cover an entire scan range of one table.
 * A rule is returned only when it resolves to the same table layout as the one
 * requested and its own row range contains (or equals) the given range, so every
 * row inside {@code range} is relevant to each returned rule. The given range is
 * not required to span all rows relevant to a rule.
 * @param layout The table layout the range belongs to
 * @param range The row range being scanned in that table
 * @return The rules whose layout matches and whose range contains the given range;
 *      empty if none match
 * @throws IOException if the range for a rule's statement can't be resolved
 */
public List<CopyRule> getRules(final TABLE_LAYOUT layout, final Range range) throws IOException {
    final List<CopyRule> applicable = new LinkedList<>();
    for (final CopyRule candidate : rules) {
        // Resolve the candidate's statement pattern to a (table layout, byte range) pair.
        final Map.Entry<TABLE_LAYOUT, ByteRange> resolved = getRange(candidate.getStatement());
        // Only rules that target the same table can be related to the given range.
        if (resolved.getKey().equals(layout)) {
            final ByteRange bounds = resolved.getValue();
            final Text firstRow = new Text(bounds.getStart());
            final Text lastRow = new Text(bounds.getEnd());
            // Keep the rule only if its range fully encloses the range being scanned.
            if (rangeContainsRange(new Range(firstRow, lastRow), range)) {
                applicable.add(candidate);
            }
        }
    }
    return applicable;
}
Also used : TABLE_LAYOUT(org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT) ByteRange(org.apache.rya.api.query.strategy.ByteRange) Text(org.apache.hadoop.io.Text) Range(org.apache.accumulo.core.data.Range) ByteRange(org.apache.rya.api.query.strategy.ByteRange) HashMap(java.util.HashMap) Map(java.util.Map) LinkedList(java.util.LinkedList)

Example 27 with TABLE_LAYOUT

use of org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT in project incubator-rya by apache.

the class BaseRuleMapper method setup.

/**
 * Prepares this mapper for one input split: records which table (and, if it is one
 * of the layout tables, which layout) the split reads from, delegates to the parent
 * setup, and then, for layout tables, gathers the copy rules that cover the split's
 * range and combines their conditions into a single condition.
 */
@Override
protected void setup(final Context context) throws IOException, InterruptedException {
    final Configuration conf = context.getConfiguration();
    split = (RangeInputSplit) context.getInputSplit();
    final Range range = split.getRange();

    // Identify the table being scanned and match it against each known layout's table name.
    parentTableName = split.getTableName();
    parentTablePrefix = conf.get(MRUtils.TABLE_PREFIX_PROPERTY);
    for (final TABLE_LAYOUT candidate : TABLE_LAYOUT.values()) {
        if (RdfCloudTripleStoreUtils.layoutPrefixToTable(candidate, parentTablePrefix).equals(parentTableName)) {
            parentLayout = candidate;
        }
    }
    // The table name is published to the configuration before the parent setup runs.
    conf.set(MergeTool.TABLE_NAME_PROP, parentTableName);
    super.setup(context);

    // No layout matched: there are no statement-level rules to evaluate for this table.
    if (parentLayout == null) {
        log.info("(Copying all rows from " + parentTableName + " directly.)");
        return;
    }

    // Statement-level processing: load the ruleset described by the configuration.
    final AccumuloQueryRuleset ruleset;
    try {
        ruleset = new AccumuloQueryRuleset(new AccumuloRdfConfiguration(conf));
    } catch (final QueryRulesetException e) {
        throw new IOException("Error parsing the input query", e);
    }
    final List<CopyRule> rules = ruleset.getRules(parentLayout, range);
    for (final CopyRule rule : rules) {
        log.info("Mapper applies to rule:");
        for (final String line : rule.toString().split("\n")) {
            log.info("\t" + line);
        }
    }

    // Fold the rules' conditions together with OR: every returned rule covers this
    // split's range, so a statement should be copied if any one condition holds.
    for (final CopyRule rule : rules) {
        if (rule.getCondition() == null) {
            // An unconditional rule accepts everything, regardless of the other rules.
            condition = null;
            break;
        }
        if (condition == null) {
            // First conditional rule seen: its condition is the starting point.
            condition = rule.getCondition();
        } else {
            // Further conditional rules widen the accepted set.
            condition = new Or(condition, rule.getCondition());
        }
    }

    // Strategy used to evaluate the combined condition
    strategy = new ParallelEvaluationStrategyImpl(null, null, null, childAccumuloRdfConfiguration);

    // Report the split and the combined condition
    log.info("Table: " + parentTableName);
    log.info("Range:");
    log.info("\tfrom " + keyToString(range.getStartKey(), Integer.MAX_VALUE));
    log.info("\tto " + keyToString(range.getEndKey(), Integer.MAX_VALUE));
    if (condition != null) {
        log.info("Condition:");
        for (final String line : condition.toString().split("\n")) {
            log.info("\t" + line);
        }
    } else {
        log.info("Condition: none");
    }
}
Also used : CopyRule(org.apache.rya.accumulo.mr.merge.util.CopyRule) TABLE_LAYOUT(org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT) AccumuloQueryRuleset(org.apache.rya.accumulo.mr.merge.util.AccumuloQueryRuleset) Or(org.openrdf.query.algebra.Or) Configuration(org.apache.hadoop.conf.Configuration) AccumuloRdfConfiguration(org.apache.rya.accumulo.AccumuloRdfConfiguration) ParallelEvaluationStrategyImpl(org.apache.rya.rdftriplestore.evaluation.ParallelEvaluationStrategyImpl) QueryRulesetException(org.apache.rya.accumulo.mr.merge.util.QueryRuleset.QueryRulesetException) IOException(java.io.IOException) Range(org.apache.accumulo.core.data.Range) AccumuloRdfConfiguration(org.apache.rya.accumulo.AccumuloRdfConfiguration)

Example 28 with TABLE_LAYOUT

use of org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT in project incubator-rya by apache.

the class AccumuloRyaQueryEngine method query.

/**
 * Executes a batch query: each statement pattern in the query is resolved to a row
 * range, and all ranges are scanned in a single table. If the number of distinct
 * ranges exceeds the query's {@code maxRanges}, one {@link BatchScanner} covers all
 * of them; otherwise one {@link Scanner} is created per range.
 * <p>
 * Only one table layout and one context are used for the whole query: the values
 * from the last statement win (see the TODOs below).
 *
 * @param ryaQuery the batch query; must not be null and must contain statements
 * @return the matching statements, limited to {@code maxResults} when that is set
 * @throws RyaDAOException wrapping any failure, including unsupported patterns and
 *      an empty statement list
 */
@Override
public CloseableIterable<RyaStatement> query(BatchRyaQuery ryaQuery) throws RyaDAOException {
    Preconditions.checkNotNull(ryaQuery);
    Iterable<RyaStatement> stmts = ryaQuery.getQueries();
    Preconditions.checkNotNull(stmts);
    // query configuration
    String[] auths = ryaQuery.getAuths();
    final Authorizations authorizations = auths != null ? new Authorizations(auths) : configuration.getAuthorizations();
    final Long ttl = ryaQuery.getTtl();
    Long maxResults = ryaQuery.getMaxResults();
    Integer numQueryThreads = ryaQuery.getNumQueryThreads();
    String regexSubject = ryaQuery.getRegexSubject();
    String regexPredicate = ryaQuery.getRegexPredicate();
    String regexObject = ryaQuery.getRegexObject();
    TableLayoutStrategy tableLayoutStrategy = configuration.getTableLayoutStrategy();
    int maxRanges = ryaQuery.getMaxRanges();
    // TODO: cannot span multiple tables here
    try {
        Collection<Range> ranges = new HashSet<Range>();
        TABLE_LAYOUT layout = null;
        RyaURI context = null;
        TriplePatternStrategy strategy = null;
        for (RyaStatement stmt : stmts) {
            // TODO: This will be overwritten
            context = stmt.getContext();
            strategy = ryaContext.retrieveStrategy(stmt);
            if (strategy == null) {
                throw new IllegalArgumentException("TriplePattern[" + stmt + "] not supported");
            }
            Map.Entry<RdfCloudTripleStoreConstants.TABLE_LAYOUT, ByteRange> entry = strategy.defineRange(stmt.getSubject(), stmt.getPredicate(), stmt.getObject(), stmt.getContext(), null);
            // Remember the layout (last one wins) and collect the row range for this pattern.
            layout = entry.getKey();
            ByteRange byteRange = entry.getValue();
            Range range = new Range(new Text(byteRange.getStart()), new Text(byteRange.getEnd()));
            ranges.add(range);
        }
        // no ranges
        if (layout == null || strategy == null) {
            throw new IllegalArgumentException("No table layout specified, or no statements.");
        }
        final TripleRowRegex tripleRowRegex = strategy.buildRegex(regexSubject, regexPredicate, regexObject, null, null);
        final String table = layoutToTable(layout, tableLayoutStrategy);
        boolean useBatchScanner = ranges.size() > maxRanges;
        FluentCloseableIterable<RyaStatement> results = null;
        if (useBatchScanner) {
            BatchScanner scanner = connector.createBatchScanner(table, authorizations, numQueryThreads);
            try {
                scanner.setRanges(ranges);
                fillScanner(scanner, context, null, ttl, null, tripleRowRegex, ryaQuery.getConf());
                results = FluentCloseableIterable.from(new ScannerBaseCloseableIterable(scanner)).transform(keyValueToRyaStatementFunctionMap.get(layout));
            } catch (Exception e) {
                // The scanner has not been handed to the caller yet; release it before failing.
                scanner.close();
                throw e;
            }
        } else {
            final RyaURI fcontext = context;
            final RdfCloudTripleStoreConfiguration fconf = ryaQuery.getConf();
            FluentIterable<RyaStatement> fluent = FluentIterable.from(ranges).transformAndConcat(new Function<Range, Iterable<Map.Entry<Key, Value>>>() {

                @Override
                public Iterable<Map.Entry<Key, Value>> apply(Range range) {
                    Scanner scanner = null;
                    try {
                        scanner = connector.createScanner(table, authorizations);
                        scanner.setRange(range);
                        fillScanner(scanner, fcontext, null, ttl, null, tripleRowRegex, fconf);
                        return scanner;
                    } catch (Exception e) {
                        // Don't leak a scanner that was created but could not be configured.
                        if (scanner != null) {
                            scanner.close();
                        }
                        throw new RuntimeException(e);
                    }
                }
            }).transform(keyValueToRyaStatementFunctionMap.get(layout));
            results = FluentCloseableIterable.from(CloseableIterables.wrap(fluent));
        }
        if (maxResults != null) {
            // Clamp instead of truncating: Long.intValue() would wrap values above Integer.MAX_VALUE.
            results = results.limit((int) Math.min(maxResults, Integer.MAX_VALUE));
        }
        return results;
    } catch (Exception e) {
        throw new RyaDAOException(e);
    }
}
Also used : BatchScanner(org.apache.accumulo.core.client.BatchScanner) Scanner(org.apache.accumulo.core.client.Scanner) ByteRange(org.apache.rya.api.query.strategy.ByteRange) BatchScanner(org.apache.accumulo.core.client.BatchScanner) RyaStatement(org.apache.rya.api.domain.RyaStatement) Function(com.google.common.base.Function) TripleRowRegex(org.apache.rya.api.resolver.triple.TripleRowRegex) HashSet(java.util.HashSet) TableLayoutStrategy(org.apache.rya.api.layout.TableLayoutStrategy) Authorizations(org.apache.accumulo.core.security.Authorizations) TriplePatternStrategy(org.apache.rya.api.query.strategy.TriplePatternStrategy) Text(org.apache.hadoop.io.Text) RyaRange(org.apache.rya.api.domain.RyaRange) Range(org.apache.accumulo.core.data.Range) ByteRange(org.apache.rya.api.query.strategy.ByteRange) IOException(java.io.IOException) RyaDAOException(org.apache.rya.api.persist.RyaDAOException) TABLE_LAYOUT(org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT) RyaURI(org.apache.rya.api.domain.RyaURI) RyaDAOException(org.apache.rya.api.persist.RyaDAOException) RdfCloudTripleStoreConfiguration(org.apache.rya.api.RdfCloudTripleStoreConfiguration) HashMap(java.util.HashMap) Map(java.util.Map)

Example 29 with TABLE_LAYOUT

use of org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT in project incubator-rya by apache.

the class RyaTableMutationsFactory method serializeDelete.

/**
 * Builds the delete mutations for a statement, one collection per core table layout
 * (SPO, PO, OSP), each holding the single delete mutation for that layout's row.
 *
 * @param stmt the statement to delete
 * @return a map from table layout to the delete mutations for that table
 * @throws IOException if the statement cannot be serialized into triple rows
 */
public Map<RdfCloudTripleStoreConstants.TABLE_LAYOUT, Collection<Mutation>> serializeDelete(RyaStatement stmt) throws IOException {
    // TODO: If there are contexts, do we still replicate the information into the default graph as well
    // as the named graphs?
    final TABLE_LAYOUT[] layouts = { TABLE_LAYOUT.SPO, TABLE_LAYOUT.PO, TABLE_LAYOUT.OSP };
    final Map<RdfCloudTripleStoreConstants.TABLE_LAYOUT, Collection<Mutation>> deletesByLayout = new HashMap<RdfCloudTripleStoreConstants.TABLE_LAYOUT, Collection<Mutation>>();
    try {
        final Map<TABLE_LAYOUT, TripleRow> serialized = ryaContext.serializeTriple(stmt);
        for (final TABLE_LAYOUT layout : layouts) {
            // One delete mutation per layout, wrapped in its own collection.
            final Collection<Mutation> deletes = new ArrayList<Mutation>();
            deletes.add(deleteMutation(serialized.get(layout)));
            deletesByLayout.put(layout, deletes);
        }
    } catch (TripleRowResolverException resolverFailure) {
        throw new IOException(resolverFailure);
    }
    return deletesByLayout;
}
Also used : TripleRowResolverException(org.apache.rya.api.resolver.triple.TripleRowResolverException) TABLE_LAYOUT(org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT) TripleRow(org.apache.rya.api.resolver.triple.TripleRow) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Collection(java.util.Collection) Mutation(org.apache.accumulo.core.data.Mutation) IOException(java.io.IOException) RdfCloudTripleStoreConstants(org.apache.rya.api.RdfCloudTripleStoreConstants)

Example 30 with TABLE_LAYOUT

use of org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT in project incubator-rya by apache.

the class RyaTableMutationsFactory method serialize.

/**
 * Builds the insert mutations for a statement, one collection per core table layout
 * (SPO, PO, OSP), each holding the single mutation created from that layout's row.
 *
 * @param stmt the statement to write
 * @return a map from table layout to the mutations for that table
 * @throws IOException if the statement cannot be serialized into triple rows
 */
// TODO: Does this still need to be collections
public Map<RdfCloudTripleStoreConstants.TABLE_LAYOUT, Collection<Mutation>> serialize(RyaStatement stmt) throws IOException {
    // TODO: If there are contexts, do we still replicate the information into the default graph as well
    // as the named graphs?
    final TABLE_LAYOUT[] layouts = { TABLE_LAYOUT.SPO, TABLE_LAYOUT.PO, TABLE_LAYOUT.OSP };
    final Map<RdfCloudTripleStoreConstants.TABLE_LAYOUT, Collection<Mutation>> writesByLayout = new HashMap<RdfCloudTripleStoreConstants.TABLE_LAYOUT, Collection<Mutation>>();
    try {
        final Map<TABLE_LAYOUT, TripleRow> serialized = ryaContext.serializeTriple(stmt);
        for (final TABLE_LAYOUT layout : layouts) {
            // One mutation per layout, wrapped in its own collection.
            final Collection<Mutation> writes = new ArrayList<Mutation>();
            writes.add(createMutation(serialized.get(layout)));
            writesByLayout.put(layout, writes);
        }
    } catch (TripleRowResolverException resolverFailure) {
        throw new IOException(resolverFailure);
    }
    return writesByLayout;
}
Also used : TripleRowResolverException(org.apache.rya.api.resolver.triple.TripleRowResolverException) TABLE_LAYOUT(org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT) TripleRow(org.apache.rya.api.resolver.triple.TripleRow) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Collection(java.util.Collection) Mutation(org.apache.accumulo.core.data.Mutation) IOException(java.io.IOException) RdfCloudTripleStoreConstants(org.apache.rya.api.RdfCloudTripleStoreConstants)

Aggregations

TABLE_LAYOUT (org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT)32 TripleRow (org.apache.rya.api.resolver.triple.TripleRow)17 RyaStatement (org.apache.rya.api.domain.RyaStatement)14 RyaURI (org.apache.rya.api.domain.RyaURI)14 IOException (java.io.IOException)12 Map (java.util.Map)11 Range (org.apache.accumulo.core.data.Range)11 Text (org.apache.hadoop.io.Text)11 RyaType (org.apache.rya.api.domain.RyaType)11 ByteRange (org.apache.rya.api.query.strategy.ByteRange)11 HashMap (java.util.HashMap)10 Key (org.apache.accumulo.core.data.Key)9 Value (org.apache.accumulo.core.data.Value)9 Mutation (org.apache.accumulo.core.data.Mutation)8 Authorizations (org.apache.accumulo.core.security.Authorizations)8 Scanner (org.apache.accumulo.core.client.Scanner)7 Test (org.junit.Test)7 RyaRange (org.apache.rya.api.domain.RyaRange)6 IntWritable (org.apache.hadoop.io.IntWritable)5 RyaDAOException (org.apache.rya.api.persist.RyaDAOException)5