Search in sources :

Example 1 with ByteRange

use of org.apache.rya.api.query.strategy.ByteRange in project incubator-rya by apache.

the class HashedSpoWholeRowTriplePatternStrategy method defineRange.

@Override
public Map.Entry<TABLE_LAYOUT, ByteRange> defineRange(final RyaURI subject, final RyaURI predicate, final RyaType object, final RyaURI context, final RdfCloudTripleStoreConfiguration conf) throws IOException {
    try {
        // s_r(p)(ng)
        if (!handles(subject, predicate, object, context)) {
            return null;
        }
        final MessageDigest md = MessageDigest.getInstance("MD5");
        final RyaContext ryaContext = RyaContext.getInstance();
        final TABLE_LAYOUT table_layout = TABLE_LAYOUT.SPO;
        byte[] start;
        byte[] stop;
        if (predicate != null) {
            if (object != null) {
                if (object instanceof RyaRange) {
                    // sp_r(o)
                    // range = sp_r(o.s)->sp_r(o.e) (remove last byte to remove type info)
                    RyaRange rv = (RyaRange) object;
                    rv = ryaContext.transformRange(rv);
                    final byte[] objStartBytes = ryaContext.serializeType(rv.getStart())[0];
                    final byte[] objEndBytes = ryaContext.serializeType(rv.getStop())[0];
                    final byte[] subjBytes = subject.getData().getBytes(StandardCharsets.UTF_8);
                    final byte[] hashSubj = Hex.encodeHexString(md.digest(subjBytes)).getBytes(StandardCharsets.UTF_8);
                    final byte[] predBytes = predicate.getData().getBytes(StandardCharsets.UTF_8);
                    start = Bytes.concat(hashSubj, DELIM_BYTES, subjBytes, DELIM_BYTES, predBytes, DELIM_BYTES, objStartBytes);
                    stop = Bytes.concat(hashSubj, DELIM_BYTES, subjBytes, DELIM_BYTES, predBytes, DELIM_BYTES, objEndBytes, DELIM_BYTES, LAST_BYTES);
                } else {
                    // spo
                    // range = spo->spo (remove last byte to remove type info)
                    // TODO: There must be a better way than creating multiple byte[]
                    final byte[] subjBytes = subject.getData().getBytes(StandardCharsets.UTF_8);
                    final byte[] hashSubj = Hex.encodeHexString(md.digest(subjBytes)).getBytes(StandardCharsets.UTF_8);
                    final byte[] objBytes = ryaContext.serializeType(object)[0];
                    start = Bytes.concat(hashSubj, DELIM_BYTES, subjBytes, DELIM_BYTES, predicate.getData().getBytes(StandardCharsets.UTF_8), DELIM_BYTES, objBytes, TYPE_DELIM_BYTES);
                    stop = Bytes.concat(start, LAST_BYTES);
                }
            } else if (predicate instanceof RyaRange) {
                // s_r(p)
                // range = s_r(p.s)->s_r(p.e)
                RyaRange rv = (RyaRange) predicate;
                rv = ryaContext.transformRange(rv);
                final byte[] subjBytes = subject.getData().getBytes(StandardCharsets.UTF_8);
                final byte[] hashSubj = Hex.encodeHexString(md.digest(subjBytes)).getBytes(StandardCharsets.UTF_8);
                final byte[] predStartBytes = rv.getStart().getData().getBytes(StandardCharsets.UTF_8);
                final byte[] predStopBytes = rv.getStop().getData().getBytes(StandardCharsets.UTF_8);
                start = Bytes.concat(hashSubj, DELIM_BYTES, subjBytes, DELIM_BYTES, predStartBytes);
                stop = Bytes.concat(hashSubj, DELIM_BYTES, subjBytes, DELIM_BYTES, predStopBytes, DELIM_BYTES, LAST_BYTES);
            } else {
                // sp
                // range = sp
                final byte[] subjBytes = subject.getData().getBytes(StandardCharsets.UTF_8);
                final byte[] hashSubj = Hex.encodeHexString(md.digest(subjBytes)).getBytes(StandardCharsets.UTF_8);
                start = Bytes.concat(hashSubj, DELIM_BYTES, subjBytes, DELIM_BYTES, predicate.getData().getBytes(StandardCharsets.UTF_8), DELIM_BYTES);
                stop = Bytes.concat(start, LAST_BYTES);
            }
        } else {
            // s
            // range = s
            final byte[] subjBytes = subject.getData().getBytes(StandardCharsets.UTF_8);
            final byte[] hashSubj = Hex.encodeHexString(md.digest(subjBytes)).getBytes(StandardCharsets.UTF_8);
            start = Bytes.concat(hashSubj, DELIM_BYTES, subjBytes, DELIM_BYTES);
            stop = Bytes.concat(start, LAST_BYTES);
        }
        return new RdfCloudTripleStoreUtils.CustomEntry<TABLE_LAYOUT, ByteRange>(table_layout, new ByteRange(start, stop));
    } catch (final RyaTypeResolverException e) {
        throw new IOException(e);
    } catch (final NoSuchAlgorithmException e) {
        throw new IOException(e);
    }
}
Also used : TABLE_LAYOUT(org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT) RyaContext(org.apache.rya.api.resolver.RyaContext) ByteRange(org.apache.rya.api.query.strategy.ByteRange) RyaRange(org.apache.rya.api.domain.RyaRange) RyaTypeResolverException(org.apache.rya.api.resolver.RyaTypeResolverException) IOException(java.io.IOException) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) MessageDigest(java.security.MessageDigest)

Example 2 with ByteRange

use of org.apache.rya.api.query.strategy.ByteRange in project incubator-rya by apache.

the class OspWholeRowTriplePatternStrategy method defineRange.

@Override
public Map.Entry<TABLE_LAYOUT, ByteRange> defineRange(final RyaURI subject, final RyaURI predicate, final RyaType object, final RyaURI context, final RdfCloudTripleStoreConfiguration conf) throws IOException {
    try {
        // r(o)
        if (!handles(subject, predicate, object, context)) {
            return null;
        }
        final RyaContext ryaContext = RyaContext.getInstance();
        final TABLE_LAYOUT table_layout = TABLE_LAYOUT.OSP;
        byte[] start, stop;
        if (subject != null) {
            if (subject instanceof RyaRange) {
                // o_r(s)
                RyaRange ru = (RyaRange) subject;
                ru = ryaContext.transformRange(ru);
                final byte[] subjStartBytes = ru.getStart().getData().getBytes(StandardCharsets.UTF_8);
                final byte[] subjEndBytes = ru.getStop().getData().getBytes(StandardCharsets.UTF_8);
                final byte[] objBytes = ryaContext.serializeType(object)[0];
                start = Bytes.concat(objBytes, DELIM_BYTES, subjStartBytes);
                stop = Bytes.concat(objBytes, DELIM_BYTES, subjEndBytes, DELIM_BYTES, LAST_BYTES);
            } else {
                // os
                final byte[] objBytes = ryaContext.serializeType(object)[0];
                start = Bytes.concat(objBytes, DELIM_BYTES, subject.getData().getBytes(StandardCharsets.UTF_8), DELIM_BYTES);
                stop = Bytes.concat(start, LAST_BYTES);
            }
        } else {
            if (object instanceof RyaRange) {
                // r(o)
                RyaRange rv = (RyaRange) object;
                rv = ryaContext.transformRange(rv);
                start = ryaContext.serializeType(rv.getStart())[0];
                stop = Bytes.concat(ryaContext.serializeType(rv.getStop())[0], DELIM_BYTES, LAST_BYTES);
            } else {
                // o
                start = Bytes.concat(ryaContext.serializeType(object)[0], DELIM_BYTES);
                stop = Bytes.concat(start, LAST_BYTES);
            }
        }
        return new RdfCloudTripleStoreUtils.CustomEntry<TABLE_LAYOUT, ByteRange>(table_layout, new ByteRange(start, stop));
    } catch (final RyaTypeResolverException e) {
        throw new IOException(e);
    }
}
Also used : TABLE_LAYOUT(org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT) RyaContext(org.apache.rya.api.resolver.RyaContext) ByteRange(org.apache.rya.api.query.strategy.ByteRange) RyaRange(org.apache.rya.api.domain.RyaRange) RyaTypeResolverException(org.apache.rya.api.resolver.RyaTypeResolverException) IOException(java.io.IOException)

Example 3 with ByteRange

use of org.apache.rya.api.query.strategy.ByteRange in project incubator-rya by apache.

the class AccumuloQueryRuleset method getRange.

/**
 * Turn a single StatementPattern into a Range.
 * @param conf
 * @throws IOException if the range can't be resolved
 */
private Map.Entry<TABLE_LAYOUT, ByteRange> getRange(final StatementPattern sp) throws IOException {
    final Var context = sp.getContextVar();
    final Statement stmt = new NullableStatementImpl((Resource) sp.getSubjectVar().getValue(), (URI) sp.getPredicateVar().getValue(), sp.getObjectVar().getValue(), context == null ? null : (Resource) context.getValue());
    final RyaStatement rs = RdfToRyaConversions.convertStatement(stmt);
    final TriplePatternStrategy strategy = ryaContext.retrieveStrategy(rs);
    final Map.Entry<TABLE_LAYOUT, ByteRange> entry = strategy.defineRange(rs.getSubject(), rs.getPredicate(), rs.getObject(), rs.getContext(), conf);
    return entry;
}
Also used : TABLE_LAYOUT(org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT) NullableStatementImpl(org.apache.rya.api.utils.NullableStatementImpl) TriplePatternStrategy(org.apache.rya.api.query.strategy.TriplePatternStrategy) ByteRange(org.apache.rya.api.query.strategy.ByteRange) Var(org.openrdf.query.algebra.Var) Statement(org.openrdf.model.Statement) RyaStatement(org.apache.rya.api.domain.RyaStatement) Resource(org.openrdf.model.Resource) RyaStatement(org.apache.rya.api.domain.RyaStatement) HashMap(java.util.HashMap) Map(java.util.Map)

Example 4 with ByteRange

use of org.apache.rya.api.query.strategy.ByteRange in project incubator-rya by apache.

the class StatementPatternStorage method createRange.

protected Map.Entry<TABLE_LAYOUT, Range> createRange(Value s_v, Value p_v, Value o_v) throws IOException {
    RyaURI subject_rya = RdfToRyaConversions.convertResource((Resource) s_v);
    RyaURI predicate_rya = RdfToRyaConversions.convertURI((URI) p_v);
    RyaType object_rya = RdfToRyaConversions.convertValue(o_v);
    TriplePatternStrategy strategy = ryaContext.retrieveStrategy(subject_rya, predicate_rya, object_rya, null);
    if (strategy == null) {
        return new RdfCloudTripleStoreUtils.CustomEntry<TABLE_LAYOUT, Range>(TABLE_LAYOUT.SPO, new Range());
    }
    Map.Entry<TABLE_LAYOUT, ByteRange> entry = strategy.defineRange(subject_rya, predicate_rya, object_rya, null, null);
    ByteRange byteRange = entry.getValue();
    return new RdfCloudTripleStoreUtils.CustomEntry<org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT, Range>(entry.getKey(), new Range(new Text(byteRange.getStart()), new Text(byteRange.getEnd())));
}
Also used : TriplePatternStrategy(org.apache.rya.api.query.strategy.TriplePatternStrategy) ByteRange(org.apache.rya.api.query.strategy.ByteRange) Text(org.apache.hadoop.io.Text) RyaType(org.apache.rya.api.domain.RyaType) Range(org.apache.accumulo.core.data.Range) ByteRange(org.apache.rya.api.query.strategy.ByteRange) RyaURI(org.apache.rya.api.domain.RyaURI) TABLE_LAYOUT(org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT) Map(java.util.Map)

Example 5 with ByteRange

use of org.apache.rya.api.query.strategy.ByteRange in project incubator-rya by apache.

the class AccumuloRyaQueryEngine method queryWithBindingSet.

@Override
public CloseableIteration<? extends Map.Entry<RyaStatement, BindingSet>, RyaDAOException> queryWithBindingSet(Collection<Map.Entry<RyaStatement, BindingSet>> stmts, AccumuloRdfConfiguration conf) throws RyaDAOException {
    if (conf == null) {
        conf = configuration;
    }
    // query configuration
    Authorizations authorizations = conf.getAuthorizations();
    Long ttl = conf.getTtl();
    Long maxResults = conf.getLimit();
    Integer maxRanges = conf.getMaxRangesForScanner();
    Integer numThreads = conf.getNumThreads();
    // TODO: cannot span multiple tables here
    try {
        Collection<Range> ranges = new HashSet<Range>();
        RangeBindingSetEntries rangeMap = new RangeBindingSetEntries();
        TABLE_LAYOUT layout = null;
        RyaURI context = null;
        TriplePatternStrategy strategy = null;
        RyaURI columnFamily = null;
        boolean columnFamilySet = false;
        for (Map.Entry<RyaStatement, BindingSet> stmtbs : stmts) {
            RyaStatement stmt = stmtbs.getKey();
            context = stmt.getContext();
            // Scanner will fetch all ColumnFamilies.
            if (!columnFamilySet) {
                columnFamily = context;
                columnFamilySet = true;
            } else if (columnFamily != null && !columnFamily.equals(context)) {
                columnFamily = null;
            }
            BindingSet bs = stmtbs.getValue();
            strategy = ryaContext.retrieveStrategy(stmt);
            if (strategy == null) {
                throw new IllegalArgumentException("TriplePattern[" + stmt + "] not supported");
            }
            Map.Entry<RdfCloudTripleStoreConstants.TABLE_LAYOUT, ByteRange> entry = strategy.defineRange(stmt.getSubject(), stmt.getPredicate(), stmt.getObject(), stmt.getContext(), conf);
            // use range to set scanner
            // populate scanner based on authorizations, ttl
            layout = entry.getKey();
            ByteRange byteRange = entry.getValue();
            Range range = new Range(new Text(byteRange.getStart()), new Text(byteRange.getEnd()));
            Range rangeMapRange = range;
            // as the Value specified in the BindingSet
            if (context != null) {
                byte[] contextBytes = context.getData().getBytes("UTF-8");
                rangeMapRange = range.bound(new Column(contextBytes, new byte[] { (byte) 0x00 }, new byte[] { (byte) 0x00 }), new Column(contextBytes, new byte[] { (byte) 0xff }, new byte[] { (byte) 0xff }));
            }
            // ranges gets a Range that has no Column bounds, but
            // rangeMap gets a Range that does have Column bounds
            // If we inserted multiple Ranges with the same Row (but
            // distinct Column bounds) into the Set ranges, we would get
            // duplicate
            // results when the Row is not exact. So RyaStatements that
            // differ only in their context are all mapped to the same
            // Range (with no Column bounds) for scanning purposes.
            // However, context information is included in a Column that
            // bounds the Range inserted into rangeMap. This is because
            // in the class {@link RyaStatementBindingSetKeyValueIterator},
            // the rangeMap is
            // used to join the scan results with the BindingSets to produce
            // the query results. The additional ColumnFamily info is
            // required in this join
            // process to allow for the Statement contexts to be compared
            // with the BindingSet contexts
            // See {@link RangeBindingSetEntries#containsKey}.
            ranges.add(range);
            rangeMap.put(rangeMapRange, bs);
        }
        // no ranges. if strategy alone is null, it would be thrown in the loop above.
        if (layout == null || strategy == null) {
            return null;
        }
        String regexSubject = conf.getRegexSubject();
        String regexPredicate = conf.getRegexPredicate();
        String regexObject = conf.getRegexObject();
        TripleRowRegex tripleRowRegex = strategy.buildRegex(regexSubject, regexPredicate, regexObject, null, null);
        String table = layoutToTable(layout, conf);
        boolean useBatchScanner = ranges.size() > maxRanges;
        RyaStatementBindingSetKeyValueIterator iterator = null;
        if (useBatchScanner) {
            ScannerBase scanner = connector.createBatchScanner(table, authorizations, numThreads);
            ((BatchScanner) scanner).setRanges(ranges);
            fillScanner(scanner, columnFamily, null, ttl, null, tripleRowRegex, conf);
            iterator = new RyaStatementBindingSetKeyValueIterator(layout, ryaContext, scanner, rangeMap);
        } else {
            Scanner scannerBase = null;
            Iterator<Map.Entry<Key, Value>>[] iters = new Iterator[ranges.size()];
            int i = 0;
            for (Range range : ranges) {
                scannerBase = connector.createScanner(table, authorizations);
                scannerBase.setRange(range);
                fillScanner(scannerBase, columnFamily, null, ttl, null, tripleRowRegex, conf);
                iters[i] = scannerBase.iterator();
                i++;
            }
            iterator = new RyaStatementBindingSetKeyValueIterator(layout, Iterators.concat(iters), rangeMap, ryaContext);
        }
        if (maxResults != null) {
            iterator.setMaxResults(maxResults);
        }
        return iterator;
    } catch (Exception e) {
        throw new RyaDAOException(e);
    }
}
Also used : BatchScanner(org.apache.accumulo.core.client.BatchScanner) Scanner(org.apache.accumulo.core.client.Scanner) ByteRange(org.apache.rya.api.query.strategy.ByteRange) BatchScanner(org.apache.accumulo.core.client.BatchScanner) RyaStatement(org.apache.rya.api.domain.RyaStatement) Column(org.apache.accumulo.core.data.Column) TripleRowRegex(org.apache.rya.api.resolver.triple.TripleRowRegex) Iterator(java.util.Iterator) HashSet(java.util.HashSet) BindingSet(org.openrdf.query.BindingSet) Authorizations(org.apache.accumulo.core.security.Authorizations) TriplePatternStrategy(org.apache.rya.api.query.strategy.TriplePatternStrategy) ScannerBase(org.apache.accumulo.core.client.ScannerBase) Text(org.apache.hadoop.io.Text) RyaRange(org.apache.rya.api.domain.RyaRange) Range(org.apache.accumulo.core.data.Range) ByteRange(org.apache.rya.api.query.strategy.ByteRange) IOException(java.io.IOException) RyaDAOException(org.apache.rya.api.persist.RyaDAOException) TABLE_LAYOUT(org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT) RyaURI(org.apache.rya.api.domain.RyaURI) Value(org.apache.accumulo.core.data.Value) RyaDAOException(org.apache.rya.api.persist.RyaDAOException) HashMap(java.util.HashMap) Map(java.util.Map) Key(org.apache.accumulo.core.data.Key)

Aggregations

ByteRange (org.apache.rya.api.query.strategy.ByteRange)26 Map (java.util.Map)20 RyaStatement (org.apache.rya.api.domain.RyaStatement)17 TripleRow (org.apache.rya.api.resolver.triple.TripleRow)13 TABLE_LAYOUT (org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT)9 IOException (java.io.IOException)8 RyaRange (org.apache.rya.api.domain.RyaRange)8 HashMap (java.util.HashMap)5 Range (org.apache.accumulo.core.data.Range)5 Text (org.apache.hadoop.io.Text)5 RyaURI (org.apache.rya.api.domain.RyaURI)5 TriplePatternStrategy (org.apache.rya.api.query.strategy.TriplePatternStrategy)5 RyaContext (org.apache.rya.api.resolver.RyaContext)5 RyaTypeResolverException (org.apache.rya.api.resolver.RyaTypeResolverException)5 BatchScanner (org.apache.accumulo.core.client.BatchScanner)3 Scanner (org.apache.accumulo.core.client.Scanner)3 Authorizations (org.apache.accumulo.core.security.Authorizations)3 RyaType (org.apache.rya.api.domain.RyaType)3 RyaDAOException (org.apache.rya.api.persist.RyaDAOException)3 TripleRowRegex (org.apache.rya.api.resolver.triple.TripleRowRegex)3