use of org.apache.rya.api.resolver.triple.TripleRowRegex in project incubator-rya by apache.
the class AccumuloRyaQueryEngine method queryWithBindingSet.
/**
 * Evaluates a batch of statement patterns, each paired with a {@link BindingSet}, against one
 * Accumulo table and returns the scan results joined with those binding sets.
 * <p>
 * Each pattern is converted to a row {@link Range} by the strategy resolved for it. The table
 * that gets scanned is the one belonging to the layout of the last pattern processed, so
 * patterns that resolve to different tables are not supported (see the TODO below).
 *
 * @param stmts statement patterns together with the binding set associated with each
 * @param conf  query configuration; when {@code null}, this engine's own configuration is used
 * @return the joined results, or {@code null} when no layout or strategy was resolved (for
 *         example when {@code stmts} is empty)
 * @throws RyaDAOException wrapping any exception raised while the ranges, scanners or result
 *                         iterator are being built
 */
@Override
public CloseableIteration<? extends Map.Entry<RyaStatement, BindingSet>, RyaDAOException> queryWithBindingSet(Collection<Map.Entry<RyaStatement, BindingSet>> stmts, AccumuloRdfConfiguration conf) throws RyaDAOException {
    final AccumuloRdfConfiguration queryConf = (conf != null) ? conf : configuration;

    // query configuration
    final Authorizations auths = queryConf.getAuthorizations();
    final Long ttl = queryConf.getTtl();
    final Long resultLimit = queryConf.getLimit();
    final Integer maxRanges = queryConf.getMaxRangesForScanner();
    final Integer threadCount = queryConf.getNumThreads();

    // TODO: cannot span multiple tables here
    try {
        final Collection<Range> scanRanges = new HashSet<Range>();
        final RangeBindingSetEntries bindingsByRange = new RangeBindingSetEntries();
        TABLE_LAYOUT layout = null;
        TriplePatternStrategy strategy = null;

        // sharedFamily ends up non-null only when every statement carries the same non-null
        // context. In every other case it is null and the Scanner will fetch all
        // ColumnFamilies.
        RyaURI sharedFamily = null;
        boolean familyInitialised = false;

        for (final Map.Entry<RyaStatement, BindingSet> pair : stmts) {
            final RyaStatement pattern = pair.getKey();
            final RyaURI ctx = pattern.getContext();
            if (familyInitialised) {
                if (sharedFamily != null && !sharedFamily.equals(ctx)) {
                    sharedFamily = null;
                }
            } else {
                sharedFamily = ctx;
                familyInitialised = true;
            }

            final BindingSet bindings = pair.getValue();
            strategy = ryaContext.retrieveStrategy(pattern);
            if (strategy == null) {
                throw new IllegalArgumentException("TriplePattern[" + pattern + "] not supported");
            }

            // The strategy decides both the table layout and the row range for this pattern.
            final Map.Entry<RdfCloudTripleStoreConstants.TABLE_LAYOUT, ByteRange> located = strategy.defineRange(pattern.getSubject(), pattern.getPredicate(), pattern.getObject(), pattern.getContext(), queryConf);
            layout = located.getKey();
            final ByteRange rowBytes = located.getValue();
            final Range rowRange = new Range(new Text(rowBytes.getStart()), new Text(rowBytes.getEnd()));

            // When the statement has a context, the Range used as the rangeMap key is
            // additionally bounded by Columns built from the context bytes.
            Range joinRange = rowRange;
            if (ctx != null) {
                final byte[] ctxBytes = ctx.getData().getBytes("UTF-8");
                final Column lowerBound = new Column(ctxBytes, new byte[] { (byte) 0x00 }, new byte[] { (byte) 0x00 });
                final Column upperBound = new Column(ctxBytes, new byte[] { (byte) 0xff }, new byte[] { (byte) 0xff });
                joinRange = rowRange.bound(lowerBound, upperBound);
            }

            // scanRanges receives the Range without Column bounds, while bindingsByRange
            // receives the Range with Column bounds. Inserting several Ranges with the same
            // Row but distinct Column bounds into scanRanges would yield duplicate results
            // when the Row is not exact, so RyaStatements that differ only in their context
            // share one unbounded Range for scanning. The context is still encoded in the
            // Column bounds of the rangeMap key because
            // {@link RyaStatementBindingSetKeyValueIterator} uses the rangeMap to join scan
            // results with BindingSets, and that join needs the ColumnFamily information to
            // compare Statement contexts with BindingSet contexts.
            // See {@link RangeBindingSetEntries#containsKey}.
            scanRanges.add(rowRange);
            bindingsByRange.put(joinRange, bindings);
        }

        // no ranges. if strategy alone is null, it would be thrown in the loop above.
        if (layout == null || strategy == null) {
            return null;
        }

        final TripleRowRegex rowRegex = strategy.buildRegex(queryConf.getRegexSubject(), queryConf.getRegexPredicate(), queryConf.getRegexObject(), null, null);
        final String table = layoutToTable(layout, queryConf);

        RyaStatementBindingSetKeyValueIterator joined = null;
        if (scanRanges.size() <= maxRanges) {
            // Few enough ranges: one plain Scanner per range, with their iterators concatenated.
            final Iterator<Map.Entry<Key, Value>>[] perRange = new Iterator[scanRanges.size()];
            int slot = 0;
            for (final Range scanRange : scanRanges) {
                final Scanner single = connector.createScanner(table, auths);
                single.setRange(scanRange);
                fillScanner(single, sharedFamily, null, ttl, null, rowRegex, queryConf);
                perRange[slot++] = single.iterator();
            }
            joined = new RyaStatementBindingSetKeyValueIterator(layout, Iterators.concat(perRange), bindingsByRange, ryaContext);
        } else {
            // More ranges than the configured maximum: hand them all to one BatchScanner.
            final ScannerBase batch = connector.createBatchScanner(table, auths, threadCount);
            ((BatchScanner) batch).setRanges(scanRanges);
            fillScanner(batch, sharedFamily, null, ttl, null, rowRegex, queryConf);
            joined = new RyaStatementBindingSetKeyValueIterator(layout, ryaContext, batch, bindingsByRange);
        }

        if (resultLimit != null) {
            joined.setMaxResults(resultLimit);
        }
        return joined;
    } catch (Exception e) {
        throw new RyaDAOException(e);
    }
}
use of org.apache.rya.api.resolver.triple.TripleRowRegex in project incubator-rya by apache.
the class AccumuloRyaQueryEngine method query.
/**
 * Runs a single-pattern {@link RyaQuery} through one Accumulo {@link Scanner} and returns the
 * matching statements.
 * <p>
 * When a strategy is found for the pattern it supplies the table layout and row range. When
 * none is found, an unbounded {@link Range} over the SPO layout is scanned instead.
 *
 * @param ryaQuery the query to run; it and the statement it carries must not be {@code null}
 * @return the scan results converted to {@link RyaStatement}s, capped at the query's max
 *         results when that value is set
 * @throws RyaDAOException wrapping any exception raised while the scan is being set up
 */
@Override
public CloseableIterable<RyaStatement> query(RyaQuery ryaQuery) throws RyaDAOException {
    Preconditions.checkNotNull(ryaQuery);
    RyaStatement stmt = ryaQuery.getQuery();
    Preconditions.checkNotNull(stmt);

    // query configuration
    String[] auths = ryaQuery.getAuths();
    // Authorizations on the query take precedence over the engine-wide configuration.
    Authorizations authorizations = auths != null ? new Authorizations(auths) : configuration.getAuthorizations();
    Long ttl = ryaQuery.getTtl();
    Long currentTime = ryaQuery.getCurrentTime();
    Long maxResults = ryaQuery.getMaxResults();
    Integer batchSize = ryaQuery.getBatchSize();
    String regexSubject = ryaQuery.getRegexSubject();
    String regexPredicate = ryaQuery.getRegexPredicate();
    String regexObject = ryaQuery.getRegexObject();
    TableLayoutStrategy tableLayoutStrategy = configuration.getTableLayoutStrategy();

    try {
        // find triple pattern range
        TriplePatternStrategy strategy = ryaContext.retrieveStrategy(stmt);
        TABLE_LAYOUT layout;
        Range range;
        RyaURI subject = stmt.getSubject();
        RyaURI predicate = stmt.getPredicate();
        RyaType object = stmt.getObject();
        RyaURI context = stmt.getContext();
        String qualifier = stmt.getQualifer();
        if (strategy != null) {
            Map.Entry<RdfCloudTripleStoreConstants.TABLE_LAYOUT, ByteRange> entry = strategy.defineRange(subject, predicate, object, context, null);
            layout = entry.getKey();
            ByteRange byteRange = entry.getValue();
            range = new Range(new Text(byteRange.getStart()), new Text(byteRange.getEnd()));
        } else {
            // otherwise, full table scan is supported
            range = new Range();
            layout = TABLE_LAYOUT.SPO;
            strategy = ryaContext.retrieveStrategy(layout);
        }

        // The second element of the serialized object is handed to buildRegex as the
        // object type info. For a RyaRange the start of the range is serialized.
        byte[] objectTypeInfo = null;
        if (object != null) {
            // TODO: Not good to serialize this twice
            if (object instanceof RyaRange) {
                objectTypeInfo = RyaContext.getInstance().serializeType(((RyaRange) object).getStart())[1];
            } else {
                objectTypeInfo = RyaContext.getInstance().serializeType(object)[1];
            }
        }
        TripleRowRegex tripleRowRegex = strategy.buildRegex(regexSubject, regexPredicate, regexObject, null, objectTypeInfo);

        // use range to set scanner
        // populate scanner based on authorizations, ttl
        String table = layoutToTable(layout, tableLayoutStrategy);
        Scanner scanner = connector.createScanner(table, authorizations);
        scanner.setRange(range);
        if (batchSize != null) {
            scanner.setBatchSize(batchSize);
        }
        fillScanner(scanner, context, qualifier, ttl, currentTime, tripleRowRegex, ryaQuery.getConf());

        FluentCloseableIterable<RyaStatement> results = FluentCloseableIterable.from(new ScannerBaseCloseableIterable(scanner)).transform(keyValueToRyaStatementFunctionMap.get(layout));
        if (maxResults != null) {
            // limit() takes an int. Clamp rather than use Long.intValue(), which silently
            // wraps values above Integer.MAX_VALUE into a wrong (possibly negative) limit.
            int limit = (int) Math.min(maxResults.longValue(), (long) Integer.MAX_VALUE);
            results = results.limit(limit);
        }
        return results;
    } catch (Exception e) {
        throw new RyaDAOException(e);
    }
}
use of org.apache.rya.api.resolver.triple.TripleRowRegex in project incubator-rya by apache.
the class AbstractTriplePatternStrategyTest method testObjectTypeInfo.
/**
 * Checks that the object type info given to {@code buildRegex} is reflected in the row regex.
 * For the SPO, PO and OSP whole-row strategies, the regex built from the serialized
 * {@code xsd:long} object must match the row of the long-typed statement and must not match
 * the row of a statement whose object has the same data but type {@code xsd:string}.
 */
public void testObjectTypeInfo() throws Exception {
    final RyaURI subject = new RyaURI("urn:test#1234");
    final RyaURI predicate = new RyaURI("urn:test#pred");
    final RyaType longObject = new RyaType(XMLSchema.LONG, "10");
    final RyaStatement longStatement = new RyaStatement(subject, predicate, longObject);
    final Map<RdfCloudTripleStoreConstants.TABLE_LAYOUT, TripleRow> longRows = RyaTripleContext.getInstance(new MockRdfConfiguration()).serializeTriple(longStatement);
    final String longSpoRow = new String(longRows.get(SPO).getRow());
    TriplePatternStrategy spo = new SpoWholeRowTriplePatternStrategy();

    // obj: element 0 is used as the object regex, element 1 as the object type info
    final byte[][] serializedObject = RyaContext.getInstance().serializeType(longObject);
    final String objectData = new String(serializedObject[0]);
    final byte[] typeInfo = serializedObject[1];

    TripleRowRegex regex = spo.buildRegex(null, null, objectData, null, typeInfo);
    assertTrue(Pattern.compile(regex.getRow()).matcher(longSpoRow).matches());

    // build row with same object str data
    final Map<RdfCloudTripleStoreConstants.TABLE_LAYOUT, TripleRow> stringRows = RyaTripleContext.getInstance(new MockRdfConfiguration()).serializeTriple(new RyaStatement(subject, predicate, new RyaType(XMLSchema.STRING, objectData)));
    final String stringSpoRow = new String(stringRows.get(SPO).getRow());
    spo = new SpoWholeRowTriplePatternStrategy();
    regex = spo.buildRegex(null, null, objectData, null, typeInfo);
    assertFalse(Pattern.compile(regex.getRow()).matcher(stringSpoRow).matches());

    // po table
    final TriplePatternStrategy po = new PoWholeRowTriplePatternStrategy();
    regex = po.buildRegex(null, null, objectData, null, typeInfo);
    assertTrue(Pattern.compile(regex.getRow()).matcher(new String(longRows.get(PO).getRow())).matches());
    regex = po.buildRegex(null, null, objectData, null, typeInfo);
    assertFalse(Pattern.compile(regex.getRow()).matcher(new String(stringRows.get(PO).getRow())).matches());

    // osp table
    final TriplePatternStrategy osp = new OspWholeRowTriplePatternStrategy();
    regex = osp.buildRegex(null, null, objectData, null, typeInfo);
    assertTrue(Pattern.compile(regex.getRow()).matcher(new String(longRows.get(OSP).getRow())).matches());
    regex = osp.buildRegex(null, null, objectData, null, typeInfo);
    assertFalse(Pattern.compile(regex.getRow()).matcher(new String(stringRows.get(OSP).getRow())).matches());
}
use of org.apache.rya.api.resolver.triple.TripleRowRegex in project incubator-rya by apache.
the class HashedPoWholeRowTriplePatternStrategyTest method testRegex.
/**
 * Serializes one URI-only statement with {@link WholeRowHashedTripleResolver} and checks the
 * row regexes built by the hashed SPO and PO strategies against the resulting SPO and PO rows:
 * literal subject/predicate/object values and a character-class predicate regex must match,
 * while an unrelated predicate must not.
 */
public void testRegex() throws Exception {
    final RyaURI subject = new RyaURI("urn:test#1234");
    final RyaURI predicate = new RyaURI("urn:test#pred");
    final RyaURI object = new RyaURI("urn:test#obj");
    final RyaStatement statement = new RyaStatement(subject, predicate, object);
    final Map<RdfCloudTripleStoreConstants.TABLE_LAYOUT, TripleRow> rowsByLayout = new WholeRowHashedTripleResolver().serialize(statement);
    final String spoRow = new String(rowsByLayout.get(RdfCloudTripleStoreConstants.TABLE_LAYOUT.SPO).getRow());

    final TriplePatternStrategy spo = new HashedSpoWholeRowTriplePatternStrategy();
    final TriplePatternStrategy po = new HashedPoWholeRowTriplePatternStrategy();
    // The OSP strategy is instantiated but not exercised by this test.
    final TriplePatternStrategy osp = new OspWholeRowTriplePatternStrategy();

    // pred
    TripleRowRegex regex = spo.buildRegex(null, predicate.getData(), null, null, null);
    assertTrue(Pattern.compile(regex.getRow()).matcher(spoRow).matches());

    // subj
    regex = spo.buildRegex(subject.getData(), null, null, null, null);
    assertTrue(Pattern.compile(regex.getRow()).matcher(spoRow).matches());

    // obj
    regex = spo.buildRegex(null, null, object.getData(), null, null);
    assertTrue(Pattern.compile(regex.getRow()).matcher(spoRow).matches());

    // po table
    final String poRow = new String(rowsByLayout.get(RdfCloudTripleStoreConstants.TABLE_LAYOUT.PO).getRow());
    regex = po.buildRegex(null, predicate.getData(), null, null, null);
    assertTrue(Pattern.compile(regex.getRow()).matcher(poRow).matches());

    regex = po.buildRegex(null, predicate.getData(), object.getData(), null, null);
    assertTrue(Pattern.compile(regex.getRow()).matcher(poRow).matches());

    regex = po.buildRegex(subject.getData(), predicate.getData(), object.getData(), null, null);
    assertTrue(Pattern.compile(regex.getRow()).matcher(poRow).matches());

    // various regex
    regex = po.buildRegex(null, "urn:test#pr[e|d]{2}", null, null, null);
    assertTrue(Pattern.compile(regex.getRow()).matcher(poRow).matches());

    // does not match
    regex = po.buildRegex(null, "hello", null, null, null);
    assertFalse(Pattern.compile(regex.getRow()).matcher(poRow).matches());
}
use of org.apache.rya.api.resolver.triple.TripleRowRegex in project incubator-rya by apache.
the class AbstractTriplePatternStrategyTest method testRegex.
/**
 * Serializes one URI-only statement with {@link WholeRowTripleResolver} and verifies that the
 * row regexes produced by the SPO and PO whole-row strategies accept the serialized SPO and PO
 * rows for literal subject/predicate/object values and for a character-class predicate regex,
 * and reject the PO row for an unrelated predicate.
 */
public void testRegex() throws Exception {
    final RyaURI s = new RyaURI("urn:test#1234");
    final RyaURI p = new RyaURI("urn:test#pred");
    final RyaURI o = new RyaURI("urn:test#obj");
    final RyaStatement triple = new RyaStatement(s, p, o);
    final Map<RdfCloudTripleStoreConstants.TABLE_LAYOUT, TripleRow> serialized = new WholeRowTripleResolver().serialize(triple);
    final String spoRowText = new String(serialized.get(RdfCloudTripleStoreConstants.TABLE_LAYOUT.SPO).getRow());

    final TriplePatternStrategy spoStrategy = new SpoWholeRowTriplePatternStrategy();
    final TriplePatternStrategy poStrategy = new PoWholeRowTriplePatternStrategy();
    // The OSP strategy is instantiated but not exercised by this test.
    final TriplePatternStrategy ospStrategy = new OspWholeRowTriplePatternStrategy();

    // pred
    TripleRowRegex rowRegex = spoStrategy.buildRegex(null, p.getData(), null, null, null);
    assertTrue(Pattern.compile(rowRegex.getRow()).matcher(spoRowText).matches());

    // subj
    rowRegex = spoStrategy.buildRegex(s.getData(), null, null, null, null);
    assertTrue(Pattern.compile(rowRegex.getRow()).matcher(spoRowText).matches());

    // obj
    rowRegex = spoStrategy.buildRegex(null, null, o.getData(), null, null);
    assertTrue(Pattern.compile(rowRegex.getRow()).matcher(spoRowText).matches());

    // po table
    final String poRowText = new String(serialized.get(RdfCloudTripleStoreConstants.TABLE_LAYOUT.PO).getRow());
    rowRegex = poStrategy.buildRegex(null, p.getData(), null, null, null);
    assertTrue(Pattern.compile(rowRegex.getRow()).matcher(poRowText).matches());

    rowRegex = poStrategy.buildRegex(null, p.getData(), o.getData(), null, null);
    assertTrue(Pattern.compile(rowRegex.getRow()).matcher(poRowText).matches());

    rowRegex = poStrategy.buildRegex(s.getData(), p.getData(), o.getData(), null, null);
    assertTrue(Pattern.compile(rowRegex.getRow()).matcher(poRowText).matches());

    // various regex
    rowRegex = poStrategy.buildRegex(null, "urn:test#pr[e|d]{2}", null, null, null);
    assertTrue(Pattern.compile(rowRegex.getRow()).matcher(poRowText).matches());

    // does not match
    rowRegex = poStrategy.buildRegex(null, "hello", null, null, null);
    assertFalse(Pattern.compile(rowRegex.getRow()).matcher(poRowText).matches());
}
Aggregations