
Example 1 with Column

Use of org.apache.accumulo.core.data.Column in project accumulo by apache.

The class FilterTest, method test5.

@Test
public void test5() throws IOException {
    Value dv = new Value();
    TreeMap<Key, Value> tm = new TreeMap<>();
    tm.put(new Key(new Text(String.format("%03d", 1)), new Text("a"), new Text("x")), dv);
    tm.put(new Key(new Text(String.format("%03d", 2)), new Text("a"), new Text("y")), dv);
    tm.put(new Key(new Text(String.format("%03d", 3)), new Text("a"), new Text("z")), dv);
    tm.put(new Key(new Text(String.format("%03d", 4)), new Text("b"), new Text("x")), dv);
    tm.put(new Key(new Text(String.format("%03d", 5)), new Text("b"), new Text("y")), dv);
    assertEquals(5, tm.size());
    // Neither of the next two requested Columns names a qualifier, so the
    // wrapped iterator filters nothing and all 5 entries come back.
    int size = size(ncqf(tm, new Column("c".getBytes(), null, null)));
    assertEquals(5, size);
    size = size(ncqf(tm, new Column("a".getBytes(), null, null)));
    assertEquals(5, size);
    size = size(ncqf(tm, new Column("a".getBytes(), "x".getBytes(), null)));
    assertEquals(1, size);
    size = size(ncqf(tm, new Column("a".getBytes(), "x".getBytes(), null), new Column("b".getBytes(), "x".getBytes(), null)));
    assertEquals(2, size);
    size = size(ncqf(tm, new Column("a".getBytes(), "x".getBytes(), null), new Column("b".getBytes(), "y".getBytes(), null)));
    assertEquals(2, size);
    // Column a:x matches one entry; Column b with a null qualifier matches
    // the whole "b" family (two entries), for three in total.
    size = size(ncqf(tm, new Column("a".getBytes(), "x".getBytes(), null), new Column("b".getBytes(), null, null)));
    assertEquals(3, size);
}
Also used : Column(org.apache.accumulo.core.data.Column) Value(org.apache.accumulo.core.data.Value) Text(org.apache.hadoop.io.Text) TreeMap(java.util.TreeMap) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)
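
The size helper called throughout test5 is not defined in this excerpt. A minimal sketch consistent with how it is used above (an assumption, not necessarily the test's actual code): it drains the wrapped iterator and counts the entries it returns.

// Hypothetical sketch of the size helper used by test5; it simply counts
// every entry the iterator produces.
private static int size(SortedKeyValueIterator<Key, Value> iterator) throws IOException {
    int count = 0;
    while (iterator.hasTop()) {
        count++;
        iterator.next();
    }
    return count;
}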

Example 2 with Column

Use of org.apache.accumulo.core.data.Column in project accumulo by apache.

The class FilterTest, method ncqf.

private SortedKeyValueIterator<Key, Value> ncqf(TreeMap<Key, Value> tm, Column... columns) throws IOException {
    HashSet<Column> hsc = new HashSet<>();
    for (Column column : columns) {
        hsc.add(column);
    }
    SortedKeyValueIterator<Key, Value> a = ColumnQualifierFilter.wrap(new SortedMapIterator(tm), hsc);
    a.seek(new Range(), EMPTY_COL_FAMS, false);
    return a;
}
Also used : Column(org.apache.accumulo.core.data.Column) Value(org.apache.accumulo.core.data.Value) SortedMapIterator(org.apache.accumulo.core.iterators.SortedMapIterator) Range(org.apache.accumulo.core.data.Range) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet)
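
EMPTY_COL_FAMS is referenced by ncqf but not defined in this excerpt. A minimal definition that satisfies the Collection<ByteSequence> parameter of seek, consistent with the ByteSequence and ArrayList entries in the Aggregations list below (still an assumption about the test's actual field):

// Assumed definition: an empty collection of column families, meaning the
// seek places no column-family restriction on the underlying iterator.
private static final Collection<ByteSequence> EMPTY_COL_FAMS = new ArrayList<>();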

Example 3 with Column

Use of org.apache.accumulo.core.data.Column in project accumulo by apache.

The class FilterTest, method test3.

@Test
public void test3() throws IOException {
    Value dv = new Value();
    TreeMap<Key, Value> tm = new TreeMap<>();
    HashSet<Column> hsc = new HashSet<>();
    hsc.add(new Column("c".getBytes(), null, null));
    Text colf1 = new Text("a");
    Text colq1 = new Text("b");
    Text colf2 = new Text("c");
    Text colq2 = new Text("d");
    Text colf;
    Text colq;
    for (int i = 0; i < 1000; i++) {
        // Alternate between the two column pairs: even rows get a:b, odd rows get c:d.
        if (i % 2 == 0) {
            colf = colf1;
            colq = colq1;
        } else {
            colf = colf2;
            colq = colq2;
        }
        Key k = new Key(new Text(String.format("%03d", i)), colf, colq);
        k.setTimestamp(157L);
        tm.put(k, dv);
    }
    assertEquals(1000, tm.size());
    // The only requested Column ("c") has no qualifier, so nothing is
    // filtered and all 1000 entries are returned.
    SortedKeyValueIterator<Key, Value> a = ColumnQualifierFilter.wrap(new SortedMapIterator(tm), hsc);
    a.seek(new Range(), EMPTY_COL_FAMS, false);
    assertEquals(1000, size(a));
    hsc = new HashSet<>();
    hsc.add(new Column("a".getBytes(), "b".getBytes(), null));
    a = ColumnQualifierFilter.wrap(new SortedMapIterator(tm), hsc);
    a.seek(new Range(), EMPTY_COL_FAMS, false);
    int size = size(a);
    // Only the 500 even-numbered rows carry column a:b.
    assertEquals(500, size);
    // An empty column set means no filtering at all.
    hsc = new HashSet<>();
    a = ColumnQualifierFilter.wrap(new SortedMapIterator(tm), hsc);
    a.seek(new Range(), EMPTY_COL_FAMS, false);
    size = size(a);
    assertEquals(1000, size);
}
Also used : Text(org.apache.hadoop.io.Text) TreeMap(java.util.TreeMap) SortedMapIterator(org.apache.accumulo.core.iterators.SortedMapIterator) Range(org.apache.accumulo.core.data.Range) Column(org.apache.accumulo.core.data.Column) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 4 with Column

Use of org.apache.accumulo.core.data.Column in project accumulo by apache.

The class ColumnFilterTest, method test3.

public void test3() throws Exception {
    TreeMap<Key, Value> data = new TreeMap<>();
    data.put(newKey("r1", "cf1", "cq1"), new Value(""));
    data.put(newKey("r1", "cf2", "cq1"), new Value(""));
    data.put(newKey("r1", "cf2", "cq2"), new Value(""));
    HashSet<Column> columns = new HashSet<>();
    columns.add(newColumn("cf2", "cq1"));
    SortedKeyValueIterator<Key, Value> cf = ColumnQualifierFilter.wrap(new SortedMapIterator(data), columns);
    cf.seek(new Range(), Collections.emptySet(), false);
    Assert.assertTrue(cf.hasTop());
    Assert.assertEquals(newKey("r1", "cf2", "cq1"), cf.getTopKey());
    cf.next();
    Assert.assertFalse(cf.hasTop());
}
Also used : Column(org.apache.accumulo.core.data.Column) Value(org.apache.accumulo.core.data.Value) TreeMap(java.util.TreeMap) SortedMapIterator(org.apache.accumulo.core.iterators.SortedMapIterator) Range(org.apache.accumulo.core.data.Range) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet)
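
The newKey and newColumn helpers are not shown in this excerpt. Minimal sketches consistent with how they are called above (assumed signatures and bodies, not necessarily the test's own code):

// Hypothetical helpers matching the calls newKey("r1", "cf1", "cq1") and
// newColumn("cf2", "cq1") in test3 above.
private static Key newKey(String row, String cf, String cq) {
    return new Key(new Text(row), new Text(cf), new Text(cq));
}

private static Column newColumn(String cf, String cq) {
    // A null visibility matches the Column usage in the other examples.
    return new Column(cf.getBytes(), cq.getBytes(), null);
}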

Example 5 with Column

Use of org.apache.accumulo.core.data.Column in project incubator-rya by apache.

The class AccumuloRyaQueryEngine, method queryWithBindingSet.

@Override
public CloseableIteration<? extends Map.Entry<RyaStatement, BindingSet>, RyaDAOException> queryWithBindingSet(Collection<Map.Entry<RyaStatement, BindingSet>> stmts, AccumuloRdfConfiguration conf) throws RyaDAOException {
    if (conf == null) {
        conf = configuration;
    }
    // query configuration
    Authorizations authorizations = conf.getAuthorizations();
    Long ttl = conf.getTtl();
    Long maxResults = conf.getLimit();
    Integer maxRanges = conf.getMaxRangesForScanner();
    Integer numThreads = conf.getNumThreads();
    // TODO: cannot span multiple tables here
    try {
        Collection<Range> ranges = new HashSet<Range>();
        RangeBindingSetEntries rangeMap = new RangeBindingSetEntries();
        TABLE_LAYOUT layout = null;
        RyaURI context = null;
        TriplePatternStrategy strategy = null;
        RyaURI columnFamily = null;
        boolean columnFamilySet = false;
        for (Map.Entry<RyaStatement, BindingSet> stmtbs : stmts) {
            RyaStatement stmt = stmtbs.getKey();
            context = stmt.getContext();
            // If every statement shares the same context, fetch only that
            // column family; if contexts differ, columnFamily is reset to
            // null and the Scanner will fetch all column families.
            if (!columnFamilySet) {
                columnFamily = context;
                columnFamilySet = true;
            } else if (columnFamily != null && !columnFamily.equals(context)) {
                columnFamily = null;
            }
            BindingSet bs = stmtbs.getValue();
            strategy = ryaContext.retrieveStrategy(stmt);
            if (strategy == null) {
                throw new IllegalArgumentException("TriplePattern[" + stmt + "] not supported");
            }
            Map.Entry<RdfCloudTripleStoreConstants.TABLE_LAYOUT, ByteRange> entry = strategy.defineRange(stmt.getSubject(), stmt.getPredicate(), stmt.getObject(), stmt.getContext(), conf);
            // use range to set scanner
            // populate scanner based on authorizations, ttl
            layout = entry.getKey();
            ByteRange byteRange = entry.getValue();
            Range range = new Range(new Text(byteRange.getStart()), new Text(byteRange.getEnd()));
            Range rangeMapRange = range;
            // When the statement has a context, bound the rangeMap copy of
            // this Range with Columns that encode the context, so that scan
            // results can be joined back to the BindingSet supplied with the
            // statement.
            if (context != null) {
                byte[] contextBytes = context.getData().getBytes("UTF-8");
                rangeMapRange = range.bound(new Column(contextBytes, new byte[] { (byte) 0x00 }, new byte[] { (byte) 0x00 }), new Column(contextBytes, new byte[] { (byte) 0xff }, new byte[] { (byte) 0xff }));
            }
            // ranges gets a Range that has no Column bounds, while rangeMap
            // gets a Range that does have Column bounds. If we inserted
            // multiple Ranges with the same Row (but distinct Column bounds)
            // into the Set ranges, we would get duplicate results when the
            // Row is not exact. So RyaStatements that differ only in their
            // context are all mapped to the same Range (with no Column
            // bounds) for scanning purposes. However, context information is
            // included in a Column that bounds the Range inserted into
            // rangeMap. This is because in the class
            // {@link RyaStatementBindingSetKeyValueIterator}, the rangeMap is
            // used to join the scan results with the BindingSets to produce
            // the query results. The additional ColumnFamily info is required
            // in this join process to allow the Statement contexts to be
            // compared with the BindingSet contexts.
            // See {@link RangeBindingSetEntries#containsKey}.
            ranges.add(range);
            rangeMap.put(rangeMapRange, bs);
        }
        // No ranges were built. A null strategy alone would already have
        // thrown IllegalArgumentException in the loop above.
        if (layout == null || strategy == null) {
            return null;
        }
        String regexSubject = conf.getRegexSubject();
        String regexPredicate = conf.getRegexPredicate();
        String regexObject = conf.getRegexObject();
        TripleRowRegex tripleRowRegex = strategy.buildRegex(regexSubject, regexPredicate, regexObject, null, null);
        String table = layoutToTable(layout, conf);
        boolean useBatchScanner = ranges.size() > maxRanges;
        RyaStatementBindingSetKeyValueIterator iterator = null;
        if (useBatchScanner) {
            ScannerBase scanner = connector.createBatchScanner(table, authorizations, numThreads);
            ((BatchScanner) scanner).setRanges(ranges);
            fillScanner(scanner, columnFamily, null, ttl, null, tripleRowRegex, conf);
            iterator = new RyaStatementBindingSetKeyValueIterator(layout, ryaContext, scanner, rangeMap);
        } else {
            Scanner scannerBase = null;
            Iterator<Map.Entry<Key, Value>>[] iters = new Iterator[ranges.size()];
            int i = 0;
            for (Range range : ranges) {
                scannerBase = connector.createScanner(table, authorizations);
                scannerBase.setRange(range);
                fillScanner(scannerBase, columnFamily, null, ttl, null, tripleRowRegex, conf);
                iters[i] = scannerBase.iterator();
                i++;
            }
            iterator = new RyaStatementBindingSetKeyValueIterator(layout, Iterators.concat(iters), rangeMap, ryaContext);
        }
        if (maxResults != null) {
            iterator.setMaxResults(maxResults);
        }
        return iterator;
    } catch (Exception e) {
        throw new RyaDAOException(e);
    }
}
Also used : BatchScanner(org.apache.accumulo.core.client.BatchScanner) Scanner(org.apache.accumulo.core.client.Scanner) ByteRange(org.apache.rya.api.query.strategy.ByteRange) BatchScanner(org.apache.accumulo.core.client.BatchScanner) RyaStatement(org.apache.rya.api.domain.RyaStatement) Column(org.apache.accumulo.core.data.Column) TripleRowRegex(org.apache.rya.api.resolver.triple.TripleRowRegex) Iterator(java.util.Iterator) HashSet(java.util.HashSet) BindingSet(org.openrdf.query.BindingSet) Authorizations(org.apache.accumulo.core.security.Authorizations) TriplePatternStrategy(org.apache.rya.api.query.strategy.TriplePatternStrategy) ScannerBase(org.apache.accumulo.core.client.ScannerBase) Text(org.apache.hadoop.io.Text) RyaRange(org.apache.rya.api.domain.RyaRange) Range(org.apache.accumulo.core.data.Range) ByteRange(org.apache.rya.api.query.strategy.ByteRange) IOException(java.io.IOException) RyaDAOException(org.apache.rya.api.persist.RyaDAOException) TABLE_LAYOUT(org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT) RyaURI(org.apache.rya.api.domain.RyaURI) Value(org.apache.accumulo.core.data.Value) RyaDAOException(org.apache.rya.api.persist.RyaDAOException) HashMap(java.util.HashMap) Map(java.util.Map) Key(org.apache.accumulo.core.data.Key)
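
The context-bounding step above is the heart of the rangeMap bookkeeping. A minimal standalone sketch of the same Range.bound pattern, using hypothetical row and context values; the 0x00 and 0xff bytes serve as the lowest and highest qualifier/visibility bounds within the context's column family, exactly as in the method above:

// Hypothetical inputs for illustration only.
byte[] contextBytes = "urn:example:context".getBytes(StandardCharsets.UTF_8);
Range rowRange = new Range(new Text("startRow"), new Text("endRow"));

// Range.bound restricts the row Range to Keys whose Column falls between
// the two bounds: here, anything within the context's column family.
Range bounded = rowRange.bound(
        new Column(contextBytes, new byte[] { (byte) 0x00 }, new byte[] { (byte) 0x00 }),
        new Column(contextBytes, new byte[] { (byte) 0xff }, new byte[] { (byte) 0xff }));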

Aggregations

Column (org.apache.accumulo.core.data.Column) 16
Key (org.apache.accumulo.core.data.Key) 10
Value (org.apache.accumulo.core.data.Value) 10
HashSet (java.util.HashSet) 6
TreeMap (java.util.TreeMap) 6
Range (org.apache.accumulo.core.data.Range) 6
SortedMapIterator (org.apache.accumulo.core.iterators.SortedMapIterator) 5
Text (org.apache.hadoop.io.Text) 4
IOException (java.io.IOException) 3
ArrayList (java.util.ArrayList) 3
Scanner (org.apache.accumulo.core.client.Scanner) 3
Test (org.junit.Test) 3
TreeSet (java.util.TreeSet) 2
BatchScanner (org.apache.accumulo.core.client.BatchScanner) 2
IteratorSetting (org.apache.accumulo.core.client.IteratorSetting) 2
ByteSequence (org.apache.accumulo.core.data.ByteSequence) 2
IteratorAdapter (org.apache.accumulo.core.iterators.IteratorAdapter) 2
Field (java.lang.reflect.Field) 1
HashMap (java.util.HashMap) 1
Iterator (java.util.Iterator) 1