Search in sources :

Example 6 with FuzzyRowFilter

use of co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter in project cdap by caskdata.

In the class MetadataDataset, the method getMetadata.

/**
 * Fetches the metadata recorded for every entity in the given set.
 *
 * @param targetIds entities for which metadata is required
 * @return one {@link Metadata} object per target id seen in the scanned rows;
 *         an empty set when {@code targetIds} is empty
 */
public Set<Metadata> getMetadata(Set<? extends NamespacedEntityId> targetIds) {
    if (targetIds.isEmpty()) {
        return Collections.emptySet();
    }

    // Build one fuzzy key per requested entity and order them, so that the first and
    // last keys can be used to bound the scan range.
    List<ImmutablePair<byte[], byte[]>> keys = new ArrayList<>(targetIds.size());
    for (NamespacedEntityId id : targetIds) {
        keys.add(getFuzzyKeyFor(id));
    }
    Collections.sort(keys, FUZZY_KEY_COMPARATOR);

    ImmutablePair<byte[], byte[]> lowest = keys.get(0);
    ImmutablePair<byte[], byte[]> highest = keys.get(keys.size() - 1);
    byte[] startRow = lowest.getFirst();
    byte[] stopRow = Bytes.stopKeyForPrefix(highest.getFirst());

    // The scan yields one row per property; collect the rows under the entity they belong to.
    Multimap<NamespacedEntityId, MetadataEntry> entriesByEntity = HashMultimap.create();
    try (Scanner rows = indexedTable.scan(new Scan(startRow, stopRow, new FuzzyRowFilter(keys)))) {
        for (Row row = rows.next(); row != null; row = rows.next()) {
            MetadataEntry parsed = convertRow(row);
            // Rows that convertRow cannot turn into an entry are skipped.
            if (parsed == null) {
                continue;
            }
            entriesByEntity.put(parsed.getTargetId(), parsed);
        }
    }

    // Fold each entity's rows into a single Metadata object: the row stored under TAGS_KEY
    // supplies the tags, every other row is a plain property.
    Set<Metadata> result = new HashSet<>();
    for (Map.Entry<NamespacedEntityId, Collection<MetadataEntry>> grouped : entriesByEntity.asMap().entrySet()) {
        Set<String> tags = Collections.emptySet();
        Map<String, String> properties = new HashMap<>();
        for (MetadataEntry item : grouped.getValue()) {
            if (!TAGS_KEY.equals(item.getKey())) {
                properties.put(item.getKey(), item.getValue());
            } else {
                tags = splitTags(item.getValue());
            }
        }
        result.add(new Metadata(grouped.getKey(), properties, tags));
    }
    return result;
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FuzzyRowFilter(co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter) NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) ImmutablePair(co.cask.cdap.common.utils.ImmutablePair) Collection(java.util.Collection) Scan(co.cask.cdap.api.dataset.table.Scan) Row(co.cask.cdap.api.dataset.table.Row) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) HashSet(java.util.HashSet)

Example 7 with FuzzyRowFilter

use of co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter in project cdap by caskdata.

In the class LevelDBTable, the method scanPersisted.

/**
 * Opens a scanner over the range described by {@code scan}, delegating to {@code core}.
 *
 * @param scan supplies the start row, stop row and optional filter
 * @return a scanner whose rows are produced by {@code LevelDBTable.this.next(...)} on the
 *         underlying scanner and whose {@code close()} closes that underlying scanner
 * @throws DataSetException if a filter is set and it is not a {@link FuzzyRowFilter}
 */
@ReadOnly
@Override
protected Scanner scanPersisted(Scan scan) throws Exception {
    // todo: currently we support only FuzzyRowFilter as an experimental feature
    if (scan.getFilter() != null && !(scan.getFilter() instanceof FuzzyRowFilter)) {
        throw new DataSetException("Unknown filter type: " + scan.getFilter());
    }
    // At this point the filter is either absent or a FuzzyRowFilter, so the cast is safe
    // (casting null simply yields null).
    FuzzyRowFilter fuzzyFilter = (FuzzyRowFilter) scan.getFilter();

    final Scanner delegate = core.scan(scan.getStartRow(), scan.getStopRow(), fuzzyFilter, null, tx);
    return new Scanner() {

        @Nullable
        @Override
        public Row next() {
            return LevelDBTable.this.next(delegate);
        }

        @Override
        public void close() {
            delegate.close();
        }
    };
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) DataSetException(co.cask.cdap.api.dataset.DataSetException) FuzzyRowFilter(co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter) ReadOnly(co.cask.cdap.api.annotation.ReadOnly)

Example 8 with FuzzyRowFilter

use of co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter in project cdap by caskdata.

In the class HBaseMetricsTable, the method scan.

/**
 * Scans the table over the given row range, optionally restricted by a fuzzy row filter.
 *
 * @param startRow passed to {@code configureRangeScan} as the range start; may be {@code null}
 * @param stopRow passed to {@code configureRangeScan} as the range end; may be {@code null}
 * @param filter optional fuzzy row filter passed to {@code configureRangeScan}; may be {@code null}
 * @return an {@code HBaseScanner} wrapping the opened result scanner
 * @throws DataSetException if opening the scanner fails with an {@link IOException}
 */
@Override
public Scanner scan(@Nullable byte[] startRow, @Nullable byte[] stopRow, @Nullable FuzzyRowFilter filter) {
    ScanBuilder rangeScan = tableUtil.buildScan();
    configureRangeScan(rangeScan, startRow, stopRow, filter);
    try {
        return new HBaseScanner(getScanner(rangeScan), columnFamily, rowKeyDistributor);
    } catch (IOException cause) {
        // Surface I/O failures as the dataset-level unchecked exception, keeping the cause.
        throw new DataSetException("Scan failed on table " + tableId, cause);
    }
}
Also used : ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) DataSetException(co.cask.cdap.api.dataset.DataSetException) ScanBuilder(co.cask.cdap.data2.util.hbase.ScanBuilder) IOException(java.io.IOException)

Aggregations

FuzzyRowFilter (co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter): 7; Scanner (co.cask.cdap.api.dataset.table.Scanner): 4; DimensionValue (co.cask.cdap.api.dataset.lib.cube.DimensionValue): 3; Row (co.cask.cdap.api.dataset.table.Row): 3; ImmutablePair (co.cask.cdap.common.utils.ImmutablePair): 3; DataSetException (co.cask.cdap.api.dataset.DataSetException): 2; ArrayList (java.util.ArrayList): 2; ReadOnly (co.cask.cdap.api.annotation.ReadOnly): 1; Scan (co.cask.cdap.api.dataset.table.Scan): 1; MetricsTable (co.cask.cdap.data2.dataset2.lib.table.MetricsTable): 1; InMemoryMetricsTable (co.cask.cdap.data2.dataset2.lib.table.inmemory.InMemoryMetricsTable): 1; ScanBuilder (co.cask.cdap.data2.util.hbase.ScanBuilder): 1; NamespacedEntityId (co.cask.cdap.proto.id.NamespacedEntityId): 1; ImmutableMap (com.google.common.collect.ImmutableMap): 1; IOException (java.io.IOException): 1; Collection (java.util.Collection): 1; HashMap (java.util.HashMap): 1; HashSet (java.util.HashSet): 1; Map (java.util.Map): 1; ResultScanner (org.apache.hadoop.hbase.client.ResultScanner): 1