Search in sources :

Example 51 with Scanner

use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

the class MetricsTableTest method testFuzzyScan.

/**
 * Verifies that a scan with a {@link FuzzyRowFilter} returns only the rows matching the fuzzy key.
 *
 * One row is written for every 4-byte key over the alphabet {a, b, c} (81 rows). The table is then
 * scanned with a fuzzy key whose positions 1 and 3 are 'b'; the test expects exactly the 9 rows with
 * 'b' in those two positions.
 */
@Test
public void testFuzzyScan() throws Exception {
    MetricsTable table = getTable("testFuzzyScan");
    NavigableMap<byte[], SortedMap<byte[], Long>> writes = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
    byte[] abc = { 'a', 'b', 'c' };
    for (byte b1 : abc) {
        for (byte b2 : abc) {
            for (byte b3 : abc) {
                for (byte b4 : abc) {
                    // each row is written with column A only; the scan below asserts that exactly
                    // that one column comes back
                    writes.put(new byte[] { b1, b2, b3, b4 }, mapOf(A, Bytes.toLong(X)));
                }
            }
        }
    }
    table.put(writes);
    // we should have 81 (3^4) rows now
    Assert.assertEquals(81, countRange(table, null, null));
    // now do a fuzzy scan of the table: per the assertions below, mask byte 0x00 pins the position to
    // the given key byte ('b'), while mask byte 0x01 leaves the position unconstrained
    FuzzyRowFilter filter = new FuzzyRowFilter(ImmutableList.of(ImmutablePair.of(new byte[] { '*', 'b', '*', 'b' }, new byte[] { 0x01, 0x00, 0x01, 0x00 })));
    int count = 0;
    // try-with-resources so the scanner is released even when an assertion fails mid-scan
    try (Scanner scanner = table.scan(null, null, filter)) {
        Row entry;
        while ((entry = scanner.next()) != null) {
            Assert.assertTrue(entry.getRow()[1] == 'b' && entry.getRow()[3] == 'b');
            Assert.assertEquals(1, entry.getColumns().size());
            Assert.assertTrue(entry.getColumns().containsKey(A));
            count++;
        }
    }
    // 3 free choices for position 0 times 3 free choices for position 2
    Assert.assertEquals(9, count);
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) ImmutableSortedMap(com.google.common.collect.ImmutableSortedMap) SortedMap(java.util.SortedMap) Row(co.cask.cdap.api.dataset.table.Row) Test(org.junit.Test)

Example 52 with Scanner

use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

the class IndexedTableTest method testIncrementIndexing.

/**
 * Verifies that an {@link IndexedTable} keeps its index consistent when indexed columns are incremented.
 *
 * The table is created with index columns {@code idx1,idx2,idx3}. The test checks that:
 * <ul>
 *   <li>{@code incrementAndGet} on an indexed column makes the row readable by the new value and no
 *       longer readable by the previous value;</li>
 *   <li>a multi-column {@code incrementAndGet} updates the index of every incremented column;</li>
 *   <li>a read-less {@code increment} is rejected with {@link IllegalArgumentException} on an indexed
 *       column but succeeds on a non-indexed column;</li>
 *   <li>{@code incrementAndGet} on a column holding a non-long value fails with
 *       {@link NumberFormatException}.</li>
 * </ul>
 * The dataset instance is deleted in the {@code finally} block regardless of the outcome.
 */
@Test
public void testIncrementIndexing() throws Exception {
    DatasetId incrTabInstance = DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("incrtab");
    dsFrameworkUtil.createInstance("indexedTable", incrTabInstance, DatasetProperties.builder().add(IndexedTable.INDEX_COLUMNS_CONF_KEY, "idx1,idx2,idx3").build());
    final IndexedTable iTable = dsFrameworkUtil.getInstance(incrTabInstance);
    final byte[] idxCol1 = Bytes.toBytes("idx1");
    final byte[] idxCol2 = Bytes.toBytes("idx2");
    final byte[] idxCol3 = Bytes.toBytes("idx3");
    final byte[] row1 = Bytes.toBytes("row1");
    try {
        TransactionExecutor tx = dsFrameworkUtil.newTransactionExecutor(iTable);
        // first increment of idx1 on a fresh row yields 1
        tx.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                long result = iTable.incrementAndGet(row1, idxCol1, 1);
                assertEquals(1L, result);
            }
        });
        final byte[] oneBytes = Bytes.toBytes(1L);
        // the row must now be found through the index under value 1
        tx.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                try (Scanner scanner = iTable.readByIndex(idxCol1, oneBytes)) {
                    Row row = scanner.next();
                    TableAssert.assertRow(row, row1, new byte[][] { idxCol1 }, new byte[][] { oneBytes });
                    assertEmpty(scanner);
                }
            }
        });
        // second increment of idx1 yields 2
        tx.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                long result = iTable.incrementAndGet(row1, idxCol1, 1);
                assertEquals(2L, result);
            }
        });
        final byte[] twoBytes = Bytes.toBytes(2L);
        tx.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                // previous index by value 1 should be gone
                try (Scanner scanner = iTable.readByIndex(idxCol1, oneBytes)) {
                    assertEmpty(scanner);
                }
                // should now be indexed by value 2
                try (Scanner scanner = iTable.readByIndex(idxCol1, twoBytes)) {
                    Row row = scanner.next();
                    TableAssert.assertRow(row, row1, new byte[][] { idxCol1 }, new byte[][] { twoBytes });
                    assertEmpty(scanner);
                }
            }
        });
        final byte[] threeBytes = Bytes.toBytes(3L);
        final byte[][] idxCols = new byte[][] { idxCol1, idxCol2, idxCol3 };
        // idx1 goes from 2 to 3; idx2 and idx3 are incremented for the first time and become 1
        final byte[][] expectedValues = new byte[][] { threeBytes, oneBytes, oneBytes };
        tx.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                Row result = iTable.incrementAndGet(row1, idxCols, new long[] { 1, 1, 1 });
                assertNotNull(result);
                TableAssert.assertColumns(result, idxCols, expectedValues);
            }
        });
        // the row must be reachable through each of the three indexed columns by its new value
        tx.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                try (Scanner scanner = iTable.readByIndex(idxCol1, threeBytes)) {
                    Row row = scanner.next();
                    TableAssert.assertRow(row, row1, idxCols, expectedValues);
                    assertEmpty(scanner);
                }
                try (Scanner scanner = iTable.readByIndex(idxCol2, oneBytes)) {
                    Row row = scanner.next();
                    TableAssert.assertRow(row, row1, idxCols, expectedValues);
                    assertEmpty(scanner);
                }
                try (Scanner scanner = iTable.readByIndex(idxCol3, oneBytes)) {
                    Row row = scanner.next();
                    TableAssert.assertRow(row, row1, idxCols, expectedValues);
                    assertEmpty(scanner);
                }
            }
        });
        final byte[] row2 = Bytes.toBytes("row2");
        tx.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                // read-less increment on an indexed column should throw an exception
                try {
                    iTable.increment(row2, idxCol1, 1L);
                    fail("Expected IllegalArgumentException performing increment on indexed column");
                } catch (IllegalArgumentException iae) {
                    // expected
                }
                // read-less increment on a non-indexed column should succeed
                iTable.increment(row2, valCol, 1L);
                byte[] result = iTable.get(row2, valCol);
                assertArrayEquals(oneBytes, result);
            }
        });
        // overwrite the counter column with valA so the next increment hits a non-long value
        tx.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                iTable.put(row2, valCol, valA);
            }
        });
        // increment against a column with non-long value should fail
        tx.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                try {
                    iTable.incrementAndGet(row2, valCol, 1L);
                    fail("Expected NumberFormatException from increment on a column with non-long value");
                } catch (NumberFormatException nfe) {
                    // expected
                }
            }
        });
    } finally {
        dsFrameworkUtil.deleteInstance(incrTabInstance);
    }
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) TransactionExecutor(org.apache.tephra.TransactionExecutor) Row(co.cask.cdap.api.dataset.table.Row) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 53 with Scanner

use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

the class IndexedTableTest method testIndexKeyDelimiterAmbiguity.

/**
 * Verifies that index lookups return only true matches when column names and values contain zero bytes.
 *
 * The table is indexed on columns {@code a} and {@code ab} (the latter contains a 0 byte). Rows are
 * written so that different (column, value) pairs share byte sequences around the 0 byte, e.g.
 * ({@code a}, {@code bc}) and ({@code ab}, {@code c}). Presumably the 0 byte is the separator used when
 * building index keys, which is what would make these pairs ambiguous; verify against
 * {@code IndexedTable}. Each lookup is expected to yield exactly one row.
 */
@Test
public void testIndexKeyDelimiterAmbiguity() throws Exception {
    final byte[] a = { 'a' };
    final byte[] ab = { 'a', 0, 'b' };
    final byte[] abc = { 'a', 0, 'b', 0, 'c' };
    final byte[] bc = { 'b', 0, 'c' };
    final byte[] bcd = { 'b', 0, 'c', 'd' };
    final byte[] c = { 'c' };
    final byte[] d = { 'd' };
    final byte[] w = { 'w' };
    final byte[] x = { 'x' };
    final byte[] y = { 'y' };
    final byte[] z = { 'z' };
    DatasetId delimTabInstance = DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("delimtab");
    // only columns a and ab are indexed; abc (written for row z below) is not
    dsFrameworkUtil.createInstance("indexedTable", delimTabInstance, DatasetProperties.builder().add(IndexedTable.INDEX_COLUMNS_CONF_KEY, Bytes.toString(a) + "," + Bytes.toString(ab)).build());
    final IndexedTable iTable = dsFrameworkUtil.getInstance(delimTabInstance);
    try {
        TransactionExecutor tx = dsFrameworkUtil.newTransactionExecutor(iTable);
        tx.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                iTable.put(x, a, bc);
                iTable.put(y, ab, c);
                iTable.put(w, a, bcd);
                iTable.put(z, abc, d);
            }
        });
        tx.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                // ensure that readByIndex filters the false positive rows in index
                try (Scanner scanner = iTable.readByIndex(a, bc)) {
                    Row row = scanner.next();
                    Assert.assertNotNull(row);
                    Assert.assertArrayEquals(x, row.getRow());
                    Assert.assertArrayEquals(bc, row.get(a));
                    assertEmpty(scanner);
                }
                try (Scanner scanner = iTable.readByIndex(ab, c)) {
                    Row row = scanner.next();
                    Assert.assertNotNull(row);
                    Assert.assertArrayEquals(y, row.getRow());
                    Assert.assertArrayEquals(c, row.get(ab));
                    assertEmpty(scanner);
                }
                // ensure that scanByIndex filters the false positive rows in index
                // range starting at bcd with no upper bound: only row w (a = bcd) is expected
                try (Scanner scanner = iTable.scanByIndex(a, bcd, null)) {
                    Row row = scanner.next();
                    Assert.assertNotNull(row);
                    Assert.assertArrayEquals(w, row.getRow());
                    Assert.assertArrayEquals(bcd, row.get(a));
                    assertEmpty(scanner);
                }
                // range with no lower bound ending at bcd: only row x (a = bc) is expected
                try (Scanner scanner = iTable.scanByIndex(a, null, bcd)) {
                    Row row = scanner.next();
                    Assert.assertNotNull(row);
                    Assert.assertArrayEquals(x, row.getRow());
                    Assert.assertArrayEquals(bc, row.get(a));
                    assertEmpty(scanner);
                }
            }
        });
    } finally {
        dsFrameworkUtil.deleteInstance(delimTabInstance);
    }
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) TransactionExecutor(org.apache.tephra.TransactionExecutor) Row(co.cask.cdap.api.dataset.table.Row) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 54 with Scanner

use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

the class IndexedTableTest method testIndexedRangeLookups.

/**
 * Verifies range lookups over an indexed column via {@code IndexedTable.scanByIndex}.
 *
 * Five rows are written with index values idx1..idx5 in scrambled order. Two range scans are then
 * checked: [idx2, idx5) must return the rows for idx2, idx3, idx4 in that order, and
 * [null, idx3) must return the rows for idx1, idx2 in that order. The dataset instance is deleted in
 * the {@code finally} block regardless of the outcome.
 */
@Test
public void testIndexedRangeLookups() throws Exception {
    DatasetId indexRangedLookupDs = DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("rangeLookup");
    dsFrameworkUtil.createInstance("indexedTable", indexRangedLookupDs, DatasetProperties.builder().add(IndexedTable.INDEX_COLUMNS_CONF_KEY, idxColString).build());
    final IndexedTable iTable = dsFrameworkUtil.getInstance(indexRangedLookupDs);
    TransactionExecutor txnl = dsFrameworkUtil.newTransactionExecutor(iTable);
    try {
        // start a new transaction
        txnl.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                // perform 5 puts, using idx values 1,2,3,4,5
                iTable.put(new Put(keyE).add(idxCol, idx4).add(valCol, valE));
                iTable.put(new Put(keyC).add(idxCol, idx1).add(valCol, valC));
                iTable.put(new Put(keyD).add(idxCol, idx5).add(valCol, valA));
                iTable.put(new Put(keyB).add(idxCol, idx2).add(valCol, valB));
                iTable.put(new Put(keyA).add(idxCol, idx3).add(valCol, valD));
            }
        });
        txnl.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                // do a scan using idx value range [idx2, idx5). Assert that we retrieve idx2, idx3, idx4.
                // try-with-resources so the scanner is closed even if an assertion fails
                try (Scanner scanner = iTable.scanByIndex(idxCol, idx2, idx5)) {
                    // idx2 -> keyB
                    Row next = scanner.next();
                    Assert.assertNotNull(next);
                    Assert.assertTrue(Bytes.equals(keyB, next.getRow()));
                    Assert.assertTrue(Bytes.equals(valB, next.get(valCol)));
                    // idx3 -> keyA
                    next = scanner.next();
                    Assert.assertNotNull(next);
                    Assert.assertTrue(Bytes.equals(keyA, next.getRow()));
                    Assert.assertTrue(Bytes.equals(valD, next.get(valCol)));
                    // idx4 -> keyE
                    next = scanner.next();
                    Assert.assertNotNull(next);
                    Assert.assertTrue(Bytes.equals(keyE, next.getRow()));
                    Assert.assertTrue(Bytes.equals(valE, next.get(valCol)));
                    assertEmpty(scanner);
                }
            }
        });
        txnl.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                // do a scan using idx value range [null (first row), idx3). Assert that we retrieve the values corresponding
                // to idx1, idx2.
                try (Scanner scanner = iTable.scanByIndex(idxCol, null, idx3)) {
                    // idx1 -> keyC
                    Row next = scanner.next();
                    Assert.assertNotNull(next);
                    Assert.assertTrue(Bytes.equals(keyC, next.getRow()));
                    Assert.assertTrue(Bytes.equals(valC, next.get(valCol)));
                    // idx2 -> keyB
                    next = scanner.next();
                    Assert.assertNotNull(next);
                    Assert.assertTrue(Bytes.equals(keyB, next.getRow()));
                    Assert.assertTrue(Bytes.equals(valB, next.get(valCol)));
                    assertEmpty(scanner);
                }
            }
        });
    } finally {
        dsFrameworkUtil.deleteInstance(indexRangedLookupDs);
    }
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) TransactionExecutor(org.apache.tephra.TransactionExecutor) Row(co.cask.cdap.api.dataset.table.Row) Put(co.cask.cdap.api.dataset.table.Put) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 55 with Scanner

use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

the class MetadataDataset method getMetadata.

/**
 * Looks up the metadata of several entities with a single fuzzy scan.
 *
 * A fuzzy key is built for every requested entity, the keys are sorted, and the table is scanned from
 * the first key up to the stop key of the last one. Every scanned row that converts to a non-null
 * {@link MetadataEntry} is grouped by its target entity; the entry stored under {@code TAGS_KEY}
 * supplies the entity's tags, every other entry becomes a property.
 *
 * @param targetIds entities for which metadata is required
 * @return one {@link Metadata} object per entity for which the scan produced at least one entry;
 *         an empty set if {@code targetIds} is empty
 */
public Set<Metadata> getMetadata(Set<? extends NamespacedEntityId> targetIds) {
    if (targetIds.isEmpty()) {
        return Collections.emptySet();
    }

    // One fuzzy key per requested entity, ordered so the first and last keys bound the scan range.
    List<ImmutablePair<byte[], byte[]>> sortedKeys = new ArrayList<>(targetIds.size());
    for (NamespacedEntityId id : targetIds) {
        sortedKeys.add(getFuzzyKeyFor(id));
    }
    Collections.sort(sortedKeys, FUZZY_KEY_COMPARATOR);

    // Collect the scanned entries, grouped by the entity they belong to.
    Multimap<NamespacedEntityId, MetadataEntry> entriesByEntity = HashMultimap.create();
    byte[] startKey = sortedKeys.get(0).getFirst();
    byte[] lastKey = sortedKeys.get(sortedKeys.size() - 1).getFirst();
    byte[] stopKey = Bytes.stopKeyForPrefix(lastKey);
    try (Scanner scanner = indexedTable.scan(new Scan(startKey, stopKey, new FuzzyRowFilter(sortedKeys)))) {
        for (Row row = scanner.next(); row != null; row = scanner.next()) {
            MetadataEntry converted = convertRow(row);
            if (converted == null) {
                continue;
            }
            entriesByEntity.put(converted.getTargetId(), converted);
        }
    }

    // Fold each entity's entries into a single Metadata object.
    Set<Metadata> result = new HashSet<>();
    for (Map.Entry<NamespacedEntityId, Collection<MetadataEntry>> grouped : entriesByEntity.asMap().entrySet()) {
        Set<String> entityTags = Collections.emptySet();
        Map<String, String> entityProperties = new HashMap<>();
        for (MetadataEntry item : grouped.getValue()) {
            if (!TAGS_KEY.equals(item.getKey())) {
                entityProperties.put(item.getKey(), item.getValue());
            } else {
                entityTags = splitTags(item.getValue());
            }
        }
        result.add(new Metadata(grouped.getKey(), entityProperties, entityTags));
    }
    return result;
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FuzzyRowFilter(co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter) NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) ImmutablePair(co.cask.cdap.common.utils.ImmutablePair) Collection(java.util.Collection) Scan(co.cask.cdap.api.dataset.table.Scan) Row(co.cask.cdap.api.dataset.table.Row) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) HashSet(java.util.HashSet)

Aggregations

Scanner (co.cask.cdap.api.dataset.table.Scanner)78 Row (co.cask.cdap.api.dataset.table.Row)67 Scan (co.cask.cdap.api.dataset.table.Scan)14 ArrayList (java.util.ArrayList)14 Test (org.junit.Test)13 Table (co.cask.cdap.api.dataset.table.Table)12 Map (java.util.Map)11 DatasetId (co.cask.cdap.proto.id.DatasetId)8 TransactionExecutor (org.apache.tephra.TransactionExecutor)8 MDSKey (co.cask.cdap.data2.dataset2.lib.table.MDSKey)6 QueueEntryRow (co.cask.cdap.data2.transaction.queue.QueueEntryRow)6 IOException (java.io.IOException)6 HashMap (java.util.HashMap)6 Put (co.cask.cdap.api.dataset.table.Put)5 ImmutableMap (com.google.common.collect.ImmutableMap)5 SortedMap (java.util.SortedMap)5 DatasetProperties (co.cask.cdap.api.dataset.DatasetProperties)4 Get (co.cask.cdap.api.dataset.table.Get)4 FuzzyRowFilter (co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter)4 ProgramSchedule (co.cask.cdap.internal.app.runtime.schedule.ProgramSchedule)4