Search in sources:

Example 1 with Scanner

Use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

From class DatasetBasedStreamSizeScheduleStore, method upgradeVersionKeys.

// Returns whether the upgrade process is complete, i.e. whether this invocation
// found no more rows left to upgrade.
private boolean upgradeVersionKeys(Table table, int maxNumberUpdateRows) {
    int numRowsUpgraded = 0;
    try (Scanner scan = getScannerWithPrefix(table, KEY_PREFIX)) {
        Row next;
        // Upgrade only N rows in one transaction to reduce the probability of conflicts with regular Store operations.
        while (((next = scan.next()) != null) && (numRowsUpgraded < maxNumberUpdateRows)) {
            if (isInvalidRow(next)) {
                LIMITED_LOG.debug("Stream Sized Schedule entry with Row key {} does not have all columns.", Bytes.toString(next.getRow()));
                continue;
            }
            byte[] oldRowKey = next.getRow();
            String oldRowKeyString = Bytes.toString(oldRowKey);
            String[] splits = oldRowKeyString.split(":");
            // streamSizeSchedule:namespace:application:type:program:schedule
            if (splits.length != 6) {
                LIMITED_LOG.debug("Skip upgrading StreamSizeSchedule {}. Expected row key " + "format 'streamSizeSchedule:namespace:application:type:program:schedule'", oldRowKeyString);
                continue;
            }
            // append application version after application name
            byte[] newRowKey = Bytes.toBytes(ScheduleUpgradeUtil.getNameWithDefaultVersion(splits, 3));
            // If the new row key is already present, simply delete the old row key and move on
            Row row = table.get(newRowKey);
            if (!row.isEmpty()) {
                table.delete(oldRowKey);
                numRowsUpgraded++;
                continue;
            }
            Put put = new Put(newRowKey);
            for (Map.Entry<byte[], byte[]> colValEntry : next.getColumns().entrySet()) {
                put.add(colValEntry.getKey(), colValEntry.getValue());
            }
            table.put(put);
            table.delete(oldRowKey);
            numRowsUpgraded++;
        }
    }
    // If no rows were upgraded, the upgrade process has completed.
    return (numRowsUpgraded == 0);
}
Also used: Scanner (co.cask.cdap.api.dataset.table.Scanner), Row (co.cask.cdap.api.dataset.table.Row), Map (java.util.Map), Put (co.cask.cdap.api.dataset.table.Put)
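
A caller typically drives this method one transactional batch at a time until it reports completion. The loop below is a hypothetical sketch, not CDAP source: the runUpgrade name, the batch size of 500, and the Tephra TransactionExecutor wiring are assumptions (java.util.concurrent.atomic.AtomicBoolean is used to carry the result out of the subroutine).

private void runUpgrade(final Table table, TransactionExecutor txnl) throws Exception {
    // Assumed batch size; small batches reduce transaction conflicts
    final int batchSize = 500;
    final AtomicBoolean done = new AtomicBoolean(false);
    while (!done.get()) {
        txnl.execute(new TransactionExecutor.Subroutine() {
            @Override
            public void apply() throws Exception {
                // upgradeVersionKeys returns true once a pass finds no rows left to upgrade
                done.set(upgradeVersionKeys(table, batchSize));
            }
        });
    }
}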

Example 2 with Scanner

Use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

From class DatasetBasedStreamSizeScheduleStoreTest, method testDeletion.

private void testDeletion(final ProgramId programId) throws Exception {
    final boolean defaultVersion = programId.getVersion().equals(ApplicationId.DEFAULT_VERSION);
    DatasetId storeTable = NamespaceId.SYSTEM.dataset(ScheduleStoreTableUtil.SCHEDULE_STORE_DATASET_NAME);
    final Table table = datasetFramework.getDataset(storeTable, ImmutableMap.<String, String>of(), null);
    Assert.assertNotNull(table);
    TransactionExecutor txnl = txExecutorFactory.createExecutor(ImmutableList.of((TransactionAware) table));
    final byte[] startKey = Bytes.toBytes(DatasetBasedStreamSizeScheduleStore.KEY_PREFIX);
    final byte[] stopKey = Bytes.stopKeyForPrefix(startKey);
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Scanner scanner = table.scan(startKey, stopKey);
            Assert.assertNull(scanner.next());
            scanner.close();
        }
    });
    // Create one stream size schedule - this will be persisted in the new format
    scheduleStore.persist(programId, PROGRAM_TYPE, STREAM_SCHEDULE_1, MAP_1, 0L, 0L, 0L, 0L, true);
    // Create one stream size schedule based on the old format
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // Create a programId without a version so that we can create an old-format schedule
            ProgramId defaultProgramId = new ProgramId(programId.getNamespace(), programId.getApplication(), programId.getType(), programId.getProgram());
            String newRowKey = scheduleStore.getRowKey(defaultProgramId, PROGRAM_TYPE, STREAM_SCHEDULE_1.getName());
            Row row = table.get(Bytes.toBytes(scheduleStore.getRowKey(programId, PROGRAM_TYPE, STREAM_SCHEDULE_1.getName())));
            Assert.assertFalse(row.isEmpty());
            byte[] oldRowKey = Bytes.toBytes(scheduleStore.removeAppVersion(newRowKey));
            for (Map.Entry<byte[], byte[]> entry : row.getColumns().entrySet()) {
                table.put(oldRowKey, entry.getKey(), entry.getValue());
            }
        }
    });
    // Make sure there are only two stream size schedules
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Scanner scanner = table.scan(startKey, stopKey);
            int numRows = 0;
            Row row;
            while ((row = scanner.next()) != null) {
                numRows++;
            }
            scanner.close();
            Assert.assertEquals(2, numRows);
        }
    });
    // This delete should remove both the old-format and the new-format rows
    scheduleStore.delete(programId, PROGRAM_TYPE, STREAM_SCHEDULE_1.getName());
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Scanner scanner = table.scan(startKey, stopKey);
            if (defaultVersion) {
                Assert.assertNull(scanner.next());
            } else {
                Assert.assertNotNull(scanner.next());
                Assert.assertNull(scanner.next());
            }
            scanner.close();
        }
    });
    // If the version is not the default, we still need to delete the row that didn't have a version
    if (!defaultVersion) {
        txnl.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                // Create a programId without a version so that we can build the row key of the old-format schedule to delete
                ProgramId defaultProgramId = new ProgramId(programId.getNamespace(), programId.getApplication(), programId.getType(), programId.getProgram());
                String newRowKey = scheduleStore.getRowKey(defaultProgramId, PROGRAM_TYPE, STREAM_SCHEDULE_1.getName());
                byte[] oldRowKey = Bytes.toBytes(scheduleStore.removeAppVersion(newRowKey));
                Row row = table.get(oldRowKey);
                Assert.assertFalse(row.isEmpty());
                table.delete(oldRowKey);
            }
        });
    }
}
Also used: Scanner (co.cask.cdap.api.dataset.table.Scanner), Table (co.cask.cdap.api.dataset.table.Table), TransactionExecutor (org.apache.tephra.TransactionExecutor), ProgramId (co.cask.cdap.proto.id.ProgramId), DatasetId (co.cask.cdap.proto.id.DatasetId), TransactionAware (org.apache.tephra.TransactionAware), Row (co.cask.cdap.api.dataset.table.Row)
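
The test hinges on two row-key layouts: the old format without an application version, and the new format with the version inserted after the application name (see the comment in Example 1). The helper below is only an illustration of what removeAppVersion is expected to do, not the store's actual implementation; the stripAppVersion name is hypothetical.

// new: streamSizeSchedule:namespace:application:version:type:program:schedule
// old: streamSizeSchedule:namespace:application:type:program:schedule
static String stripAppVersion(String versionedRowKey) {
    String[] parts = versionedRowKey.split(":");
    StringBuilder oldKey = new StringBuilder();
    for (int i = 0; i < parts.length; i++) {
        if (i == 3) {
            // drop the version segment that follows the application name
            continue;
        }
        if (oldKey.length() > 0) {
            oldKey.append(':');
        }
        oldKey.append(parts[i]);
    }
    return oldKey.toString();
}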

Example 3 with Scanner

Use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

From class HBaseConsumerStateStore, method getLatestConsumerGroups.

void getLatestConsumerGroups(Collection<? super ConsumerGroupConfig> result) {
    try (Scanner scanner = table.scan(barrierScanStartRow, barrierScanEndRow)) {
        // Get the last row
        Row lastRow = null;
        Row row = scanner.next();
        while (row != null) {
            lastRow = row;
            row = scanner.next();
        }
        if (lastRow == null) {
            throw new IllegalStateException("No consumer group information. Queue: " + queueName);
        }
        for (Map.Entry<byte[], byte[]> entry : lastRow.getColumns().entrySet()) {
            result.add(GSON.fromJson(new String(entry.getValue(), Charsets.UTF_8), ConsumerGroupConfig.class));
        }
    }
}
Also used: Scanner (co.cask.cdap.api.dataset.table.Scanner), QueueEntryRow (co.cask.cdap.data2.transaction.queue.QueueEntryRow), Row (co.cask.cdap.api.dataset.table.Row), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), ConsumerGroupConfig (co.cask.cdap.data2.queue.ConsumerGroupConfig)
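
A call site might look as follows; stateStore is an assumed HBaseConsumerStateStore instance, and only the ConsumerGroupConfig accessors already seen in these examples are used.

List<ConsumerGroupConfig> groups = new ArrayList<>();
stateStore.getLatestConsumerGroups(groups);
for (ConsumerGroupConfig config : groups) {
    // group id and size come straight from the JSON-decoded configs
    System.out.println("Group " + config.getGroupId() + " has " + config.getGroupSize() + " instance(s)");
}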

Example 4 with Scanner

Use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

From class HBaseConsumerStateStore, method configureGroups.

@Override
public void configureGroups(Iterable<? extends ConsumerGroupConfig> groupConfigs) {
    com.google.common.collect.Table<Long, Integer, byte[]> startRows = fetchAllStartRows();
    // Write a new barrier entry for all the groups
    byte[] startRow = QueueEntryRow.getQueueEntryRowKey(queueName, transaction.getWritePointer(), 0);
    Put put = new Put(Bytes.add(queueName.toBytes(), startRow));
    Set<Long> groupsIds = Sets.newHashSet();
    for (ConsumerGroupConfig groupConfig : groupConfigs) {
        long groupId = groupConfig.getGroupId();
        if (!groupsIds.add(groupId)) {
            throw new IllegalArgumentException("Same consumer group is provided multiple times");
        }
        put.add(Bytes.toBytes(groupId), GSON.toJson(groupConfig));
        // For a new instance, set its start row to the barrier start row
        for (int instanceId = 0; instanceId < groupConfig.getGroupSize(); instanceId++) {
            if (!startRows.contains(groupId, instanceId)) {
                table.put(queueName.toBytes(), getConsumerStateColumn(groupId, instanceId), startRow);
            }
        }
    }
    // Remove all states for groups that have been removed.
    deleteRemovedGroups(table.get(queueName.toBytes()), groupsIds);
    // Remove all barriers for groups that have been removed, as well as barriers
    // that every consumer has already consumed past.
    // Multimap from groupId to barrier start rows; ordering must match the scan order.
    Multimap<Long, byte[]> deletes = LinkedHashMultimap.create();
    try (Scanner scanner = table.scan(barrierScanStartRow, barrierScanEndRow)) {
        Row row = scanner.next();
        while (row != null) {
            deleteRemovedGroups(row, groupsIds);
            // Check all instances in all groups
            for (Map.Entry<byte[], byte[]> entry : row.getColumns().entrySet()) {
                QueueBarrier barrier = decodeBarrierInfo(row.getRow(), entry.getValue());
                if (barrier == null) {
                    continue;
                }
                long groupId = barrier.getGroupConfig().getGroupId();
                boolean delete = true;
                // Check whether all instances in the group have consumed past the current barrier
                for (int instanceId = 0; instanceId < barrier.getGroupConfig().getGroupSize(); instanceId++) {
                    byte[] consumerStartRow = startRows.get(groupId, instanceId);
                    if (consumerStartRow == null || Bytes.compareTo(consumerStartRow, barrier.getStartRow()) < 0) {
                        delete = false;
                        break;
                    }
                }
                if (delete) {
                    deletes.put(groupId, row.getRow());
                }
            }
            row = scanner.next();
        }
    }
    // Remove barriers that all consumers have already consumed past
    for (Map.Entry<Long, Collection<byte[]>> entry : deletes.asMap().entrySet()) {
        // Retain only the last barrier info
        if (entry.getValue().size() <= 1) {
            continue;
        }
        Deque<byte[]> rows = Lists.newLinkedList(entry.getValue());
        rows.removeLast();
        byte[] groupColumn = Bytes.toBytes(entry.getKey());
        for (byte[] rowKey : rows) {
            table.delete(rowKey, groupColumn);
        }
    }
    table.put(put);
}
Also used: Scanner (co.cask.cdap.api.dataset.table.Scanner), Put (co.cask.cdap.api.dataset.table.Put), Collection (java.util.Collection), QueueEntryRow (co.cask.cdap.data2.transaction.queue.QueueEntryRow), Row (co.cask.cdap.api.dataset.table.Row), ConsumerGroupConfig (co.cask.cdap.data2.queue.ConsumerGroupConfig), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap)
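
An illustrative invocation of configureGroups: the ConsumerGroupConfig constructor arguments shown here (group id, group size, dequeue strategy, optional hash key) are an assumption inferred from the type's use above, and the group ids and sizes are arbitrary example values.

stateStore.configureGroups(ImmutableList.of(
    // hypothetical groups; a null hash key since neither uses HASH dequeueing
    new ConsumerGroupConfig(1L, 2, DequeueStrategy.FIFO, null),
    new ConsumerGroupConfig(2L, 1, DequeueStrategy.ROUND_ROBIN, null)));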

Example 5 with Scanner

Use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

From class HBaseConsumerStateStore, method clear.

/**
 * Remove all states related to the queue that this state store is representing.
 */
void clear() {
    // Scan and delete all barrier rows
    try (Scanner scanner = table.scan(barrierScanStartRow, barrierScanEndRow)) {
        Row row = scanner.next();
        while (row != null) {
            table.delete(row.getRow());
            row = scanner.next();
        }
        // Also delete the consumer state rows
        table.delete(queueName.toBytes());
    }
}
Also used: Scanner (co.cask.cdap.api.dataset.table.Scanner), QueueEntryRow (co.cask.cdap.data2.transaction.queue.QueueEntryRow), Row (co.cask.cdap.api.dataset.table.Row)
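
The scan-and-delete pattern in clear is general enough to factor out. A minimal sketch, assuming only the Table, Scanner, and Row APIs shown above (the deleteRange name is hypothetical):

static int deleteRange(Table table, byte[] startRow, byte[] stopRow) {
    int deleted = 0;
    // Scanner is Closeable, so try-with-resources releases the scan
    try (Scanner scanner = table.scan(startRow, stopRow)) {
        Row row;
        while ((row = scanner.next()) != null) {
            // delete the entire row, all columns
            table.delete(row.getRow());
            deleted++;
        }
    }
    return deleted;
}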

Aggregations

Scanner (co.cask.cdap.api.dataset.table.Scanner): 78 usages
Row (co.cask.cdap.api.dataset.table.Row): 67 usages
Scan (co.cask.cdap.api.dataset.table.Scan): 14 usages
ArrayList (java.util.ArrayList): 14 usages
Test (org.junit.Test): 13 usages
Table (co.cask.cdap.api.dataset.table.Table): 12 usages
Map (java.util.Map): 11 usages
DatasetId (co.cask.cdap.proto.id.DatasetId): 8 usages
TransactionExecutor (org.apache.tephra.TransactionExecutor): 8 usages
MDSKey (co.cask.cdap.data2.dataset2.lib.table.MDSKey): 6 usages
QueueEntryRow (co.cask.cdap.data2.transaction.queue.QueueEntryRow): 6 usages
IOException (java.io.IOException): 6 usages
HashMap (java.util.HashMap): 6 usages
Put (co.cask.cdap.api.dataset.table.Put): 5 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 5 usages
SortedMap (java.util.SortedMap): 5 usages
DatasetProperties (co.cask.cdap.api.dataset.DatasetProperties): 4 usages
Get (co.cask.cdap.api.dataset.table.Get): 4 usages
FuzzyRowFilter (co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter): 4 usages
ProgramSchedule (co.cask.cdap.internal.app.runtime.schedule.ProgramSchedule): 4 usages