Example 26 with Scanner

use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

the class FactTableTest method testPreSplits.

@Test
public void testPreSplits() throws Exception {
    InMemoryTableService.create("presplitEntityTable");
    InMemoryTableService.create("presplitDataTable");
    int resolution = 10;
    int rollTimebaseInterval = 2;
    InMemoryMetricsTable metricsTable = new InMemoryMetricsTable("presplitDataTable");
    FactTable table = new FactTable(metricsTable, new EntityTable(new InMemoryMetricsTable("presplitEntityTable")), resolution, rollTimebaseInterval);
    byte[][] splits = FactTable.getSplits(3);
    long ts = System.currentTimeMillis() / 1000;
    DimensionValue dimVal1 = new DimensionValue("dim1", "value1");
    DimensionValue dimVal2 = new DimensionValue("dim2", "value2");
    DimensionValue dimVal3 = new DimensionValue("dim3", "value3");
    // first agg view: dim1
    table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal1), new Measurement("metric1", MeasureType.COUNTER, 1))));
    // second agg view: dim1 & dim2
    table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal1, dimVal2), new Measurement("metric1", MeasureType.COUNTER, 1))));
    // third agg view: dim3
    table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal3), new Measurement("metric1", MeasureType.COUNTER, 1))));
    // Verify all written records are spread across splits
    Set<Integer> splitsWithRows = Sets.newHashSet();
    try (Scanner scanner = metricsTable.scan(null, null, null)) {
        Row row;
        while ((row = scanner.next()) != null) {
            boolean added = false;
            for (int i = 0; i < splits.length; i++) {
                if (Bytes.compareTo(row.getRow(), splits[i]) < 0) {
                    // the row key sorts before the i-th split bound, so it belongs to split i
                    splitsWithRows.add(i);
                    added = true;
                    break;
                }
            }
            if (!added) {
                // the row key sorts at or after every split bound: it falls into the last split
                splitsWithRows.add(splits.length);
            }
        }
    }
    Assert.assertEquals(3, splitsWithRows.size());
}
Also used: Measurement(co.cask.cdap.api.dataset.lib.cube.Measurement) Scanner(co.cask.cdap.api.dataset.table.Scanner) DimensionValue(co.cask.cdap.api.dataset.lib.cube.DimensionValue) InMemoryMetricsTable(co.cask.cdap.data2.dataset2.lib.table.inmemory.InMemoryMetricsTable) Row(co.cask.cdap.api.dataset.table.Row) Test(org.junit.Test)
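
The split-assignment loop in this test is a general pattern for verifying pre-split tables: a row key belongs to the first split whose upper bound sorts after it, or to the last split if none does. A minimal standalone sketch of that assignment, assuming splits[] holds the exclusive upper bounds returned by FactTable.getSplits (the helper name findSplitIndex is illustrative, not part of CDAP):

// Returns the index of the split containing rowKey. splits[] holds the exclusive
// upper bounds of splits 0..n-1; keys at or above the last bound fall into split n.
// Illustrative helper only, not a CDAP API.
private static int findSplitIndex(byte[] rowKey, byte[][] splits) {
    for (int i = 0; i < splits.length; i++) {
        if (Bytes.compareTo(rowKey, splits[i]) < 0) {
            return i;
        }
    }
    return splits.length;
}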

Example 27 with Scanner

use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

the class JobQueueDataset method markJobsForDeletion.

@Override
public void markJobsForDeletion(ScheduleId scheduleId, long markedTime) {
    byte[] keyPrefix = getRowKeyPrefix(scheduleId);
    Row row;
    try (Scanner scanner = table.scan(keyPrefix, Bytes.stopKeyForPrefix(keyPrefix))) {
        while ((row = scanner.next()) != null) {
            Job job = fromRow(row);
            // Skip jobs that are pending launch (the launcher deletes those anyway) and jobs
            // that are already marked, to avoid a conflict with a concurrent delete.
            if (job.getState() != Job.State.PENDING_LAUNCH && row.get(TO_DELETE_COL) == null) {
                table.put(row.getRow(), TO_DELETE_COL, Bytes.toBytes(markedTime));
            }
        }
    }
}
Also used: Scanner(co.cask.cdap.api.dataset.table.Scanner) Row(co.cask.cdap.api.dataset.table.Row)
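
This method and the examples that follow all use the same prefix-scan idiom: scan from the prefix to Bytes.stopKeyForPrefix(prefix), with the Scanner in a try-with-resources block so it is always closed. A minimal sketch of that idiom in isolation (the helper name forEachRowWithPrefix and the java.util.function.Consumer callback are illustrative assumptions, not CDAP APIs):

// Applies action to every row whose key starts with keyPrefix.
// Requires java.util.function.Consumer. Illustrative helper, not a CDAP API.
private static void forEachRowWithPrefix(Table table, byte[] keyPrefix, Consumer<Row> action) {
    // stopKeyForPrefix yields the exclusive upper bound of the prefix range
    try (Scanner scanner = table.scan(keyPrefix, Bytes.stopKeyForPrefix(keyPrefix))) {
        Row row;
        while ((row = scanner.next()) != null) {
            action.accept(row);
        }
    }
}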

Example 28 with Scanner

use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

the class DatasetBasedStreamSizeScheduleStore method upgradeVersionKeys.

// Returns whether the upgrade process is complete, i.e. whether this invocation upgraded zero rows.
private boolean upgradeVersionKeys(Table table, int maxNumberUpdateRows) {
    int numRowsUpgraded = 0;
    try (Scanner scan = getScannerWithPrefix(table, KEY_PREFIX)) {
        Row next;
        // Upgrade only N rows in one transaction to reduce the probability of conflicts with regular Store operations.
        while (((next = scan.next()) != null) && (numRowsUpgraded < maxNumberUpdateRows)) {
            if (isInvalidRow(next)) {
                LIMITED_LOG.debug("Stream Sized Schedule entry with Row key {} does not have all columns.", Bytes.toString(next.getRow()));
                continue;
            }
            byte[] oldRowKey = next.getRow();
            String oldRowKeyString = Bytes.toString(next.getRow());
            String[] splits = oldRowKeyString.split(":");
            // streamSizeSchedule:namespace:application:type:program:schedule
            if (splits.length != 6) {
                LIMITED_LOG.debug("Skip upgrading StreamSizeSchedule {}. Expected row key " + "format 'streamSizeSchedule:namespace:application:type:program:schedule'", oldRowKeyString);
                continue;
            }
            // append application version after application name
            byte[] newRowKey = Bytes.toBytes(ScheduleUpgradeUtil.getNameWithDefaultVersion(splits, 3));
            // If the newRowKey is already present, simply delete the oldRowKey and continue.
            Row row = table.get(newRowKey);
            if (!row.isEmpty()) {
                table.delete(oldRowKey);
                numRowsUpgraded++;
                continue;
            }
            Put put = new Put(newRowKey);
            for (Map.Entry<byte[], byte[]> colValEntry : next.getColumns().entrySet()) {
                put.add(colValEntry.getKey(), colValEntry.getValue());
            }
            table.put(put);
            table.delete(oldRowKey);
            numRowsUpgraded++;
        }
    }
    // If no rows were upgraded, notify that the upgrade process has completed.
    return (numRowsUpgraded == 0);
}
Also used: Scanner(co.cask.cdap.api.dataset.table.Scanner) Row(co.cask.cdap.api.dataset.table.Row) Map(java.util.Map) Put(co.cask.cdap.api.dataset.table.Put)
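
The body of the upgrade loop is a copy-then-delete key migration: if the new key is free, every column of the old row is copied under it with a Put; the old row is deleted either way (without copying when the new key already exists, e.g. from an earlier partial upgrade). As a standalone sketch of that step (the helper name moveRow is illustrative, not a CDAP API):

// Moves oldRow under newRowKey, then deletes the old key. If newRowKey already
// exists, only the old row is deleted. Illustrative helper, not a CDAP API.
private static void moveRow(Table table, Row oldRow, byte[] newRowKey) {
    if (table.get(newRowKey).isEmpty()) {
        Put put = new Put(newRowKey);
        for (Map.Entry<byte[], byte[]> colValEntry : oldRow.getColumns().entrySet()) {
            put.add(colValEntry.getKey(), colValEntry.getValue());
        }
        table.put(put);
    }
    table.delete(oldRow.getRow());
}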

Example 29 with Scanner

use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

the class ProgramScheduleStoreDataset method deleteSchedules.

/**
   * Removes all schedules for a specific application from the store.
   *
   * @param appId the application id for which to delete the schedules
   * @return the IDs of the schedules that were deleted
   */
public List<ScheduleId> deleteSchedules(ApplicationId appId) {
    List<ScheduleId> deleted = new ArrayList<>();
    // since all trigger row keys are prefixed by <scheduleRowKey>@,
    // a scan for that prefix finds exactly the schedules and all of their triggers
    byte[] prefix = keyPrefixForApplicationScan(appId);
    try (Scanner scanner = store.scan(new Scan(prefix, Bytes.stopKeyForPrefix(prefix)))) {
        Row row;
        while ((row = scanner.next()) != null) {
            store.delete(row.getRow());
            deleted.add(rowKeyToScheduleId(row.getRow()));
        }
    }
    return deleted;
}
Also used: Scanner(co.cask.cdap.api.dataset.table.Scanner) ArrayList(java.util.ArrayList) Scan(co.cask.cdap.api.dataset.table.Scan) Row(co.cask.cdap.api.dataset.table.Row) ScheduleId(co.cask.cdap.proto.id.ScheduleId)

Example 30 with Scanner

use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

the class ProgramScheduleStoreDataset method deleteSchedules.

/**
   * Removes one or more schedules from the store.
   *
   * @param scheduleIds the schedules to delete
   * @throws NotFoundException if one of the schedules does not exist in the store
   */
public void deleteSchedules(Iterable<? extends ScheduleId> scheduleIds) throws NotFoundException {
    for (ScheduleId scheduleId : scheduleIds) {
        String scheduleKey = rowKeyForSchedule(scheduleId);
        if (store.get(new Get(scheduleKey)).isEmpty()) {
            throw new NotFoundException(scheduleId);
        }
        store.delete(new Delete(scheduleKey));
        byte[] prefix = keyPrefixForTriggerScan(scheduleKey);
        try (Scanner scanner = store.scan(new Scan(prefix, Bytes.stopKeyForPrefix(prefix)))) {
            Row row;
            while ((row = scanner.next()) != null) {
                store.delete(row.getRow());
            }
        }
    }
}
Also used: Delete(co.cask.cdap.api.dataset.table.Delete) Scanner(co.cask.cdap.api.dataset.table.Scanner) Get(co.cask.cdap.api.dataset.table.Get) NotFoundException(co.cask.cdap.common.NotFoundException) Scan(co.cask.cdap.api.dataset.table.Scan) Row(co.cask.cdap.api.dataset.table.Row) ScheduleId(co.cask.cdap.proto.id.ScheduleId)
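
Both deleteSchedules variants rely on Bytes.stopKeyForPrefix to turn a key prefix into an exclusive scan upper bound: conceptually, the prefix with its last non-0xFF byte incremented, so the scan range covers exactly the keys that start with the prefix. A hedged sketch of that concept (not CDAP's actual implementation, which may differ in detail):

// Computes an exclusive stop key for scanning all keys with the given prefix:
// drop trailing 0xFF bytes and increment the last remaining byte. Returns null
// when every byte is 0xFF, meaning the scan should run to the end of the table.
// Conceptual sketch only; see CDAP's Bytes.stopKeyForPrefix for the real behavior.
private static byte[] stopKeyForPrefixSketch(byte[] prefix) {
    for (int i = prefix.length - 1; i >= 0; i--) {
        if (prefix[i] != (byte) 0xFF) {
            byte[] stopKey = java.util.Arrays.copyOf(prefix, i + 1);
            stopKey[i]++;
            return stopKey;
        }
    }
    return null;
}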

Aggregations

Scanner (co.cask.cdap.api.dataset.table.Scanner): 68 uses
Row (co.cask.cdap.api.dataset.table.Row): 60 uses
ArrayList (java.util.ArrayList): 11 uses
Scan (co.cask.cdap.api.dataset.table.Scan): 10 uses
Table (co.cask.cdap.api.dataset.table.Table): 10 uses
Test (org.junit.Test): 10 uses
DatasetId (co.cask.cdap.proto.id.DatasetId): 8 uses
TransactionExecutor (org.apache.tephra.TransactionExecutor): 8 uses
MDSKey (co.cask.cdap.data2.dataset2.lib.table.MDSKey): 6 uses
QueueEntryRow (co.cask.cdap.data2.transaction.queue.QueueEntryRow): 6 uses
IOException (java.io.IOException): 6 uses
HashMap (java.util.HashMap): 6 uses
Map (java.util.Map): 6 uses
Put (co.cask.cdap.api.dataset.table.Put): 5 uses
DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException): 4 uses
Delete (co.cask.cdap.api.dataset.table.Delete): 4 uses
FuzzyRowFilter (co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter): 4 uses
ScheduleId (co.cask.cdap.proto.id.ScheduleId): 4 uses
ReadOnly (co.cask.cdap.api.annotation.ReadOnly): 3 uses
RecordScanner (co.cask.cdap.api.data.batch.RecordScanner): 3 uses