
Example 1 with Scan

use of co.cask.cdap.api.dataset.table.Scan in project cdap by caskdata.

the class MetadataStoreDataset method listKV.

// returns a mapping of every entry that matches the given keySet and passes the combinedFilter predicate
public <T> Map<MDSKey, T> listKV(Set<MDSKey> keySet, Type typeOfT, int limit, @Nullable Predicate<KeyValue<T>> combinedFilter) {
    // Sort the keys so the first and last define the scan range
    List<MDSKey> sortedKeys = Lists.newArrayList(keySet);
    Collections.sort(sortedKeys);
    // Scan the range with a fuzzy row filter built from all keys
    byte[] startKey = sortedKeys.get(0).getKey();
    byte[] stopKey = Bytes.stopKeyForPrefix(sortedKeys.get(sortedKeys.size() - 1).getKey());
    List<ImmutablePair<byte[], byte[]>> fuzzyKeys = new ArrayList<>();
    for (MDSKey key : sortedKeys) {
        fuzzyKeys.add(getFuzzyKeyFor(key));
    }
    Scan scan = new Scan(startKey, stopKey, new FuzzyRowFilter(fuzzyKeys));
    return listCombinedFilterKV(scan, typeOfT, limit, combinedFilter);
}
Also used : ImmutablePair(co.cask.cdap.common.utils.ImmutablePair) ArrayList(java.util.ArrayList) Scan(co.cask.cdap.api.dataset.table.Scan)
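
A minimal caller sketch (not from the source): it assumes an initialized MetadataStoreDataset named dataset and a Gson-serializable value class RunRecord, with keys built through MDSKey.Builder as elsewhere in cdap. Passing null as the filter keeps every row the fuzzy scan matches, and the limit caps the result size.

Set<MDSKey> keys = ImmutableSet.of(
    new MDSKey.Builder().add("runRecord").add("ns1").build(),
    new MDSKey.Builder().add("runRecord").add("ns2").build());
// hypothetical call: a null filter accepts every matched row, up to 100 entries
Map<MDSKey, RunRecord> records = dataset.listKV(keys, RunRecord.class, 100, null);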

Example 2 with Scan

use of co.cask.cdap.api.dataset.table.Scan in project cdap by caskdata.

the class BufferingTableTest method testScanWithBuffering.

/**
   * Tests that writes being buffered in memory by the client are still visible during scans.
   */
@Test
public void testScanWithBuffering() throws Exception {
    String testScanWithBuffering = "testScanWithBuffering";
    DatasetAdmin admin = getTableAdmin(CONTEXT1, testScanWithBuffering);
    admin.create();
    try {
        // write three rows in the first transaction
        Transaction tx1 = txClient.startShort();
        Table table1 = getTable(CONTEXT1, testScanWithBuffering);
        ((TransactionAware) table1).startTx(tx1);
        table1.put(Bytes.toBytes("1_01"), a(C1), a(V1));
        table1.put(Bytes.toBytes("1_02"), a(C1), a(V1));
        table1.put(Bytes.toBytes("1_03"), a(C1), a(V1));
        // written values should not yet be persisted
        TableAssert.assertScan(new byte[0][], new byte[0][][], ((BufferingTable) table1).scanPersisted(new Scan(Bytes.toBytes("1_"), Bytes.toBytes("2_"))));
        // buffered values should be visible in a scan
        TableAssert.assertScan(a(Bytes.toBytes("1_01"), Bytes.toBytes("1_02"), Bytes.toBytes("1_03")), aa(a(C1, V1), a(C1, V1), a(C1, V1)), table1.scan(Bytes.toBytes("1_"), Bytes.toBytes("2_")));
        Assert.assertTrue(txClient.canCommit(tx1, ((TransactionAware) table1).getTxChanges()));
        Assert.assertTrue(((TransactionAware) table1).commitTx());
        Assert.assertTrue(txClient.commit(tx1));
        Transaction tx2 = txClient.startShort();
        ((TransactionAware) table1).startTx(tx2);
        // written values should be visible after commit
        TableAssert.assertScan(a(Bytes.toBytes("1_01"), Bytes.toBytes("1_02"), Bytes.toBytes("1_03")), aa(a(C1, V1), a(C1, V1), a(C1, V1)), table1.scan(Bytes.toBytes("1_"), Bytes.toBytes("2_")));
        txClient.commit(tx2);
        Transaction tx3 = txClient.startShort();
        ((TransactionAware) table1).startTx(tx3);
        // test merging of buffered writes on existing rows
        table1.put(Bytes.toBytes("1_01"), a(C2), a(V2));
        table1.put(Bytes.toBytes("1_02"), a(C1), a(V2));
        table1.put(Bytes.toBytes("1_02a"), a(C1), a(V1));
        table1.put(Bytes.toBytes("1_02b"), a(C1), a(V1));
        table1.put(Bytes.toBytes("1_04"), a(C2), a(V2));
        // persisted values should be the same
        TableAssert.assertScan(a(Bytes.toBytes("1_01"), Bytes.toBytes("1_02"), Bytes.toBytes("1_03")), aa(a(C1, V1), a(C1, V1), a(C1, V1)), ((BufferingTable) table1).scanPersisted(new Scan(Bytes.toBytes("1_"), Bytes.toBytes("2_"))));
        // all values should be visible in buffered scan
        TableAssert.assertScan(a(Bytes.toBytes("1_01"), Bytes.toBytes("1_02"), Bytes.toBytes("1_02a"), Bytes.toBytes("1_02b"), Bytes.toBytes("1_03"), Bytes.toBytes("1_04")), aa(// 1_01
        a(C1, V1, C2, V2), // 1_02
        a(C1, V2), // 1_02a
        a(C1, V1), // 1_02b
        a(C1, V1), // 1_03
        a(C1, V1), // 1_04
        a(C2, V2)), table1.scan(Bytes.toBytes("1_"), Bytes.toBytes("2_")));
        Assert.assertTrue(txClient.canCommit(tx3, ((TransactionAware) table1).getTxChanges()));
        Assert.assertTrue(((TransactionAware) table1).commitTx());
        txClient.commit(tx3);
        Transaction tx4 = txClient.startShort();
        ((TransactionAware) table1).startTx(tx4);
        // all values should be visible after commit
        TableAssert.assertScan(a(Bytes.toBytes("1_01"), Bytes.toBytes("1_02"), Bytes.toBytes("1_02a"), Bytes.toBytes("1_02b"), Bytes.toBytes("1_03"), Bytes.toBytes("1_04")), aa(// 1_01
        a(C1, V1, C2, V2), // 1_02
        a(C1, V2), // 1_02a
        a(C1, V1), // 1_02b
        a(C1, V1), // 1_03
        a(C1, V1), // 1_04
        a(C2, V2)), table1.scan(Bytes.toBytes("1_"), Bytes.toBytes("2_")));
        txClient.commit(tx4);
    } finally {
        admin.drop();
    }
}
Also used : Table(co.cask.cdap.api.dataset.table.Table) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) DatasetAdmin(co.cask.cdap.api.dataset.DatasetAdmin) Scan(co.cask.cdap.api.dataset.table.Scan) Test(org.junit.Test)
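
The test hard-codes "2_" as the exclusive stop key for the "1_" prefix. A more general pattern, sketched here against the same Table API, derives the stop key with Bytes.stopKeyForPrefix as the later examples do; table stands for any co.cask.cdap.api.dataset.table.Table obtained inside a transaction.

byte[] prefix = Bytes.toBytes("1_");
try (Scanner scanner = table.scan(new Scan(prefix, Bytes.stopKeyForPrefix(prefix)))) {
    Row row;
    while ((row = scanner.next()) != null) {
        // each Row carries the row key plus its column-to-value map
        System.out.println(Bytes.toString(row.getRow()));
    }
}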

Example 3 with Scan

use of co.cask.cdap.api.dataset.table.Scan in project cdap by caskdata.

the class WorkflowDataset method getNeighbors.

/**
   * Returns the workflow run records whose start times lie close to the given run: up to
   * {@code limit} records on each side of it, spaced at least {@code timeInterval} seconds apart.
   *
   * @param id the workflow
   * @param runId the run id of the workflow run to search around
   * @param limit the maximum number of neighboring runs to return on each side of the given run
   * @param timeInterval the minimum spacing, in seconds, between consecutive results
   * @return a map from workflow run id to the corresponding workflow run record; a map is used
   * so that duplicate run records are not returned
   */
private Map<String, WorkflowRunRecord> getNeighbors(WorkflowId id, RunId runId, int limit, long timeInterval) {
    long startTime = RunIds.getTime(runId, TimeUnit.SECONDS);
    Map<String, WorkflowRunRecord> workflowRunRecords = new HashMap<>();
    int i = -limit;
    long prevStartTime = startTime - (limit * timeInterval);
    // keep scanning while the candidate start time is within [startTime - limit * timeInterval, startTime + limit * timeInterval]
    while (prevStartTime <= startTime + (limit * timeInterval)) {
        MDSKey mdsKey = getRowKeyBuilder(id, prevStartTime).build();
        byte[] startRowKey = mdsKey.getKey();
        Scan scan = new Scan(startRowKey, null);
        Row indexRow;
        // close the scanner after reading the single row we need, so each
        // loop iteration does not leak a scanner
        try (Scanner scanner = table.scan(scan)) {
            indexRow = scanner.next();
        }
        if (indexRow == null) {
            return workflowRunRecords;
        }
        byte[] rowKey = indexRow.getRow();
        long time = ByteBuffer.wrap(rowKey, rowKey.length - Bytes.SIZEOF_LONG, Bytes.SIZEOF_LONG).getLong();
        if (!((time >= (startTime - (limit * timeInterval))) && time <= (startTime + (limit * timeInterval)))) {
            break;
        }
        Map<byte[], byte[]> columns = indexRow.getColumns();
        String workflowRunId = Bytes.toString(columns.get(RUNID));
        long timeTaken = Bytes.toLong(columns.get(TIME_TAKEN));
        List<ProgramRun> programRunList = GSON.fromJson(Bytes.toString(columns.get(NODES)), PROGRAM_RUNS_TYPE);
        workflowRunRecords.put(workflowRunId, new WorkflowRunRecord(workflowRunId, timeTaken, programRunList));
        prevStartTime = startTime + (i * timeInterval) < time ? time + 1 : startTime + (i * timeInterval);
        i++;
    }
    return workflowRunRecords;
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) HashMap(java.util.HashMap) MDSKey(co.cask.cdap.data2.dataset2.lib.table.MDSKey) Scan(co.cask.cdap.api.dataset.table.Scan) Row(co.cask.cdap.api.dataset.table.Row)
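
The ByteBuffer call above simply reads the 8-byte timestamp suffix off the row key. An equivalent helper, sketched under the assumption that cdap's Bytes utility mirrors the HBase-style Bytes.toLong(byte[], offset) overload:

// reads the trailing 8-byte timestamp from a workflow row key; equivalent to
// ByteBuffer.wrap(rowKey, rowKey.length - Bytes.SIZEOF_LONG, Bytes.SIZEOF_LONG).getLong()
private static long timestampFromRowKey(byte[] rowKey) {
    return Bytes.toLong(rowKey, rowKey.length - Bytes.SIZEOF_LONG);
}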

Example 4 with Scan

use of co.cask.cdap.api.dataset.table.Scan in project cdap by caskdata.

the class ProgramScheduleStoreDataset method deleteSchedules.

/**
   * Removes all schedules for a specific application from the store.
   *
   * @param appId the application id for which to delete the schedules
   * @return the IDs of the schedules that were deleted
   */
public List<ScheduleId> deleteSchedules(ApplicationId appId) {
    List<ScheduleId> deleted = new ArrayList<>();
    // since all trigger row keys are prefixed by <scheduleRowKey>@,
    // a scan for that prefix finds exactly those schedules and all of their triggers
    byte[] prefix = keyPrefixForApplicationScan(appId);
    try (Scanner scanner = store.scan(new Scan(prefix, Bytes.stopKeyForPrefix(prefix)))) {
        Row row;
        while ((row = scanner.next()) != null) {
            store.delete(row.getRow());
            deleted.add(rowKeyToScheduleId(row.getRow()));
        }
    }
    return deleted;
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) ArrayList(java.util.ArrayList) Scan(co.cask.cdap.api.dataset.table.Scan) Row(co.cask.cdap.api.dataset.table.Row) ScheduleId(co.cask.cdap.proto.id.ScheduleId)
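
A hypothetical call site (scheduleStore, appId, and LOG are assumptions for illustration, not names from the source):

// remove every schedule of the application and report how many rows went away
List<ScheduleId> removed = scheduleStore.deleteSchedules(appId);
LOG.info("Deleted {} schedule entries for application {}", removed.size(), appId);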

Example 5 with Scan

use of co.cask.cdap.api.dataset.table.Scan in project cdap by caskdata.

the class ProgramScheduleStoreDataset method deleteSchedules.

/**
   * Removes one or more schedules from the store, along with all of their triggers.
   *
   * @param scheduleIds the schedules to delete
   * @throws NotFoundException if one of the schedules does not exist in the store
   */
public void deleteSchedules(Iterable<? extends ScheduleId> scheduleIds) throws NotFoundException {
    for (ScheduleId scheduleId : scheduleIds) {
        String scheduleKey = rowKeyForSchedule(scheduleId);
        if (store.get(new Get(scheduleKey)).isEmpty()) {
            throw new NotFoundException(scheduleId);
        }
        store.delete(new Delete(scheduleKey));
        byte[] prefix = keyPrefixForTriggerScan(scheduleKey);
        try (Scanner scanner = store.scan(new Scan(prefix, Bytes.stopKeyForPrefix(prefix)))) {
            Row row;
            while ((row = scanner.next()) != null) {
                store.delete(row.getRow());
            }
        }
    }
}
Also used : Delete(co.cask.cdap.api.dataset.table.Delete) Scanner(co.cask.cdap.api.dataset.table.Scanner) Get(co.cask.cdap.api.dataset.table.Get) NotFoundException(co.cask.cdap.common.NotFoundException) Scan(co.cask.cdap.api.dataset.table.Scan) Row(co.cask.cdap.api.dataset.table.Row) ScheduleId(co.cask.cdap.proto.id.ScheduleId)
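
The comment in the previous example states that trigger row keys are prefixed by <scheduleRowKey>@. A plausible sketch of the keyPrefixForTriggerScan helper used here, under that assumption (the actual cdap implementation may differ):

// trigger rows sort directly after their schedule row because they share the
// "<scheduleRowKey>@" prefix, so scanning that prefix visits exactly the triggers
private static byte[] keyPrefixForTriggerScan(String scheduleKey) {
    return Bytes.toBytes(scheduleKey + '@');
}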

Aggregations

Scan (co.cask.cdap.api.dataset.table.Scan): 16
Scanner (co.cask.cdap.api.dataset.table.Scanner): 10
Row (co.cask.cdap.api.dataset.table.Row): 9
ArrayList (java.util.ArrayList): 6
Table (co.cask.cdap.api.dataset.table.Table): 5
DatasetAdmin (co.cask.cdap.api.dataset.DatasetAdmin): 4
Transaction (org.apache.tephra.Transaction): 4
TransactionAware (org.apache.tephra.TransactionAware): 4
Test (org.junit.Test): 4
HBaseTable (co.cask.cdap.data2.dataset2.lib.table.hbase.HBaseTable): 3
ScheduleId (co.cask.cdap.proto.id.ScheduleId): 3
Delete (co.cask.cdap.api.dataset.table.Delete): 2
Get (co.cask.cdap.api.dataset.table.Get): 2
ImmutablePair (co.cask.cdap.common.utils.ImmutablePair): 2
ProgramSchedule (co.cask.cdap.internal.app.runtime.schedule.ProgramSchedule): 2
TxRunnable (co.cask.cdap.api.TxRunnable): 1
ArtifactId (co.cask.cdap.api.artifact.ArtifactId): 1
DatasetContext (co.cask.cdap.api.data.DatasetContext): 1
DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException): 1
DatasetProperties (co.cask.cdap.api.dataset.DatasetProperties): 1