Search in sources :

Example 76 with Scanner

use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

the class WorkflowDataset method scan.

/**
 * This function scans the workflow.stats dataset for a list of workflow runs in a time range.
 *
 * <p>The row keys bounding the scan are built from the workflow id combined with the start and
 * end of the time range. Each returned row is decoded into a {@link WorkflowRunRecord} from its
 * RUNID, TIME_TAKEN and NODES columns. The underlying scanner is always closed before this
 * method returns, including when decoding a row throws.
 *
 * @param id The workflow id
 * @param timeRangeStart Start of the time range that the scan should begin from
 * @param timeRangeEnd End of the time range that the scan should end at
 * @return List of WorkflowRunRecords
 */
private List<WorkflowRunRecord> scan(WorkflowId id, long timeRangeStart, long timeRangeEnd) {
    byte[] startRowKey = getRowKeyBuilder(id, timeRangeStart).build().getKey();
    byte[] endRowKey = getRowKeyBuilder(id, timeRangeEnd).build().getKey();
    List<WorkflowRunRecord> workflowRunRecordList = new ArrayList<>();
    Scanner scanner = table.scan(new Scan(startRowKey, endRowKey));
    // Close the scanner in a finally block (same pattern as delete()) so that it is released
    // even if a row is malformed and Bytes/GSON decoding throws part-way through the scan.
    try {
        Row indexRow;
        while ((indexRow = scanner.next()) != null) {
            Map<byte[], byte[]> columns = indexRow.getColumns();
            String workflowRunId = Bytes.toString(columns.get(RUNID));
            long timeTaken = Bytes.toLong(columns.get(TIME_TAKEN));
            // NODES holds the JSON-serialized list of program runs for this workflow run.
            List<ProgramRun> programRunList = GSON.fromJson(Bytes.toString(columns.get(NODES)), PROGRAM_RUNS_TYPE);
            workflowRunRecordList.add(new WorkflowRunRecord(workflowRunId, timeTaken, programRunList));
        }
    } finally {
        scanner.close();
    }
    return workflowRunRecordList;
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) ArrayList(java.util.ArrayList) Scan(co.cask.cdap.api.dataset.table.Scan) Row(co.cask.cdap.api.dataset.table.Row)

Example 77 with Scanner

use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

the class WorkflowDataset method delete.

/**
 * Deletes every row returned by scanning the table from the key built out of the application's
 * namespace and application name up to the stop key computed for that key as a prefix.
 *
 * @param id the application whose rows should be removed
 */
public void delete(ApplicationId id) {
    MDSKey appKey = new MDSKey.Builder()
        .add(id.getNamespace())
        .add(id.getApplication())
        .build();
    byte[] scanStart = appKey.getKey();
    byte[] scanStop = Bytes.stopKeyForPrefix(appKey.getKey());
    Scanner matches = table.scan(scanStart, scanStop);
    try {
        // Walk the scanner until it is exhausted, removing each row by its row key.
        for (Row current = matches.next(); current != null; current = matches.next()) {
            table.delete(current.getRow());
        }
    } finally {
        matches.close();
    }
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) MDSKey(co.cask.cdap.data2.dataset2.lib.table.MDSKey) Row(co.cask.cdap.api.dataset.table.Row)

Example 78 with Scanner

use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

the class DataQualityAppTest method testDefaultConfig.

/**
 * Deploys the data quality app configured with a single DiscreteValuesHistogram aggregation on
 * the content_length field, runs the FieldAggregator MapReduce to completion, and checks the
 * histogram combined from the matching rows of the dataQuality table.
 */
@Test
public void testDefaultConfig() throws Exception {
    // Field -> aggregation functions requested for that field.
    Set<String> aggregations = new HashSet<>();
    aggregations.add("DiscreteValuesHistogram");
    Map<String, Set<String>> fieldAggregations = new HashMap<>();
    fieldAggregations.put("content_length", aggregations);

    DataQualityApp.DataQualityConfig config = new DataQualityApp.DataQualityConfig(
        WORKFLOW_SCHEDULE_MINUTES, getStreamSource(), "dataQuality", fieldAggregations);
    ApplicationId appId = NamespaceId.DEFAULT.app("newApp");
    ArtifactSummary artifactSummary = new ArtifactSummary(appArtifact.getArtifact(), appArtifact.getVersion());
    AppRequest<DataQualityApp.DataQualityConfig> appRequest = new AppRequest<>(artifactSummary, config);
    ApplicationManager applicationManager = deployApplication(appId, appRequest);

    // Run the aggregation job and wait (up to 180 seconds) for it to complete.
    MapReduceManager fieldAggregator = applicationManager.getMapReduceManager("FieldAggregator").start();
    fieldAggregator.waitForRun(ProgramRunStatus.COMPLETED, 180, TimeUnit.SECONDS);

    Table logDataStore = (Table) getDataset("dataQuality").get();
    DiscreteValuesHistogram histogram = new DiscreteValuesHistogram();
    try (Scanner scanner = logDataStore.scan(null, null)) {
        for (Row row = scanner.next(); row != null; row = scanner.next()) {
            // Only rows whose key mentions content_length contribute to the histogram.
            if (!Bytes.toString(row.getRow()).contains("content_length")) {
                continue;
            }
            byte[] serialized = row.getColumns().get(Bytes.toBytes("DiscreteValuesHistogram"));
            if (serialized != null) {
                histogram.combine(serialized);
            }
        }
    }

    Map<String, Integer> expected = Maps.newHashMap();
    expected.put("256", 3);
    Map<String, Integer> actual = histogram.retrieveAggregation();
    Assert.assertEquals(expected, actual);
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) ApplicationManager(co.cask.cdap.test.ApplicationManager) Set(java.util.Set) HashSet(java.util.HashSet) Table(co.cask.cdap.api.dataset.table.Table) MapReduceManager(co.cask.cdap.test.MapReduceManager) HashMap(java.util.HashMap) AppRequest(co.cask.cdap.proto.artifact.AppRequest) ArtifactSummary(co.cask.cdap.api.artifact.ArtifactSummary) DiscreteValuesHistogram(co.cask.cdap.dq.functions.DiscreteValuesHistogram) Row(co.cask.cdap.api.dataset.table.Row) ApplicationId(co.cask.cdap.proto.id.ApplicationId) DataQualityApp(co.cask.cdap.dq.DataQualityApp) Test(org.junit.Test)

Aggregations

Scanner (co.cask.cdap.api.dataset.table.Scanner)78 Row (co.cask.cdap.api.dataset.table.Row)67 Scan (co.cask.cdap.api.dataset.table.Scan)14 ArrayList (java.util.ArrayList)14 Test (org.junit.Test)13 Table (co.cask.cdap.api.dataset.table.Table)12 Map (java.util.Map)11 DatasetId (co.cask.cdap.proto.id.DatasetId)8 TransactionExecutor (org.apache.tephra.TransactionExecutor)8 MDSKey (co.cask.cdap.data2.dataset2.lib.table.MDSKey)6 QueueEntryRow (co.cask.cdap.data2.transaction.queue.QueueEntryRow)6 IOException (java.io.IOException)6 HashMap (java.util.HashMap)6 Put (co.cask.cdap.api.dataset.table.Put)5 ImmutableMap (com.google.common.collect.ImmutableMap)5 SortedMap (java.util.SortedMap)5 DatasetProperties (co.cask.cdap.api.dataset.DatasetProperties)4 Get (co.cask.cdap.api.dataset.table.Get)4 FuzzyRowFilter (co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter)4 ProgramSchedule (co.cask.cdap.internal.app.runtime.schedule.ProgramSchedule)4