Example 51 with Row

Use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata: class MetadataStoreDataset, method get.

@Nullable
public <T> T get(MDSKey id, Type typeOfT) {
    // single-row lookup: the key encodes the full logical id
    Row row = table.get(id.getKey());
    if (row.isEmpty()) {
        return null;
    }
    // the serialized record is stored in one well-known column
    byte[] value = row.get(COLUMN);
    if (value == null) {
        return null;
    }
    return deserialize(value, typeOfT);
}
Also used : Row(co.cask.cdap.api.dataset.table.Row) Nullable(javax.annotation.Nullable)
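
For orientation, a caller builds an MDSKey from its constituent parts and passes it to get together with the expected type. A minimal usage sketch, assuming a MetadataStoreDataset instance named store and a stored record type AppMeta (both hypothetical):

// Hypothetical usage: look up one record by its composite key.
MDSKey key = new MDSKey.Builder().add("appMeta").add("myApp").build();
AppMeta meta = store.get(key, AppMeta.class);
if (meta == null) {
    // nothing stored under this key, or the value column was absent
}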

Example 52 with Row

Use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata: class MetadataStoreDataset, method getFirst.

// returns the first record, in key order, whose key starts with the given prefix
@Nullable
public <T> T getFirst(MDSKey id, Type typeOfT) {
    try {
        // scan from the prefix up to (and excluding) the first key past the prefix range
        try (Scanner scan = table.scan(id.getKey(), Bytes.stopKeyForPrefix(id.getKey()))) {
            Row row = scan.next();
            if (row == null || row.isEmpty()) {
                return null;
            }
            byte[] value = row.get(COLUMN);
            if (value == null) {
                return null;
            }
            return deserialize(value, typeOfT);
        }
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) Row(co.cask.cdap.api.dataset.table.Row) Nullable(javax.annotation.Nullable)
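
Unlike get, getFirst treats the key as a prefix: Bytes.stopKeyForPrefix yields the smallest key that sorts after every key starting with the prefix, so the scan covers exactly the prefix range and the first row encountered wins. A minimal usage sketch under the same assumptions as above (hypothetical store and record type):

// Hypothetical usage: first record, in key order, under a key prefix.
MDSKey prefix = new MDSKey.Builder().add("run").add("myApp").build();
RunRecord first = store.getFirst(prefix, RunRecord.class);
if (first == null) {
    // no key in the table starts with this prefix
}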

Example 53 with Row

Use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata: class LevelDBQueueEvictor, method doEvict.

private synchronized int doEvict(Transaction transaction) throws IOException {
    final byte[] stopRow = QueueEntryRow.getStopRowForTransaction(queueRowPrefix, transaction);
    Row row;
    List<byte[]> rowsToDelete = Lists.newArrayList();
    // the scan must be non-transactional in order to see the state columns (which have latest timestamp)
    try (Scanner scanner = core.scan(queueRowPrefix, stopRow, null, null, Transaction.ALL_VISIBLE_LATEST)) {
        while ((row = scanner.next()) != null) {
            int processed = 0;
            for (Map.Entry<byte[], byte[]> entry : row.getColumns().entrySet()) {
                // is it a state column for a consumer instance?
                if (!QueueEntryRow.isStateColumn(entry.getKey())) {
                    continue;
                }
                // is the write pointer of this state committed w.r.t. the current transaction, and is it processed?
                if (QueueEntryRow.isCommittedProcessed(entry.getValue(), transaction)) {
                    ++processed;
                }
            }
            if (processed >= numGroups) {
                rowsToDelete.add(row.getRow());
            }
        }
    }
    if (!rowsToDelete.isEmpty()) {
        core.deleteRows(rowsToDelete);
        LOG.trace("Evicted {} entries from queue {}", rowsToDelete.size(), name);
    } else {
        LOG.trace("Nothing to evict from queue {}", name);
    }
    return rowsToDelete.size();
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) QueueEntryRow(co.cask.cdap.data2.transaction.queue.QueueEntryRow) Row(co.cask.cdap.api.dataset.table.Row) Map(java.util.Map)
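
Stripped of the queue-specific checks, doEvict is a scan-collect-delete pattern: scan a key range, decide per row, and apply the deletes only after the scan completes. A minimal sketch of that pattern, assuming a generic Table named table, a key range [startRow, stopRow), and a hypothetical shouldDelete predicate:

// Scan-collect-delete: collect row keys during the scan, delete afterwards.
List<byte[]> toDelete = new ArrayList<>();
try (Scanner scanner = table.scan(startRow, stopRow)) {
    Row row;
    while ((row = scanner.next()) != null) {
        // hypothetical predicate deciding whether this row is evictable
        if (shouldDelete(row)) {
            toDelete.add(row.getRow());
        }
    }
}
for (byte[] rowKey : toDelete) {
    table.delete(rowKey);
}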

Example 54 with Row

Use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata: class MapReduceWithPartitionedTest, method testPartitionedFileSetWithMR.

@Test
public void testPartitionedFileSetWithMR() throws Exception {
    final ApplicationWithPrograms app = deployApp(AppWithPartitionedFileSet.class);
    // write a value to the input table
    final Table table = datasetCache.getDataset(AppWithPartitionedFileSet.INPUT);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            table.put(Bytes.toBytes("x"), AppWithPartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("1"));
        }
    });
    // a partition key for the map/reduce output
    final PartitionKey keyX = PartitionKey.builder().addStringField("type", "x").addLongField("time", 150000L).build();
    // run the partition writer m/r with this output partition key
    Map<String, String> runtimeArguments = Maps.newHashMap();
    Map<String, String> outputArgs = Maps.newHashMap();
    PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyX);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
    // this should have created a partition in the partitioned file set
    final PartitionedFileSet dataset = datasetCache.getDataset(PARTITIONED);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Partition partition = dataset.getPartition(keyX);
            Assert.assertNotNull(partition);
            String path = partition.getRelativePath();
            Assert.assertTrue(path.contains("x"));
            Assert.assertTrue(path.contains("150000"));
        }
    });
    // delete the data in the input table and write a new row
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            table.delete(Bytes.toBytes("x"));
            table.put(Bytes.toBytes("y"), AppWithPartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("2"));
        }
    });
    // a new partition key for the next map/reduce
    final PartitionKey keyY = PartitionKey.builder().addStringField("type", "y").addLongField("time", 200000L).build();
    // now run the m/r again with a later partition time (200000 instead of 150000)
    PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyY);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
    // this should have created another partition in the partitioned file set
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Partition partition = dataset.getPartition(keyY);
            Assert.assertNotNull(partition);
            String path = partition.getRelativePath();
            Assert.assertNotNull(path);
            Assert.assertTrue(path.contains("y"));
            Assert.assertTrue(path.contains("200000"));
        }
    });
    // a partition filter that matches the outputs of both map/reduces
    PartitionFilter filterXY = PartitionFilter.builder().addRangeCondition("type", "x", "z").build();
    // now run a map/reduce that reads all the partitions
    runtimeArguments = Maps.newHashMap();
    Map<String, String> inputArgs = Maps.newHashMap();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterXY);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "a");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read both partitions - and written both x and y to row a
    final Table output = datasetCache.getDataset(AppWithPartitionedFileSet.OUTPUT);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("a"));
            Assert.assertEquals("1", row.getString("x"));
            Assert.assertEquals("2", row.getString("y"));
        }
    });
    // a partition filter that matches the output key of the first map/reduce
    PartitionFilter filterX = PartitionFilter.builder().addValueCondition("type", "x").addRangeCondition("time", null, 160000L).build();
    // now run a map/reduce that reads a range of the partitions, namely the first one
    inputArgs.clear();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterX);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "b");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read the first partition only - and written only x to row b
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("b"));
            Assert.assertEquals("1", row.getString("x"));
            Assert.assertNull(row.get("y"));
        }
    });
    // a partition filter that matches no key
    PartitionFilter filterMT = PartitionFilter.builder().addValueCondition("type", "nosuchthing").build();
    // now run a map/reduce that reads an empty range of partitions (the filter matches nothing)
    inputArgs.clear();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterMT);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "n");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read no partitions - and written nothing to row n
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("n"));
            Assert.assertTrue(row.isEmpty());
        }
    });
}
Also used : Partition(co.cask.cdap.api.dataset.lib.Partition) Table(co.cask.cdap.api.dataset.table.Table) TransactionExecutor(org.apache.tephra.TransactionExecutor) TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) PartitionFilter(co.cask.cdap.api.dataset.lib.PartitionFilter) ApplicationWithPrograms(co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) TransactionAware(org.apache.tephra.TransactionAware) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) BasicArguments(co.cask.cdap.internal.app.runtime.BasicArguments) Row(co.cask.cdap.api.dataset.table.Row) Test(org.junit.Test)
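
The moving parts of this test are the partition keys and filters. Shown in isolation (the field names "type" and "time" are the test's own partitioning fields):

// A fully qualified key: one value per partitioning field.
PartitionKey keyX = PartitionKey.builder()
    .addStringField("type", "x")
    .addLongField("time", 150000L)
    .build();
// A filter combining an exact-value condition with a range condition;
// in the test it matches keyX (time 150000) but not keyY (time 200000).
PartitionFilter filterX = PartitionFilter.builder()
    .addValueCondition("type", "x")
    .addRangeCondition("time", null, 160000L)
    .build();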

Example 55 with Row

Use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata: class IndexedTableTest, method assertEmpty.

/**
   * Asserts that the given scanner contains no more rows.
   */
private void assertEmpty(Scanner scanner) {
    Row row = scanner.next();
    Assert.assertNull(row);
}
Also used : Row(co.cask.cdap.api.dataset.table.Row)
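
The helper leans on the Scanner contract that next() returns null once the scan is exhausted, so a single call is enough. A minimal usage sketch, assuming a Table named table and a key range expected to hold no rows:

// Hypothetical usage: verify that a range scan comes back empty.
try (Scanner scanner = table.scan(startRow, stopRow)) {
    assertEmpty(scanner);
}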

Aggregations

Row (co.cask.cdap.api.dataset.table.Row): 111
Scanner (co.cask.cdap.api.dataset.table.Scanner): 60
Test (org.junit.Test): 23
Table (co.cask.cdap.api.dataset.table.Table): 20
Get (co.cask.cdap.api.dataset.table.Get): 16
ArrayList (java.util.ArrayList): 16
TransactionExecutor (org.apache.tephra.TransactionExecutor): 16
Map (java.util.Map): 15
Put (co.cask.cdap.api.dataset.table.Put): 14
HashMap (java.util.HashMap): 10
Scan (co.cask.cdap.api.dataset.table.Scan): 9
TransactionAware (org.apache.tephra.TransactionAware): 9
MDSKey (co.cask.cdap.data2.dataset2.lib.table.MDSKey): 8
QueueEntryRow (co.cask.cdap.data2.transaction.queue.QueueEntryRow): 8
DatasetId (co.cask.cdap.proto.id.DatasetId): 8
IOException (java.io.IOException): 8
ImmutableMap (com.google.common.collect.ImmutableMap): 7
Transaction (org.apache.tephra.Transaction): 7
WriteOnly (co.cask.cdap.api.annotation.WriteOnly): 6
Schema (co.cask.cdap.api.data.schema.Schema): 6