Search in sources :

Example 61 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by cdapio.

the class TableTest method testEmptyGet.

@Test
public void testEmptyGet() throws Exception {
    DatasetAdmin admin = getTableAdmin(CONTEXT1, MY_TABLE);
    admin.create();
    try (Table myTable = getTable(CONTEXT1, MY_TABLE)) {
        Transaction tx = txClient.startShort();
        ((TransactionAware) myTable).startTx(tx);
        myTable.put(R1, C1, V1);
        myTable.put(R1, C2, V2);
        // to be used for validation later
        TreeMap<byte[], byte[]> expectedColumns = new TreeMap<>(Bytes.BYTES_COMPARATOR);
        expectedColumns.put(C1, V1);
        expectedColumns.put(C2, V2);
        Result expectedResult = new Result(R1, expectedColumns);
        Result emptyResult = new Result(R1, ImmutableMap.<byte[], byte[]>of());
        ((TransactionAware) myTable).commitTx();
        txClient.commitOrThrow(tx);
        // start another transaction, so that the buffering table doesn't cache the values; the underlying Table
        // implementations are tested this way.
        tx = txClient.startShort();
        ((TransactionAware) myTable).startTx(tx);
        Row row = myTable.get(R1, new byte[][] { C1, C2 });
        assertEquals(expectedResult, row);
        // passing in empty columns returns empty result
        row = myTable.get(R1, new byte[][] {});
        assertEquals(emptyResult, row);
        // test all the Get constructors and their behavior
        // constructors specifying only rowkey retrieve all columns
        Get get = new Get(R1);
        Assert.assertNull(get.getColumns());
        assertEquals(expectedResult, myTable.get(get));
        get = new Get(Bytes.toString(R1));
        Assert.assertNull(get.getColumns());
        assertEquals(expectedResult, myTable.get(get));
        get.add(C1);
        get.add(Bytes.toString(C2));
        assertEquals(expectedResult, myTable.get(get));
        // constructor specifying columns, but with an empty array/collection retrieve 0 columns
        get = new Get(R1, new byte[][] {});
        Assert.assertNotNull(get.getColumns());
        assertEquals(emptyResult, myTable.get(get));
        get = new Get(R1, ImmutableList.<byte[]>of());
        Assert.assertNotNull(get.getColumns());
        assertEquals(emptyResult, myTable.get(get));
        get = new Get(Bytes.toString(R1), new String[] {});
        Assert.assertNotNull(get.getColumns());
        assertEquals(emptyResult, myTable.get(get));
        get = new Get(Bytes.toString(R1), ImmutableList.<String>of());
        Assert.assertNotNull(get.getColumns());
        assertEquals(emptyResult, myTable.get(get));
        row = myTable.get(R1, new byte[][] {});
        assertEquals(emptyResult, row);
        txClient.abort(tx);
    } finally {
        admin.drop();
    }
}
Also used : Table(io.cdap.cdap.api.dataset.table.Table) HBaseTable(io.cdap.cdap.data2.dataset2.lib.table.hbase.HBaseTable) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) Get(io.cdap.cdap.api.dataset.table.Get) DatasetAdmin(io.cdap.cdap.api.dataset.DatasetAdmin) Row(io.cdap.cdap.api.dataset.table.Row) TreeMap(java.util.TreeMap) Result(io.cdap.cdap.api.dataset.table.Result) Test(org.junit.Test)

Example 62 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by cdapio.

the class TableTest method testMultiGetWithTx.

@Test
public void testMultiGetWithTx() throws Exception {
    String testMultiGet = "testMultiGet";
    DatasetAdmin admin = getTableAdmin(CONTEXT1, testMultiGet);
    admin.create();
    try (Table table = getTable(CONTEXT1, testMultiGet)) {
        Transaction tx = txClient.startShort();
        ((TransactionAware) table).startTx(tx);
        for (int i = 0; i < 100; i++) {
            table.put(new Put(Bytes.toBytes("r" + i)).add(C1, V1).add(C2, V2));
        }
        txClient.canCommitOrThrow(tx, ((TransactionAware) table).getTxChanges());
        Assert.assertTrue(((TransactionAware) table).commitTx());
        txClient.commitOrThrow(tx);
        Transaction tx2 = txClient.startShort();
        ((TransactionAware) table).startTx(tx2);
        List<Get> gets = Lists.newArrayListWithCapacity(100);
        for (int i = 0; i < 100; i++) {
            gets.add(new Get(Bytes.toBytes("r" + i)));
        }
        List<Row> results = table.get(gets);
        txClient.commitOrThrow(tx2);
        for (int i = 0; i < 100; i++) {
            Row row = results.get(i);
            Assert.assertArrayEquals(Bytes.toBytes("r" + i), row.getRow());
            byte[] val = row.get(C1);
            Assert.assertNotNull(val);
            Assert.assertArrayEquals(V1, val);
            byte[] val2 = row.get(C2);
            Assert.assertNotNull(val2);
            Assert.assertArrayEquals(V2, val2);
        }
        Transaction tx3 = txClient.startShort();
        ((TransactionAware) table).startTx(tx3);
        gets = Lists.newArrayListWithCapacity(100);
        for (int i = 0; i < 100; i++) {
            gets.add(new Get("r" + i).add(C1));
        }
        results = table.get(gets);
        txClient.commitOrThrow(tx3);
        for (int i = 0; i < 100; i++) {
            Row row = results.get(i);
            Assert.assertArrayEquals(Bytes.toBytes("r" + i), row.getRow());
            byte[] val = row.get(C1);
            Assert.assertNotNull(val);
            Assert.assertArrayEquals(V1, val);
            // should have only returned column 1
            byte[] val2 = row.get(C2);
            Assert.assertNull(val2);
        }
        // retrieve different columns per row
        Transaction tx4 = txClient.startShort();
        ((TransactionAware) table).startTx(tx4);
        gets = Lists.newArrayListWithCapacity(100);
        for (int i = 0; i < 100; i++) {
            Get get = new Get("r" + i);
            // evens get C1, odds get C2
            get.add(i % 2 == 0 ? C1 : C2);
            gets.add(get);
        }
        results = table.get(gets);
        txClient.commitOrThrow(tx4);
        for (int i = 0; i < 100; i++) {
            Row row = results.get(i);
            Assert.assertArrayEquals(Bytes.toBytes("r" + i), row.getRow());
            byte[] val1 = row.get(C1);
            byte[] val2 = row.get(C2);
            if (i % 2 == 0) {
                Assert.assertNotNull(val1);
                Assert.assertArrayEquals(V1, val1);
                Assert.assertNull(val2);
            } else {
                Assert.assertNull(val1);
                Assert.assertNotNull(val2);
                Assert.assertArrayEquals(V2, val2);
            }
        }
    } finally {
        admin.drop();
    }
}
Also used : Table(io.cdap.cdap.api.dataset.table.Table) HBaseTable(io.cdap.cdap.data2.dataset2.lib.table.hbase.HBaseTable) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) Get(io.cdap.cdap.api.dataset.table.Get) DatasetAdmin(io.cdap.cdap.api.dataset.DatasetAdmin) Row(io.cdap.cdap.api.dataset.table.Row) Put(io.cdap.cdap.api.dataset.table.Put) Test(org.junit.Test)

Example 63 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by cdapio.

the class BufferingTableTest method verify.

private void verify(BufferingTable table, byte[] row, byte[] col, byte[] val) throws Exception {
    // get column
    Assert.assertArrayEquals(val, table.get(row, col));
    // get set of columns
    Row getColSetRow = table.get(row, new byte[][] { col });
    Map<byte[], byte[]> getColSetResult = getColSetRow.getColumns();
    Assert.assertEquals(1, getColSetResult.size());
    Assert.assertArrayEquals(val, getColSetResult.get(col));
    // get row
    Row getRow = table.get(row);
    Map<byte[], byte[]> getRowResult = getRow.getColumns();
    Assert.assertEquals(1, getRowResult.size());
    Assert.assertArrayEquals(val, getRowResult.get(col));
    // scan
    Scanner scan = table.scan(row, null);
    Row next = scan.next();
    Assert.assertNotNull(next);
    Assert.assertArrayEquals(row, next.getRow());
    Assert.assertArrayEquals(val, next.get(col));
    Assert.assertNull(scan.next());
}
Also used : Scanner(io.cdap.cdap.api.dataset.table.Scanner) Row(io.cdap.cdap.api.dataset.table.Row)

Example 64 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by cdapio.

the class MapReduceWithPartitionedTest method testPartitionedFileSetWithMR.

private void testPartitionedFileSetWithMR(boolean useCombineFileInputFormat) throws Exception {
    ApplicationWithPrograms app = deployApp(AppWithPartitionedFileSet.class, new AppWithPartitionedFileSet.AppConfig(useCombineFileInputFormat));
    // write a value to the input table
    final Table table = datasetCache.getDataset(AppWithPartitionedFileSet.INPUT);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            table.put(Bytes.toBytes("x"), AppWithPartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("1"));
        }
    });
    // a partition key for the map/reduce output
    final PartitionKey keyX = PartitionKey.builder().addStringField("type", "x").addLongField("time", 150000L).build();
    // run the partition writer m/r with this output partition time
    Map<String, String> runtimeArguments = Maps.newHashMap();
    Map<String, String> outputArgs = Maps.newHashMap();
    PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyX);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
    // this should have created a partition in the tpfs
    final PartitionedFileSet dataset = datasetCache.getDataset(PARTITIONED);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Partition partition = dataset.getPartition(keyX);
            Assert.assertNotNull(partition);
            String path = partition.getRelativePath();
            Assert.assertTrue(path.contains("x"));
            Assert.assertTrue(path.contains("150000"));
        }
    });
    // delete the data in the input table and write a new row
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            table.delete(Bytes.toBytes("x"));
            table.put(Bytes.toBytes("y"), AppWithPartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("2"));
        }
    });
    // a new partition key for the next map/reduce
    final PartitionKey keyY = PartitionKey.builder().addStringField("type", "y").addLongField("time", 200000L).build();
    // now run the m/r again with a new partition time, say 5 minutes later
    PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyY);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
    // this should have created a partition in the tpfs
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Partition partition = dataset.getPartition(keyY);
            Assert.assertNotNull(partition);
            String path = partition.getRelativePath();
            Assert.assertNotNull(path);
            Assert.assertTrue(path.contains("y"));
            Assert.assertTrue(path.contains("200000"));
        }
    });
    // a partition filter that matches the outputs of both map/reduces
    PartitionFilter filterXY = PartitionFilter.builder().addRangeCondition("type", "x", "z").build();
    // now run a map/reduce that reads all the partitions
    runtimeArguments = Maps.newHashMap();
    Map<String, String> inputArgs = Maps.newHashMap();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterXY);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "a");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read both partitions - and written both x and y to row a
    final Table output = datasetCache.getDataset(AppWithPartitionedFileSet.OUTPUT);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("a"));
            Assert.assertEquals("1", row.getString("x"));
            Assert.assertEquals("{type=x, time=150000}", row.getString("x_key"));
            Assert.assertEquals("2", row.getString("y"));
            Assert.assertEquals("{type=y, time=200000}", row.getString("y_key"));
        }
    });
    // a partition filter that matches the output key of the first map/reduce
    PartitionFilter filterX = PartitionFilter.builder().addValueCondition("type", "x").addRangeCondition("time", null, 160000L).build();
    // now run a map/reduce that reads a range of the partitions, namely the first one
    inputArgs.clear();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterX);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "b");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read the first partition only - and written only x to row b
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("b"));
            Assert.assertEquals("1", row.getString("x"));
            Assert.assertEquals("{type=x, time=150000}", row.getString("x_key"));
            Assert.assertNull(row.get("y"));
            Assert.assertNull(row.get("y_key"));
        }
    });
    // a partition filter that matches no key
    PartitionFilter filterMT = PartitionFilter.builder().addValueCondition("type", "nosuchthing").build();
    // now run a map/reduce that reads an empty range of partitions (the filter matches nothing)
    inputArgs.clear();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterMT);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "n");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read no partitions - and written nothing to row n
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("n"));
            Assert.assertTrue(row.isEmpty());
        }
    });
}
Also used : Partition(io.cdap.cdap.api.dataset.lib.Partition) Table(io.cdap.cdap.api.dataset.table.Table) TransactionExecutor(org.apache.tephra.TransactionExecutor) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) PartitionFilter(io.cdap.cdap.api.dataset.lib.PartitionFilter) ApplicationWithPrograms(io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) TransactionAware(org.apache.tephra.TransactionAware) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) Row(io.cdap.cdap.api.dataset.table.Row)

Example 65 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by cdapio.

the class MetadataDataset method getMetadata.

/**
 * Retrieves the metadata for the specified {@link MetadataEntity}.
 *
 * @param metadataEntity the specified {@link MetadataEntity}
 * @return {@link Record} representing the metadata for the specified {@link MetadataEntity}
 */
public Record getMetadata(MetadataEntity metadataEntity) {
    MDSKey mdsKey = MetadataKey.createValueRowKey(metadataEntity, null);
    byte[] startKey = mdsKey.getKey();
    byte[] stopKey = Bytes.stopKeyForPrefix(startKey);
    Map<String, String> metadata = new HashMap<>();
    try (Scanner scan = indexedTable.scan(startKey, stopKey)) {
        Row next;
        while ((next = scan.next()) != null) {
            String key = MetadataKey.extractMetadataKey(next.getRow());
            byte[] value = next.get(VALUE_COLUMN);
            if (key == null || value == null) {
                continue;
            }
            metadata.put(key, Bytes.toString(value));
        }
    }
    Set<String> tags = splitTags(metadata.get(MetadataConstants.TAGS_KEY));
    // safe to remove since splitTags above copies to new HashSet
    // now we are only left with properties
    metadata.remove(MetadataConstants.TAGS_KEY);
    return new Record(metadataEntity, metadata, tags);
}
Also used : Scanner(io.cdap.cdap.api.dataset.table.Scanner) HashMap(java.util.HashMap) MDSKey(io.cdap.cdap.data2.dataset2.lib.table.MDSKey) Row(io.cdap.cdap.api.dataset.table.Row)

Aggregations

Row (io.cdap.cdap.api.dataset.table.Row)166 Scanner (io.cdap.cdap.api.dataset.table.Scanner)81 Test (org.junit.Test)50 Table (io.cdap.cdap.api.dataset.table.Table)34 Put (io.cdap.cdap.api.dataset.table.Put)29 ArrayList (java.util.ArrayList)26 TransactionExecutor (org.apache.tephra.TransactionExecutor)26 Get (io.cdap.cdap.api.dataset.table.Get)24 Schema (io.cdap.cdap.api.data.schema.Schema)21 HashMap (java.util.HashMap)19 MDSKey (io.cdap.cdap.data2.dataset2.lib.table.MDSKey)16 Transaction (org.apache.tephra.Transaction)16 TransactionAware (org.apache.tephra.TransactionAware)16 IOException (java.io.IOException)14 Map (java.util.Map)14 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)13 DatasetAdmin (io.cdap.cdap.api.dataset.DatasetAdmin)12 WriteOnly (io.cdap.cdap.api.annotation.WriteOnly)10 DimensionValue (io.cdap.cdap.api.dataset.lib.cube.DimensionValue)10 HBaseTable (io.cdap.cdap.data2.dataset2.lib.table.hbase.HBaseTable)10