Search in sources:

Example 26 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class PartitionedFileSetDataset method getPartitions.

/**
 * Scans the partitions table between {@code startKey} and {@code endKey} and passes each accepted partition to
 * {@code consumer}, stopping when {@code limit} rows have been counted or the scanner returns no more rows.
 *
 * <p>Rows whose key cannot be parsed, or whose key is rejected by {@code filter}, are skipped and do not count
 * towards {@code limit}. A row that passes both checks is counted even when it has no relative path; in that
 * case the consumer is not invoked for it. If nothing was counted at all,
 * {@code warnIfInvalidPartitionFilter} is called with the filter.
 *
 * @param filter optional filter applied to each parsed partition key; {@code null} accepts every key
 * @param consumer receives the key, the relative path and, if requested, the metadata of each partition
 * @param decodeMetadata whether to build metadata from the row; when {@code false} the consumer gets {@code null}
 * @param startKey first argument of the table scan; may be {@code null}
 * @param endKey second argument of the table scan; may be {@code null}
 * @param limit maximum number of rows to count before the scan stops
 */
private void getPartitions(@Nullable PartitionFilter filter, PartitionConsumer consumer, boolean decodeMetadata, @Nullable byte[] startKey, @Nullable byte[] endKey, long limit) {
    long count = 0L;
    try (Scanner scanner = partitionsTable.scan(startKey, endKey)) {
        while (count < limit) {
            Row row = scanner.next();
            if (row == null) {
                // scanner exhausted
                break;
            }
            PartitionKey key;
            try {
                key = parseRowKey(row.getRow(), partitioning);
            } catch (IllegalArgumentException e) {
                // Deliberately best-effort: an unparsable row is reported at debug level and skipped.
                // Placeholders are used so the message is only assembled when debug logging is enabled.
                LOG.debug("Failed to parse row key for partitioned file set '{}': {}", getName(), Bytes.toStringBinary(row.getRow()));
                continue;
            }
            if (filter != null && !filter.match(key)) {
                continue;
            }
            byte[] pathBytes = row.get(RELATIVE_PATH);
            if (pathBytes != null) {
                consumer.consume(key, Bytes.toString(pathBytes), decodeMetadata ? metadataFromRow(row) : null);
            }
            // counted whether or not the row carried a path
            count++;
        }
        if (count == 0) {
            warnIfInvalidPartitionFilter(filter, partitioning);
        }
    }
}
Also used : Scanner(io.cdap.cdap.api.dataset.table.Scanner) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) Row(io.cdap.cdap.api.dataset.table.Row)

Example 27 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class PartitionedFileSetDataset method assertNotExists.

/**
 * Checks that no partition exists yet for the given key and returns the row key generated for it.
 *
 * <p>When a transaction is present, or non-transactional use is not supported, existence is checked in the
 * partitions table. Otherwise a sampled warning with the current stack trace is logged and the file system
 * location of the partition is checked instead.
 *
 * @param key the partition key to check
 * @param supportNonTransactional whether the file system check may be used when there is no transaction
 * @return the row key corresponding to {@code key}
 * @throws PartitionAlreadyExistsException if the partitions table already has a non-empty row for the key
 * @throws DataSetException if the file system check finds that the partition location already exists
 */
@ReadOnly
byte[] assertNotExists(PartitionKey key, boolean supportNonTransactional) {
    byte[] rowKey = generateRowKey(key, partitioning);

    boolean inTransaction = tx != null;
    if (inTransaction || !supportNonTransactional) {
        // Regular path: the partitions table is the source of truth.
        Row existing = partitionsTable.get(rowKey);
        if (!existing.isEmpty()) {
            throw new PartitionAlreadyExistsException(getName(), key);
        }
        return rowKey;
    }

    if (LOG.isWarnEnabled()) {
        // Render the caller's stack so the warning shows where the non-transactional call came from.
        StringBuilder trace = new StringBuilder();
        for (StackTraceElement frame : Thread.currentThread().getStackTrace()) {
            trace.append("\n\tat ").append(frame);
        }
        SAMPLING_LOG.warn("Operation should be performed within a transaction. "
                + "This operation may require a transaction in the future. {}", trace);
    }

    // For backwards compatibility (the user might have called PartitionedFileSet#getPartitionOutput outside
    // of a transaction), partition existence cannot be checked via the partitionsTable here. As a fallback,
    // check the file system instead.
    Location partitionLocation = files.getLocation(getOutputPath(key));
    if (exists(partitionLocation)) {
        throw new DataSetException(String.format("Location %s for partition key %s already exists: ", partitionLocation, key));
    }
    return rowKey;
}
Also used : DataSetException(io.cdap.cdap.api.dataset.DataSetException) Row(io.cdap.cdap.api.dataset.table.Row) PartitionAlreadyExistsException(io.cdap.cdap.api.dataset.lib.PartitionAlreadyExistsException) Location(org.apache.twill.filesystem.Location) ReadOnly(io.cdap.cdap.api.annotation.ReadOnly)

Example 28 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class WorkerProgramRunnerTest method testWorkerWithMisbehavedDataset.

/**
 * Deploys {@code AppWithMisbehavedDataset}, runs its {@code TableWriter} program until the controller reports
 * {@code COMPLETED}, and then reads the table in a transaction to check the expected value is present.
 */
@Test
public void testWorkerWithMisbehavedDataset() throws Throwable {
    final ApplicationWithPrograms deployedApp = AppFabricTestHelper.deployApplicationWithManager(AppWithMisbehavedDataset.class, TEMP_FOLDER_SUPPLIER);
    final ProgramController workerController = startProgram(deployedApp, AppWithMisbehavedDataset.TableWriter.class);

    // Supplies the controller's current state to the wait below.
    Callable<ProgramController.State> currentState = new Callable<ProgramController.State>() {

        @Override
        public ProgramController.State call() throws Exception {
            return workerController.getState();
        }
    };
    Tasks.waitFor(ProgramController.State.COMPLETED, currentState, 30, TimeUnit.SECONDS);

    // validate worker was able to execute its second transaction
    final TransactionExecutor txExecutor = txExecutorFactory.createExecutor(datasetCache);
    TransactionExecutor.Subroutine verifyWrittenValue = new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Table writtenTable = datasetCache.getDataset(AppWithMisbehavedDataset.TABLE);
            Get lookup = new Get(AppWithMisbehavedDataset.ROW, AppWithMisbehavedDataset.COLUMN);
            Row fetched = writtenTable.get(lookup);
            Assert.assertEquals(AppWithMisbehavedDataset.VALUE, fetched.getString(AppWithMisbehavedDataset.COLUMN));
        }
    };
    txExecutor.execute(verifyWrittenValue);
}
Also used : ProgramController(io.cdap.cdap.app.runtime.ProgramController) Table(io.cdap.cdap.api.dataset.table.Table) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) ApplicationWithPrograms(io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) Get(io.cdap.cdap.api.dataset.table.Get) TransactionExecutor(org.apache.tephra.TransactionExecutor) AppWithMisbehavedDataset(io.cdap.cdap.AppWithMisbehavedDataset) Row(io.cdap.cdap.api.dataset.table.Row) IOException(java.io.IOException) Test(org.junit.Test)

Example 29 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class TableTest method testEmptyDelete.

/**
 * Checks that deletes with an empty column list remove nothing, that a delete naming one column removes only
 * that column, and that a delete with no column list removes every column of the row.
 *
 * <p>All operations run inside one short transaction, which is aborted at the end of the test whether or not
 * the assertions pass.
 */
@Test
public void testEmptyDelete() throws Exception {
    DatasetAdmin admin = getTableAdmin(CONTEXT1, MY_TABLE);
    admin.create();
    try (Table myTable = getTable(CONTEXT1, MY_TABLE)) {
        Transaction tx = txClient.startShort();
        try {
            ((TransactionAware) myTable).startTx(tx);
            myTable.put(R1, C1, V1);
            myTable.put(R1, C2, V2);
            myTable.put(R1, C3, V3);
            // specifying empty columns means to delete nothing
            myTable.delete(R1, new byte[][] {});
            myTable.delete(new Delete(R1, new byte[][] {}));
            myTable.delete(new Delete(R1, ImmutableList.<byte[]>of()));
            myTable.delete(new Delete(Bytes.toString(R1), new String[] {}));
            myTable.delete(new Delete(Bytes.toString(R1), ImmutableList.<String>of()));
            // verify the above delete calls deleted none of the columns
            Row row = myTable.get(R1);
            Assert.assertEquals(3, row.getColumns().size());
            Assert.assertArrayEquals(R1, row.getRow());
            Assert.assertArrayEquals(V1, row.get(C1));
            Assert.assertArrayEquals(V2, row.get(C2));
            Assert.assertArrayEquals(V3, row.get(C3));
            // test deletion of only one column
            Delete delete = new Delete(R1);
            // a Delete created from just a row starts without a column list
            Assert.assertNull(delete.getColumns());
            delete.add(C1);
            Assert.assertNotNull(delete.getColumns());
            myTable.delete(delete);
            row = myTable.get(R1);
            Assert.assertEquals(2, row.getColumns().size());
            Assert.assertArrayEquals(R1, row.getRow());
            Assert.assertArrayEquals(V2, row.get(C2));
            Assert.assertArrayEquals(V3, row.get(C3));
            // test delete of all columns
            myTable.delete(new Delete(R1));
            Assert.assertEquals(0, myTable.get(R1).getColumns().size());
        } finally {
            // Abort even when an assertion above fails, so the transaction is never left open
            // while the table is closed and dropped.
            txClient.abort(tx);
        }
    } finally {
        admin.drop();
    }
}
Also used : Delete(io.cdap.cdap.api.dataset.table.Delete) Table(io.cdap.cdap.api.dataset.table.Table) HBaseTable(io.cdap.cdap.data2.dataset2.lib.table.hbase.HBaseTable) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) DatasetAdmin(io.cdap.cdap.api.dataset.DatasetAdmin) Row(io.cdap.cdap.api.dataset.table.Row) Test(org.junit.Test)

Example 30 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class TableTest method testMultiGetWithTx.

/**
 * Writes 100 rows with two columns each in one transaction, then reads them back with batched gets in three
 * further transactions: all columns, only C1, and alternating C1/C2 per row. Each batch result is checked
 * row by row.
 */
@Test
public void testMultiGetWithTx() throws Exception {
    final int rowCount = 100;
    String tableName = "testMultiGet";
    DatasetAdmin admin = getTableAdmin(CONTEXT1, tableName);
    admin.create();
    try (Table table = getTable(CONTEXT1, tableName)) {
        // --- populate the table and commit ---
        Transaction writeTx = txClient.startShort();
        TransactionAware txAware = (TransactionAware) table;
        txAware.startTx(writeTx);
        for (int r = 0; r < rowCount; r++) {
            table.put(new Put(Bytes.toBytes("r" + r)).add(C1, V1).add(C2, V2));
        }
        txClient.canCommitOrThrow(writeTx, txAware.getTxChanges());
        Assert.assertTrue(txAware.commitTx());
        txClient.commitOrThrow(writeTx);

        // --- batch get without column selection: both columns come back ---
        Transaction readAllTx = txClient.startShort();
        txAware.startTx(readAllTx);
        List<Get> batch = Lists.newArrayListWithCapacity(rowCount);
        for (int r = 0; r < rowCount; r++) {
            batch.add(new Get(Bytes.toBytes("r" + r)));
        }
        List<Row> fetched = table.get(batch);
        txClient.commitOrThrow(readAllTx);
        for (int r = 0; r < rowCount; r++) {
            Row current = fetched.get(r);
            Assert.assertArrayEquals(Bytes.toBytes("r" + r), current.getRow());
            byte[] first = current.get(C1);
            Assert.assertNotNull(first);
            Assert.assertArrayEquals(V1, first);
            byte[] second = current.get(C2);
            Assert.assertNotNull(second);
            Assert.assertArrayEquals(V2, second);
        }

        // --- batch get restricted to C1 ---
        Transaction readFirstColumnTx = txClient.startShort();
        txAware.startTx(readFirstColumnTx);
        batch = Lists.newArrayListWithCapacity(rowCount);
        for (int r = 0; r < rowCount; r++) {
            batch.add(new Get("r" + r).add(C1));
        }
        fetched = table.get(batch);
        txClient.commitOrThrow(readFirstColumnTx);
        for (int r = 0; r < rowCount; r++) {
            Row current = fetched.get(r);
            Assert.assertArrayEquals(Bytes.toBytes("r" + r), current.getRow());
            byte[] first = current.get(C1);
            Assert.assertNotNull(first);
            Assert.assertArrayEquals(V1, first);
            // should have only returned column 1
            byte[] second = current.get(C2);
            Assert.assertNull(second);
        }

        // --- retrieve different columns per row: evens get C1, odds get C2 ---
        Transaction readAlternatingTx = txClient.startShort();
        txAware.startTx(readAlternatingTx);
        batch = Lists.newArrayListWithCapacity(rowCount);
        for (int r = 0; r < rowCount; r++) {
            Get request = new Get("r" + r);
            if (r % 2 == 0) {
                request.add(C1);
            } else {
                request.add(C2);
            }
            batch.add(request);
        }
        fetched = table.get(batch);
        txClient.commitOrThrow(readAlternatingTx);
        for (int r = 0; r < rowCount; r++) {
            Row current = fetched.get(r);
            Assert.assertArrayEquals(Bytes.toBytes("r" + r), current.getRow());
            byte[] first = current.get(C1);
            byte[] second = current.get(C2);
            boolean oddRow = r % 2 != 0;
            if (oddRow) {
                Assert.assertNull(first);
                Assert.assertNotNull(second);
                Assert.assertArrayEquals(V2, second);
                continue;
            }
            Assert.assertNotNull(first);
            Assert.assertArrayEquals(V1, first);
            Assert.assertNull(second);
        }
    } finally {
        admin.drop();
    }
}
Also used : Table(io.cdap.cdap.api.dataset.table.Table) HBaseTable(io.cdap.cdap.data2.dataset2.lib.table.hbase.HBaseTable) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) Get(io.cdap.cdap.api.dataset.table.Get) DatasetAdmin(io.cdap.cdap.api.dataset.DatasetAdmin) Row(io.cdap.cdap.api.dataset.table.Row) Put(io.cdap.cdap.api.dataset.table.Put) Test(org.junit.Test)

Aggregations

Row (io.cdap.cdap.api.dataset.table.Row)166 Scanner (io.cdap.cdap.api.dataset.table.Scanner)81 Test (org.junit.Test)50 Table (io.cdap.cdap.api.dataset.table.Table)34 Put (io.cdap.cdap.api.dataset.table.Put)29 ArrayList (java.util.ArrayList)26 TransactionExecutor (org.apache.tephra.TransactionExecutor)26 Get (io.cdap.cdap.api.dataset.table.Get)24 Schema (io.cdap.cdap.api.data.schema.Schema)21 HashMap (java.util.HashMap)19 MDSKey (io.cdap.cdap.data2.dataset2.lib.table.MDSKey)16 Transaction (org.apache.tephra.Transaction)16 TransactionAware (org.apache.tephra.TransactionAware)16 IOException (java.io.IOException)14 Map (java.util.Map)14 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)13 DatasetAdmin (io.cdap.cdap.api.dataset.DatasetAdmin)12 WriteOnly (io.cdap.cdap.api.annotation.WriteOnly)10 DimensionValue (io.cdap.cdap.api.dataset.lib.cube.DimensionValue)10 HBaseTable (io.cdap.cdap.data2.dataset2.lib.table.hbase.HBaseTable)10