
Example 21 with TransactionAware

Use of org.apache.tephra.TransactionAware in project cdap by caskdata.

From the class HBaseTableTest, the method testScannerCache:

private void testScannerCache(int rowsExpected, String tableName, @Nullable String property, @Nullable String argument, @Nullable String scanArgument) throws Exception {
    // Now scan and sleep for a while after each result
    Transaction tx = txClient.startShort();
    DatasetProperties props = property == null ? DatasetProperties.EMPTY : DatasetProperties.of(ImmutableMap.of(HConstants.HBASE_CLIENT_SCANNER_CACHING, property));
    Map<String, String> arguments = argument == null ? Collections.<String, String>emptyMap() : ImmutableMap.of(HConstants.HBASE_CLIENT_SCANNER_CACHING, argument);
    Scan scan = new Scan(null, null);
    if (scanArgument != null) {
        scan.setProperty(HConstants.HBASE_CLIENT_SCANNER_CACHING, scanArgument);
    }
    Table table = getTable(CONTEXT1, tableName, props, arguments);
    ((TransactionAware) table).startTx(tx);
    Scanner scanner = table.scan(scan);
    int scanCount = 0;
    try {
        while (scanner.next() != null) {
            scanCount++;
            TimeUnit.MILLISECONDS.sleep(10);
        }
        scanner.close();
    } finally {
        LOG.info("Scanned {} rows.", scanCount);
        txClient.abort(tx);
    }
    Assert.assertEquals(rowsExpected, scanCount);
}
Also used : RegionScanner(org.apache.hadoop.hbase.regionserver.RegionScanner) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) Scanner(co.cask.cdap.api.dataset.table.Scanner) BufferingTable(co.cask.cdap.data2.dataset2.lib.table.BufferingTable) Table(co.cask.cdap.api.dataset.table.Table) HTable(org.apache.hadoop.hbase.client.HTable) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) DatasetProperties(co.cask.cdap.api.dataset.DatasetProperties) Scan(co.cask.cdap.api.dataset.table.Scan)
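
The helper above reads every row in a single transaction while sleeping between results, so the scan only survives the HBase scanner timeout when the caching value is small enough. As an illustration only (the concrete values here are just the ones used by the @Test variant below), the scanner-caching setting can be supplied at three levels, listed from lowest to highest precedence:

// Illustration: the three places testScannerCache can set hbase.client.scanner.caching.
DatasetProperties props = DatasetProperties.of(
    ImmutableMap.of(HConstants.HBASE_CLIENT_SCANNER_CACHING, "5000"));  // dataset property
Map<String, String> arguments =
    ImmutableMap.of(HConstants.HBASE_CLIENT_SCANNER_CACHING, "1000");   // runtime argument, overrides the property
Scan scan = new Scan(null, null);
scan.setProperty(HConstants.HBASE_CLIENT_SCANNER_CACHING, "100");       // per-scan property, overrides both
Table table = getTable(CONTEXT1, tableName, props, arguments);
Scanner scanner = table.scan(scan);                                     // effective caching is 100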

Example 22 with TransactionAware

Use of org.apache.tephra.TransactionAware in project cdap by caskdata.

From the class HBaseTableTest, the method testScannerCache:

@Test
public void testScannerCache() throws Exception {
    String tableName = "scanCache";
    // note: it appears that HBase only enforces the scanner timeout after 10 seconds.
    // setting it to 3 seconds does not mean it will actually fail after 3 seconds.
    // therefore we have to cross the 10 seconds. here: 1200 times 10ms sleep.
    int numRows = 1200;
    DatasetAdmin admin = getTableAdmin(CONTEXT1, tableName);
    admin.create();
    try {
        // write some rows and commit
        Transaction tx1 = txClient.startShort();
        Table myTable1 = getTable(CONTEXT1, tableName);
        ((TransactionAware) myTable1).startTx(tx1);
        for (int i = 0; i < numRows; i++) {
            myTable1.put(new Put("" + i, "x", "y"));
        }
        txClient.canCommitOrThrow(tx1, ((TransactionAware) myTable1).getTxChanges());
        Assert.assertTrue(((TransactionAware) myTable1).commitTx());
        txClient.commitOrThrow(tx1);
        try {
            testScannerCache(numRows, tableName, null, null, null);
            Assert.fail("this should have failed with ScannerTimeoutException");
        } catch (Exception e) {
            // we expect a RuntimeException wrapping an HBase ScannerTimeoutException
            if (!(e.getCause() instanceof ScannerTimeoutException)) {
                throw e;
            }
        }
        // cache=100 as dataset property
        testScannerCache(numRows, tableName, "100", null, null);
        // cache=100 as dataset runtime argument
        testScannerCache(numRows, tableName, "1000", "100", null);
        // cache=100 as scan property
        testScannerCache(numRows, tableName, "5000", "1000", "100");
    } finally {
        admin.drop();
    }
}
Also used : BufferingTable(co.cask.cdap.data2.dataset2.lib.table.BufferingTable) Table(co.cask.cdap.api.dataset.table.Table) HTable(org.apache.hadoop.hbase.client.HTable) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) DatasetAdmin(co.cask.cdap.api.dataset.DatasetAdmin) ScannerTimeoutException(org.apache.hadoop.hbase.client.ScannerTimeoutException) Put(co.cask.cdap.api.dataset.table.Put) RetriesExhaustedWithDetailsException(org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) BufferingTableTest(co.cask.cdap.data2.dataset2.lib.table.BufferingTableTest) Test(org.junit.Test)
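
The write phase at the top of this test drives the Tephra TransactionAware protocol by hand. A minimal sketch of that sequence, assuming a TransactionSystemClient named txClient and a transactional Table named table as in the test (error handling is elided there; the executor-based examples below wrap the same steps):

Transaction tx = txClient.startShort();                 // open a short transaction
((TransactionAware) table).startTx(tx);                 // bind the dataset to it
try {
    table.put(new Put("row", "col", "val"));            // buffered write
    // submit the change set for conflict detection
    txClient.canCommitOrThrow(tx, ((TransactionAware) table).getTxChanges());
    // persist the buffered writes
    if (!((TransactionAware) table).commitTx()) {
        throw new DataSetException("commitTx() returned false");
    }
    txClient.commitOrThrow(tx);                          // make the transaction visible
} catch (Exception e) {
    ((TransactionAware) table).rollbackTx();             // undo any persisted writes
    txClient.abort(tx);
    throw e;
}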

Example 23 with TransactionAware

Use of org.apache.tephra.TransactionAware in project cdap by caskdata.

From the class DatasetOpExecutorServiceTest, the method testRest:

@Test
public void testRest() throws Exception {
    // check non-existence with 404
    testAdminOp(bob, "exists", 404, null);
    // add instance, should automatically create an instance
    dsFramework.addInstance("table", bob, DatasetProperties.EMPTY);
    testAdminOp(bob, "exists", 200, true);
    testAdminOp("bob", "exists", 404, null);
    // check truncate
    final Table table = dsFramework.getDataset(bob, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(table);
    TransactionExecutor txExecutor = new DefaultTransactionExecutor(new InMemoryTxSystemClient(txManager), ImmutableList.of((TransactionAware) table));
    // write something to the table
    txExecutor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            table.put(new Put("key1", "col1", "val1"));
        }
    });
    // verify that we can read the data
    txExecutor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Assert.assertEquals("val1", table.get(new Get("key1", "col1")).getString("col1"));
        }
    });
    testAdminOp(bob, "truncate", 200, null);
    // verify that data is no longer there
    txExecutor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Assert.assertTrue(table.get(new Get("key1", "col1")).isEmpty());
        }
    });
    // check upgrade
    testAdminOp(bob, "upgrade", 200, null);
    // drop and check non-existence
    dsFramework.deleteInstance(bob);
    testAdminOp(bob, "exists", 404, null);
}
Also used : Table(co.cask.cdap.api.dataset.table.Table) TransactionAware(org.apache.tephra.TransactionAware) Get(co.cask.cdap.api.dataset.table.Get) DefaultTransactionExecutor(org.apache.tephra.DefaultTransactionExecutor) TransactionExecutor(org.apache.tephra.TransactionExecutor) InMemoryTxSystemClient(org.apache.tephra.inmemory.InMemoryTxSystemClient) URISyntaxException(java.net.URISyntaxException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) Put(co.cask.cdap.api.dataset.table.Put) Test(org.junit.Test)
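
The TransactionExecutor used here wraps the manual lifecycle from the previous example: it starts a transaction, calls startTx on each TransactionAware it was given, runs the callable, and then drives canCommit, commitTx, and commit (or rollback and abort on failure). A small sketch, assuming the same txManager and table as in the test, of the Function variant that also returns a value to the caller:

TransactionExecutor executor = new DefaultTransactionExecutor(
    new InMemoryTxSystemClient(txManager), (TransactionAware) table);
// read a single column transactionally and return it
String value = executor.execute(new TransactionExecutor.Function<String, String>() {
    @Override
    public String apply(String row) throws Exception {
        return table.get(new Get(row, "col1")).getString("col1");
    }
}, "key1");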

Example 24 with TransactionAware

Use of org.apache.tephra.TransactionAware in project cdap by caskdata.

From the class PartitionConsumerTest, the method testSimplePartitionConsuming:

@Test
public void testSimplePartitionConsuming() throws Exception {
    final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
    final TransactionAware txAwareDataset = (TransactionAware) dataset;
    final Set<PartitionKey> partitionKeys1 = new HashSet<>();
    for (int i = 0; i < 10; i++) {
        partitionKeys1.add(generateUniqueKey());
    }
    final Set<PartitionKey> partitionKeys2 = new HashSet<>();
    for (int i = 0; i < 15; i++) {
        partitionKeys2.add(generateUniqueKey());
    }
    final PartitionConsumer partitionConsumer = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor());
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            for (PartitionKey partitionKey : partitionKeys1) {
                dataset.getPartitionOutput(partitionKey).addPartition();
            }
        }
    });
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // Initial consumption returns the partitions corresponding to partitionKeys1, because only
            // those partitions have been added to the dataset at this point
            List<? extends Partition> consumedPartitions = partitionConsumer.consumePartitions().getPartitions();
            Assert.assertEquals(partitionKeys1, toKeys(consumedPartitions));
        }
    });
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            for (PartitionKey partitionKey : partitionKeys2) {
                dataset.getPartitionOutput(partitionKey).addPartition();
            }
        }
    });
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // Using the same PartitionConsumer (which remembers the PartitionConsumerState) to consume additional
            // partitions returns only the newly added partitions (corresponding to partitionKeys2)
            Assert.assertEquals(partitionKeys2, toKeys(partitionConsumer.consumePartitions().getPartitions()));
        }
    });
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // consuming the partitions again, without adding any new partitions returns an empty iterator
            Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty());
        }
    });
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // creating a new PartitionConsumer resets the consumption state. Consuming from it then returns an iterator
            // with all the partition keys
            List<? extends Partition> consumedPartitions = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor()).consumePartitions().getPartitions();
            Set<PartitionKey> allKeys = new HashSet<>();
            allKeys.addAll(partitionKeys1);
            allKeys.addAll(partitionKeys2);
            Assert.assertEquals(allKeys, toKeys(consumedPartitions));
        }
    });
}
Also used : ConcurrentPartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer) ConsumablePartition(co.cask.cdap.api.dataset.lib.partitioned.ConsumablePartition) Partition(co.cask.cdap.api.dataset.lib.Partition) HashSet(java.util.HashSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) Set(java.util.Set) ConsumerWorkingSet(co.cask.cdap.api.dataset.lib.partitioned.ConsumerWorkingSet) TransactionExecutor(org.apache.tephra.TransactionExecutor) TransactionAware(org.apache.tephra.TransactionAware) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) PartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer) Test(org.junit.Test)
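
In a real consumer the returned partitions would also be marked as processed, which this test does not need because it only inspects what each call returns. A minimal sketch, run inside a transaction exactly like the subroutines above, assuming the same dataset and an InMemoryStatePersistor:

PartitionConsumer consumer = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor());
List<PartitionDetail> consumed = consumer.consumePartitions().getPartitions();
try {
    // process the partitions here ...
    consumer.onFinish(consumed, true);    // mark them processed; they will not be returned again
} catch (Exception e) {
    consumer.onFinish(consumed, false);   // return them to the working set for retry (see the next example)
    throw e;
}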

Example 25 with TransactionAware

Use of org.apache.tephra.TransactionAware in project cdap by caskdata.

From the class PartitionConsumerTest, the method testNumRetries:

@Test
public void testNumRetries() throws Exception {
    final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
    final TransactionAware txAwareDataset = (TransactionAware) dataset;
    final int numRetries = 1;
    ConsumerConfiguration configuration = ConsumerConfiguration.builder().setMaxRetries(numRetries).build();
    final PartitionConsumer partitionConsumer = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor(), configuration);
    final PartitionKey partitionKey = generateUniqueKey();
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            dataset.getPartitionOutput(partitionKey).addPartition();
        }
    });
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // consume the partition and abort it numRetries + 1 times; each abort returns it to the working set
            for (int i = 0; i < numRetries + 1; i++) {
                List<PartitionDetail> partitionDetails = partitionConsumer.consumePartitions(1).getPartitions();
                Assert.assertEquals(1, partitionDetails.size());
                Assert.assertEquals(partitionKey, partitionDetails.get(0).getPartitionKey());
                // aborting the processing of the partition
                partitionConsumer.onFinish(partitionDetails, false);
            }
            // after the 2nd abort, the partition is discarded entirely, and so no partitions are available for consuming
            PartitionConsumerResult result = partitionConsumer.consumePartitions(1);
            Assert.assertEquals(0, result.getPartitions().size());
            Assert.assertEquals(1, result.getFailedPartitions().size());
            Assert.assertEquals(partitionKey, result.getFailedPartitions().get(0).getPartitionKey());
        }
    });
}
Also used : ConcurrentPartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer) PartitionConsumerResult(co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumerResult) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) TransactionExecutor(org.apache.tephra.TransactionExecutor) TransactionAware(org.apache.tephra.TransactionAware) ConsumerConfiguration(co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) PartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer) Test(org.junit.Test)

Aggregations

TransactionAware (org.apache.tephra.TransactionAware): 97
Test (org.junit.Test): 65
TransactionExecutor (org.apache.tephra.TransactionExecutor): 48
Table (co.cask.cdap.api.dataset.table.Table): 39
Transaction (org.apache.tephra.Transaction): 34
IOException (java.io.IOException): 28
PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet): 24
DatasetAdmin (co.cask.cdap.api.dataset.DatasetAdmin): 23
HBaseTable (co.cask.cdap.data2.dataset2.lib.table.hbase.HBaseTable): 21
PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey): 19
DataSetException (co.cask.cdap.api.dataset.DataSetException): 16
List (java.util.List): 13
Put (co.cask.cdap.api.dataset.table.Put): 12
DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException): 11
TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet): 11
Row (co.cask.cdap.api.dataset.table.Row): 11
ArrayList (java.util.ArrayList): 11
HashSet (java.util.HashSet): 11
TransactionFailureException (org.apache.tephra.TransactionFailureException): 11
PartitionNotFoundException (co.cask.cdap.api.dataset.PartitionNotFoundException): 10