Search in sources :

Example 96 with DatasetAdmin

use of io.cdap.cdap.api.dataset.DatasetAdmin in project cdap by cdapio.

the class BufferingTableTest method testMultiGetIncludesBuffer.

/**
 * Exercises the multi-get ({@code get(List<Get>)}) read path of a buffering table in three phases:
 * reads of purely buffered writes, reads of committed data with a fresh buffer layered on top,
 * and reads in a new transaction after the previous transaction's buffer was rolled back.
 */
@Test
public void testMultiGetIncludesBuffer() throws Exception {
    DatasetAdmin tableAdmin = getTableAdmin(CONTEXT1, MY_TABLE);
    tableAdmin.create();
    try (BufferingTable bufferedTable = getTable(CONTEXT1, MY_TABLE)) {
        // ---- phase 1: everything lives in the buffer of the first transaction ----
        Transaction writeTx = txClient.startShort();
        bufferedTable.startTx(writeTx);
        // Two rows are written; expected table contents (all still buffered):
        //       c1    c2    c3    c4
        // r1     1     2     3     -
        // r2     -     3     2     1
        bufferedTable.put(R1, a(C1, C2, C3), lb(1, 2, 3));
        bufferedTable.put(R2, a(C2, C3, C4), lb(3, 2, 1));

        // a multi-get for whole rows must return the buffered writes
        List<Get> wholeRowGets = Lists.newArrayList(new Get(R1), new Get(R2));
        List<Row> bufferedRows = bufferedTable.get(wholeRowGets);
        Assert.assertEquals(2, bufferedRows.size());
        TableAssert.assertRow(bufferedRows.get(0), R1, a(C1, C2, C3), lb(1, 2, 3));
        TableAssert.assertRow(bufferedRows.get(1), R2, a(C2, C3, C4), lb(3, 2, 1));

        // a multi-get restricted to columns c2/c3, plus one get for a row that was never written
        List<Get> columnGets = Lists.newArrayList(new Get(R1, C2, C3), new Get(R2, C2, C3), new Get(R3));
        List<Row> projectedRows = bufferedTable.get(columnGets);
        Assert.assertEquals(3, projectedRows.size());
        TableAssert.assertRow(projectedRows.get(0), R1, a(C2, C3), lb(2, 3));
        TableAssert.assertRow(projectedRows.get(1), R2, a(C2, C3), lb(3, 2));
        Assert.assertTrue(projectedRows.get(2).isEmpty());

        // commit the first transaction so that the data is persisted
        txClient.canCommitOrThrow(writeTx, bufferedTable.getTxChanges());
        Assert.assertTrue(bufferedTable.commitTx());
        txClient.commitOrThrow(writeTx);
        bufferedTable.postTxCommit();

        // ---- phase 2: persisted data with a new buffer on top ----
        Transaction rolledBackTx = txClient.startShort();
        bufferedTable.startTx(rolledBackTx);
        // Overwrite and delete columns of r1, delete r2 entirely, add r3; expected view:
        //       c1    c2    c3    c4    c5
        // r1     -     -     3     2     -
        // r2     -     -     -     -     -
        // r3     -     -     -     -     1
        bufferedTable.put(R1, a(C2, C3, C4), lb(4, 3, 2));
        bufferedTable.delete(R1, a(C1, C2));
        bufferedTable.delete(R2);
        bufferedTable.put(R3, C5, L1);

        // the multi-get must merge the persisted rows with the buffered changes
        List<Get> allRowGets = Lists.newArrayList(new Get(R1), new Get(R2), new Get(R3));
        List<Row> mergedRows = bufferedTable.get(allRowGets);
        Assert.assertEquals(3, mergedRows.size());
        TableAssert.assertRow(mergedRows.get(0), R1, a(C3, C4), lb(3, 2));
        Assert.assertTrue(mergedRows.get(1).isEmpty());
        TableAssert.assertRow(mergedRows.get(2), R3, a(C5), lb(1));

        // simulate a write conflict: roll the buffer back and abort the transaction
        Assert.assertTrue(bufferedTable.rollbackTx());
        txClient.abort(rolledBackTx);

        // ---- phase 3: a fresh transaction only sees what the first transaction committed ----
        Transaction verifyTx = txClient.startShort();
        bufferedTable.startTx(verifyTx);
        List<Get> committedRowGets = Lists.newArrayList(new Get(R1), new Get(R2));
        List<Row> committedRows = bufferedTable.get(committedRowGets);
        Assert.assertEquals(2, committedRows.size());
        TableAssert.assertRow(committedRows.get(0), R1, a(C1, C2, C3), lb(1, 2, 3));
        TableAssert.assertRow(committedRows.get(1), R2, a(C2, C3, C4), lb(3, 2, 1));
    } finally {
        tableAdmin.drop();
    }
}
Also used : Transaction(org.apache.tephra.Transaction) Get(io.cdap.cdap.api.dataset.table.Get) DatasetAdmin(io.cdap.cdap.api.dataset.DatasetAdmin) Row(io.cdap.cdap.api.dataset.table.Row) Test(org.junit.Test)

Example 97 with DatasetAdmin

use of io.cdap.cdap.api.dataset.DatasetAdmin in project cdap by cdapio.

the class BufferingTableTest method testChangingParamsAndReturnValues.

/**
 * Checks that the table is insulated from in-place mutation of byte arrays: arrays passed as parameters
 * to write methods may be modified afterwards, and arrays/maps returned from read methods may be modified
 * by the caller, without the data held by the table changing.
 *
 * <p>After each mutation the test calls {@code verify123}, a helper defined outside this method
 * (presumably it asserts that row {1}, column {2} still holds 3L; confirm against its definition).
 */
@Test
public void testChangingParamsAndReturnValues() throws Exception {
    // The test verifies that one can re-use byte arrays passed as parameters to write methods of a table without
    // affecting the stored data.
    // Also, one can re-use (modify) returned data from the table without affecting the stored data.
    DatasetProperties props = TableProperties.builder().setReadlessIncrementSupport(isReadlessIncrementSupported()).build();
    DatasetAdmin admin = getTableAdmin(CONTEXT1, MY_TABLE, props);
    admin.create();
    try (BufferingTable table = getTable(CONTEXT1, MY_TABLE, props)) {
        // writing some data: we'll need it to test delete later
        Transaction tx = txClient.startShort();
        table.startTx(tx);
        table.put(new byte[] { 0 }, new byte[] { 9 }, new byte[] { 8 });
        // NOTE(review): the result of commitTx() is not asserted here and canCommitOrThrow/postTxCommit are
        // not called, unlike in testMultiGetIncludesBuffer; confirm this shortcut is intended
        table.commitTx();
        txClient.commitOrThrow(tx);
        // start new for in-mem buffer behavior testing
        tx = txClient.startShort();
        table.startTx(tx);
        // write some data but not commit
        byte[] rowParam = new byte[] { 1 };
        byte[] colParam = new byte[] { 2 };
        byte[] valParam = Bytes.toBytes(3L);
        table.put(rowParam, colParam, valParam);
        verify123(table);
        // change passed earlier byte arrays in place, this should not affect stored previously values
        rowParam[0]++;
        colParam[0]++;
        valParam[0]++;
        verify123(table);
        // try get row and change returned values in place, which should not affect the data stored
        Row getRow = table.get(new byte[] { 1 });
        Map<byte[], byte[]> getRowResult = getRow.getColumns();
        Assert.assertEquals(1, getRowResult.size());
        byte[] colFromGetRow = getRowResult.keySet().iterator().next();
        byte[] valFromGetRow = getRowResult.get(colFromGetRow);
        // besides mutating the arrays, also try to remove the entry from the returned map itself
        getRowResult.remove(new byte[] { 2 });
        Assert.assertArrayEquals(new byte[] { 2 }, colFromGetRow);
        Assert.assertArrayEquals(Bytes.toBytes(3L), valFromGetRow);
        colFromGetRow[0]++;
        valFromGetRow[0]++;
        verify123(table);
        // try get set of columns in a row and change returned values in place, which should not affect the data stored
        // NOTE(review): this issues the same whole-row get as the section above; the comment suggests a get
        // restricted to a set of columns was intended. Confirm and switch to that overload if so
        Row getColumnSetRow = table.get(new byte[] { 1 });
        Map<byte[], byte[]> getColumnSetResult = getColumnSetRow.getColumns();
        Assert.assertEquals(1, getColumnSetResult.size());
        byte[] colFromGetColumnSet = getColumnSetResult.keySet().iterator().next();
        byte[] valFromGetColumnSet = getColumnSetResult.values().iterator().next();
        getColumnSetResult.remove(new byte[] { 2 });
        Assert.assertArrayEquals(new byte[] { 2 }, colFromGetColumnSet);
        Assert.assertArrayEquals(Bytes.toBytes(3L), valFromGetColumnSet);
        colFromGetColumnSet[0]++;
        valFromGetColumnSet[0]++;
        verify123(table);
        // try get column and change returned value in place, which should not affect the data stored
        byte[] valFromGetColumn = table.get(new byte[] { 1 }, new byte[] { 2 });
        Assert.assertArrayEquals(Bytes.toBytes(3L), valFromGetColumn);
        valFromGetColumn[0]++;
        verify123(table);
        // try scan and change returned value in place, which should not affect the data stored
        Scanner scan = table.scan(new byte[] { 1 }, null);
        Row next = scan.next();
        Assert.assertNotNull(next);
        byte[] rowFromScan = next.getRow();
        Assert.assertArrayEquals(new byte[] { 1 }, rowFromScan);
        Map<byte[], byte[]> cols = next.getColumns();
        Assert.assertEquals(1, cols.size());
        byte[] colFromScan = cols.keySet().iterator().next();
        Assert.assertArrayEquals(new byte[] { 2 }, colFromScan);
        byte[] valFromScan = next.get(new byte[] { 2 });
        Assert.assertNotNull(valFromScan);
        Assert.assertArrayEquals(Bytes.toBytes(3L), valFromScan);
        // the scan started at row {1} is expected to yield exactly one row
        Assert.assertNull(scan.next());
        cols.remove(new byte[] { 2 });
        rowFromScan[0]++;
        colFromScan[0]++;
        valFromScan[0]++;
        verify123(table);
        // try delete and change params in place: this should not affect stored data
        rowParam = new byte[] { 1 };
        colParam = new byte[] { 2 };
        table.delete(rowParam, colParam);
        Assert.assertNull(table.get(new byte[] { 1 }, new byte[] { 2 }));
        Assert.assertArrayEquals(new byte[] { 8 }, table.get(new byte[] { 0 }, new byte[] { 9 }));
        // re-point the parameter arrays at the cell committed earlier ({0}/{9}): the assertions below check
        // that the delete still applies to {1}/{2} and that {0}/{9} keeps its value
        rowParam[0] = 0;
        colParam[0] = 9;
        Assert.assertNull(table.get(new byte[] { 1 }, new byte[] { 2 }));
        Assert.assertArrayEquals(new byte[] { 8 }, table.get(new byte[] { 0 }, new byte[] { 9 }));
        // try increment column and change params in place: this should not affect stored data
        // (the column was deleted just above; the increment by 3 is followed by verify123)
        byte[] rowIncParam = new byte[] { 1 };
        byte[] colIncParam = new byte[] { 2 };
        table.increment(rowIncParam, colIncParam, 3);
        verify123(table);
        rowIncParam[0]++;
        colIncParam[0]++;
        verify123(table);
        // try increment set of columns and change params in place, try also to change values in returned map: this all
        // should not affect stored data.
        rowIncParam = new byte[] { 1 };
        colIncParam = new byte[] { 2 };
        // -1 followed by +1: the net change to the counter is zero
        table.increment(rowIncParam, colIncParam, -1);
        table.increment(rowIncParam, new byte[][] { colIncParam }, new long[] { 1 });
        verify123(table);
        rowIncParam[0]++;
        colIncParam[0]++;
        verify123(table);
        // try increment and change returned values: should not affect the stored data
        rowIncParam = new byte[] { 1 };
        colIncParam = new byte[] { 2 };
        table.increment(rowIncParam, colIncParam, -1);
        Row countersRow = table.incrementAndGet(rowIncParam, new byte[][] { colIncParam }, new long[] { 1 });
        Map<byte[], byte[]> counters = countersRow.getColumns();
        Assert.assertEquals(1, counters.size());
        byte[] colFromInc = counters.keySet().iterator().next();
        Assert.assertArrayEquals(new byte[] { 2 }, colFromInc);
        Assert.assertEquals(3, Bytes.toLong(counters.get(colFromInc)));
        counters.remove(new byte[] { 2 });
        colFromInc[0]++;
        verify123(table);
        // try increment write and change params in place: this should not affect stored data
        rowIncParam = new byte[] { 1 };
        colIncParam = new byte[] { 2 };
        table.increment(rowIncParam, colIncParam, -1);
        table.increment(rowIncParam, new byte[][] { colIncParam }, new long[] { 1 });
        verify123(table);
        rowIncParam[0]++;
        colIncParam[0]++;
        verify123(table);
        // try compareAndSwap and change params in place: this should not affect stored data
        byte[] rowSwapParam = new byte[] { 1 };
        byte[] colSwapParam = new byte[] { 2 };
        byte[] valSwapParam = Bytes.toBytes(3L);
        // swap 3 -> 4 and then 4 -> 3; the second swap passes valSwapParam as the new value so that the
        // array mutated below is one the table has been handed
        table.compareAndSwap(rowSwapParam, colSwapParam, Bytes.toBytes(3L), Bytes.toBytes(4L));
        table.compareAndSwap(rowSwapParam, colSwapParam, Bytes.toBytes(4L), valSwapParam);
        verify123(table);
        rowSwapParam[0]++;
        colSwapParam[0]++;
        valSwapParam[0]++;
        verify123(table);
    // We don't care to persist changes and commit tx here: we tested what we wanted
    } finally {
        admin.drop();
    }
}
Also used : Scanner(io.cdap.cdap.api.dataset.table.Scanner) Transaction(org.apache.tephra.Transaction) DatasetProperties(io.cdap.cdap.api.dataset.DatasetProperties) DatasetAdmin(io.cdap.cdap.api.dataset.DatasetAdmin) Row(io.cdap.cdap.api.dataset.table.Row) Test(org.junit.Test)

Example 98 with DatasetAdmin

use of io.cdap.cdap.api.dataset.DatasetAdmin in project cdap by cdapio.

the class HBaseTableTest method testScannerCache.

/**
 * Writes a large number of rows, expects a scan without any scanner cache setting to fail with a
 * wrapped {@link ScannerTimeoutException}, and then repeats the scan with different scanner cache
 * settings passed to the {@code testScannerCache} helper overload (defined elsewhere).
 */
@Test
public void testScannerCache() throws Exception {
    final String tableName = "scanCache";
    // Note: it appears that HBase only enforces the scanner timeout after 10 seconds; setting it to
    // 3 seconds does not mean a scan will actually fail after 3 seconds. The scan therefore has to
    // take longer than 10 seconds, here: 1200 times a 10ms sleep.
    final int rowCount = 1200;
    DatasetAdmin tableAdmin = getTableAdmin(CONTEXT1, tableName);
    tableAdmin.create();
    try (Table scanTable = getTable(CONTEXT1, tableName)) {
        // populate the table inside a single short transaction and commit it
        Transaction writeTx = txClient.startShort();
        TransactionAware txAware = (TransactionAware) scanTable;
        txAware.startTx(writeTx);
        for (int row = 0; row < rowCount; row++) {
            scanTable.put(new Put(String.valueOf(row), "x", "y"));
        }
        txClient.canCommitOrThrow(writeTx, txAware.getTxChanges());
        Assert.assertTrue(txAware.commitTx());
        txClient.commitOrThrow(writeTx);

        // without any cache setting the scan is expected to time out
        try {
            testScannerCache(rowCount, tableName, null, null, null);
            Assert.fail("this should have failed with ScannerTimeoutException");
        } catch (Exception scanFailure) {
            // the expected failure is a RuntimeException wrapping an HBase ScannerTimeoutException;
            // anything else is a genuine error and is propagated
            boolean timedOut = scanFailure.getCause() instanceof ScannerTimeoutException;
            if (!timedOut) {
                throw scanFailure;
            }
        }

        // cache=100 given as dataset property
        testScannerCache(rowCount, tableName, "100", null, null);
        // cache=100 given as dataset runtime argument
        testScannerCache(rowCount, tableName, "1000", "100", null);
        // cache=100 given as scan property
        testScannerCache(rowCount, tableName, "5000", "1000", "100");
    } finally {
        tableAdmin.drop();
    }
}
Also used : BufferingTable(io.cdap.cdap.data2.dataset2.lib.table.BufferingTable) Table(io.cdap.cdap.api.dataset.table.Table) DelegatingTable(io.cdap.cdap.data2.util.hbase.DelegatingTable) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) DatasetAdmin(io.cdap.cdap.api.dataset.DatasetAdmin) ScannerTimeoutException(org.apache.hadoop.hbase.client.ScannerTimeoutException) Put(io.cdap.cdap.api.dataset.table.Put) ScannerTimeoutException(org.apache.hadoop.hbase.client.ScannerTimeoutException) IOException(java.io.IOException) BufferingTableTest(io.cdap.cdap.data2.dataset2.lib.table.BufferingTableTest) Test(org.junit.Test)

Example 99 with DatasetAdmin

use of io.cdap.cdap.api.dataset.DatasetAdmin in project cdap by cdapio.

the class DatasetAdminService method drop.

/**
 * Drops the dataset described by the given specification.
 *
 * <p>The dataset type is resolved from {@code typeMeta} and the drop is executed through
 * {@code ImpersonationUtils.doAs} with the UGI obtained for {@code datasetInstanceId}. The admin
 * is closed quietly once the drop has been attempted.
 *
 * @param datasetInstanceId the dataset instance to drop
 * @param typeMeta type meta used to instantiate the dataset type
 * @param spec specification of the dataset to drop
 * @throws Exception if the drop fails; a {@code BadRequestException} is raised inside the
 *     impersonated call when the dataset type cannot be instantiated from {@code typeMeta}
 */
public void drop(final DatasetId datasetInstanceId, final DatasetTypeMeta typeMeta, final DatasetSpecification spec) throws Exception {
    LOG.info("Dropping dataset with spec: {}, type meta: {}", spec, typeMeta);
    try (DatasetClassLoaderProvider loaderProvider = new DirectoryClassLoaderProvider(cConf, locationFactory)) {
        UserGroupInformation datasetUgi = getUgiForDataset(impersonator, datasetInstanceId);
        Callable<Void> dropTask = () -> {
            DatasetType datasetType = dsFramework.getDatasetType(typeMeta, null, loaderProvider);
            if (datasetType == null) {
                throw new BadRequestException(String.format("Cannot instantiate dataset type using provided type meta: %s", typeMeta));
            }
            DatasetContext namespaceContext = DatasetContext.from(datasetInstanceId.getNamespace());
            DatasetAdmin datasetAdmin = datasetType.getAdmin(namespaceContext, spec);
            try {
                datasetAdmin.drop();
            } finally {
                // a failure to close the admin must not mask the outcome of the drop itself
                Closeables.closeQuietly(datasetAdmin);
            }
            return null;
        };
        ImpersonationUtils.doAs(datasetUgi, dropTask);
    }
}
Also used : DirectoryClassLoaderProvider(io.cdap.cdap.data2.datafabric.dataset.type.DirectoryClassLoaderProvider) BadRequestException(io.cdap.cdap.common.BadRequestException) DatasetAdmin(io.cdap.cdap.api.dataset.DatasetAdmin) DatasetClassLoaderProvider(io.cdap.cdap.data2.datafabric.dataset.type.DatasetClassLoaderProvider) DatasetType(io.cdap.cdap.data2.datafabric.dataset.DatasetType) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)

Example 100 with DatasetAdmin

use of io.cdap.cdap.api.dataset.DatasetAdmin in project cdap by cdapio.

the class DatasetAdminService method createOrUpdate.

/**
 * Configures a dataset and then either creates it or, when it already exists, updates it.
 *
 * <p>Resolving the dataset type and the configure/create/update steps are executed through
 * {@code ImpersonationUtils.doAs} with the UGI obtained for the dataset; the system metadata is
 * computed afterwards, outside of those {@code doAs} calls.
 *
 * @param datasetInstanceId dataset instance to be created or updated
 * @param typeMeta type meta for the dataset
 * @param props dataset instance properties
 * @param existing the existing specification if the dataset already exists (update case),
 *     {@code null} when the dataset is to be created
 * @return a response holding the resulting dataset specification and the computed system metadata
 * @throws Exception any failure, after it has been logged (at debug level for an
 *     {@code IncompatibleUpdateException}, at error level otherwise)
 */
public DatasetCreationResponse createOrUpdate(final DatasetId datasetInstanceId, final DatasetTypeMeta typeMeta, final DatasetProperties props, @Nullable final DatasetSpecification existing) throws Exception {
    final boolean isUpdate = existing != null;
    if (isUpdate) {
        LOG.info("Updating dataset instance {}, type meta: {}, existing: {}", datasetInstanceId, typeMeta, existing);
    } else {
        LOG.info("Creating dataset instance {}, type meta: {}", datasetInstanceId, typeMeta);
    }
    try (DatasetClassLoaderProvider loaderProvider = new DirectoryClassLoaderProvider(cConf, locationFactory)) {
        final DatasetContext context = DatasetContext.from(datasetInstanceId.getNamespace());
        UserGroupInformation ugi = getUgiForDataset(impersonator, datasetInstanceId);

        // step 1: resolve the dataset type while impersonating
        final DatasetType type = ImpersonationUtils.doAs(ugi, () -> {
            LOG.trace("Getting dataset type {}", typeMeta.getName());
            DatasetType resolvedType = dsFramework.getDatasetType(typeMeta, null, loaderProvider);
            if (resolvedType == null) {
                throw new BadRequestException(String.format("Cannot instantiate dataset type using provided type meta: %s", typeMeta));
            }
            LOG.trace("Got dataset type {}", typeMeta.getName());
            return resolvedType;
        });

        // step 2: (re)configure the specification and apply it through the dataset admin
        DatasetSpecification spec = ImpersonationUtils.doAs(ugi, () -> {
            LOG.trace("Configuring dataset {} of type {}", datasetInstanceId.getDataset(), typeMeta.getName());
            DatasetSpecification configuredSpec;
            if (isUpdate) {
                configuredSpec = type.reconfigure(datasetInstanceId.getEntityName(), props, existing);
            } else {
                configuredSpec = type.configure(datasetInstanceId.getEntityName(), props);
            }
            LOG.trace("Configured dataset {} of type {}", datasetInstanceId.getDataset(), typeMeta.getName());
            DatasetAdmin datasetAdmin = type.getAdmin(context, configuredSpec);
            try {
                if (!isUpdate) {
                    LOG.trace("Creating dataset {} of type {}", datasetInstanceId.getDataset(), typeMeta.getName());
                    datasetAdmin.create();
                    LOG.trace("Created dataset {} of type {}", datasetInstanceId.getDataset(), typeMeta.getName());
                } else if (datasetAdmin instanceof Updatable) {
                    ((Updatable) datasetAdmin).update(existing);
                } else {
                    // admins that are not Updatable fall back to upgrade()
                    datasetAdmin.upgrade();
                }
            } finally {
                Closeables.closeQuietly(datasetAdmin);
            }
            return configuredSpec;
        });

        // step 3: system metadata
        // Writing system metadata should be done without impersonation since user may not have access to system tables.
        LOG.trace("Computing metadata for dataset {}", datasetInstanceId.getDataset());
        SystemMetadata metadata = computeSystemMetadata(datasetInstanceId, spec, props, typeMeta, type, context, isUpdate, ugi);
        LOG.trace("Computed metadata for dataset {}", datasetInstanceId.getDataset());
        return new DatasetCreationResponse(spec, metadata);
    } catch (Exception failure) {
        if (!(failure instanceof IncompatibleUpdateException)) {
            LOG.error("Error {} dataset '{}': {}", isUpdate ? "updating" : "creating", datasetInstanceId, failure.getMessage(), failure);
        } else {
            // this is expected to happen if user provides bad update properties, so we log this as debug
            LOG.debug("Incompatible update for dataset '{}'", datasetInstanceId, failure);
        }
        throw failure;
    }
}
Also used : DatasetSpecification(io.cdap.cdap.api.dataset.DatasetSpecification) DatasetAdmin(io.cdap.cdap.api.dataset.DatasetAdmin) DatasetType(io.cdap.cdap.data2.datafabric.dataset.DatasetType) IncompatibleUpdateException(io.cdap.cdap.api.dataset.IncompatibleUpdateException) AccessException(io.cdap.cdap.api.security.AccessException) IOException(java.io.IOException) BadRequestException(io.cdap.cdap.common.BadRequestException) NotFoundException(io.cdap.cdap.common.NotFoundException) DirectoryClassLoaderProvider(io.cdap.cdap.data2.datafabric.dataset.type.DirectoryClassLoaderProvider) Updatable(io.cdap.cdap.api.dataset.Updatable) SystemMetadata(io.cdap.cdap.data2.metadata.system.SystemMetadata) BadRequestException(io.cdap.cdap.common.BadRequestException) DatasetClassLoaderProvider(io.cdap.cdap.data2.datafabric.dataset.type.DatasetClassLoaderProvider) DatasetContext(io.cdap.cdap.api.dataset.DatasetContext) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) IncompatibleUpdateException(io.cdap.cdap.api.dataset.IncompatibleUpdateException)

Aggregations

DatasetAdmin (io.cdap.cdap.api.dataset.DatasetAdmin)112 Test (org.junit.Test)60 Table (io.cdap.cdap.api.dataset.table.Table)54 Transaction (org.apache.tephra.Transaction)54 TransactionAware (org.apache.tephra.TransactionAware)46 HBaseTable (io.cdap.cdap.data2.dataset2.lib.table.hbase.HBaseTable)42 DatasetSpecification (io.cdap.cdap.api.dataset.DatasetSpecification)20 DatasetProperties (io.cdap.cdap.api.dataset.DatasetProperties)16 Get (io.cdap.cdap.api.dataset.table.Get)16 Put (io.cdap.cdap.api.dataset.table.Put)14 IOException (java.io.IOException)14 Row (io.cdap.cdap.api.dataset.table.Row)12 DatasetType (io.cdap.cdap.data2.datafabric.dataset.DatasetType)10 TransactionConflictException (org.apache.tephra.TransactionConflictException)10 Scan (io.cdap.cdap.api.dataset.table.Scan)8 BufferingTableTest (io.cdap.cdap.data2.dataset2.lib.table.BufferingTableTest)8 AbstractModule (com.google.inject.AbstractModule)6 TypeLiteral (com.google.inject.TypeLiteral)6 DatasetContext (io.cdap.cdap.api.dataset.DatasetContext)6 DatasetManagementException (io.cdap.cdap.api.dataset.DatasetManagementException)6