Example 71 with DataSetException

Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.

The class TimePartitionedFileSetTest, method testAddGetPartitions.

@Test
public void testAddGetPartitions() throws Exception {
    final TimePartitionedFileSet fileSet = dsFrameworkUtil.getInstance(TPFS_INSTANCE);
    TransactionAware txAwareDataset = (TransactionAware) fileSet;
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // an arbitrary date to use as the test time
            long time = DATE_FORMAT.parse("12/10/14 5:10 am").getTime();
            long time2 = time + HOUR;
            String firstPath = "first/partition";
            String secondPath = "second/partition";
            // make sure the file set has no partitions initially
            validateTimePartition(fileSet, time, null);
            validateTimePartitions(fileSet, 0L, MAX, Collections.<Long, String>emptyMap());
            // add a partition, verify getPartition() works
            fileSet.addPartition(time, firstPath);
            validateTimePartition(fileSet, time, firstPath);
            Map<Long, String> expectNone = Collections.emptyMap();
            Map<Long, String> expectFirst = ImmutableMap.of(time, firstPath);
            Map<Long, String> expectSecond = ImmutableMap.of(time2, secondPath);
            Map<Long, String> expectBoth = ImmutableMap.of(time, firstPath, time2, secondPath);
            // verify various ways to list partitions with various ranges
            validateTimePartitions(fileSet, time + MINUTE, MAX, expectNone);
            validateTimePartitions(fileSet, 0L, time, expectNone);
            validateTimePartitions(fileSet, 0L, MAX, expectFirst);
            validateTimePartitions(fileSet, 0L, time + MINUTE, expectFirst);
            validateTimePartitions(fileSet, 0L, time + HOUR, expectFirst);
            validateTimePartitions(fileSet, time - HOUR, time + HOUR, expectFirst);
            // add and verify another partition
            fileSet.addPartition(time2, secondPath);
            validateTimePartition(fileSet, time2, secondPath);
            // verify various ways to list partitions with various ranges
            validateTimePartitions(fileSet, 0L, MAX, expectBoth);
            validateTimePartitions(fileSet, time, time + 30 * MINUTE, expectFirst);
            validateTimePartitions(fileSet, time + 30 * MINUTE, time2, expectNone);
            validateTimePartitions(fileSet, time + 30 * MINUTE, time2 + 30 * MINUTE, expectSecond);
            validateTimePartitions(fileSet, time - 30 * MINUTE, time2 + 30 * MINUTE, expectBoth);
            // try to add another partition with the same key
            try {
                fileSet.addPartition(time2, "third/partition");
                Assert.fail("Should have thrown Exception for duplicate partition");
            } catch (DataSetException e) {
            // expected
            }
            // remove first partition and validate
            fileSet.dropPartition(time);
            validateTimePartition(fileSet, time, null);
            // verify various ways to list partitions with various ranges
            validateTimePartitions(fileSet, 0L, MAX, expectSecond);
            validateTimePartitions(fileSet, time, time + 30 * MINUTE, expectNone);
            validateTimePartitions(fileSet, time + 30 * MINUTE, time2, expectNone);
            validateTimePartitions(fileSet, time + 30 * MINUTE, time2 + 30 * MINUTE, expectSecond);
            validateTimePartitions(fileSet, time - 30 * MINUTE, time2 + 30 * MINUTE, expectSecond);
            // dropping the already-removed partition again should not throw
            try {
                fileSet.dropPartition(time);
            } catch (DataSetException e) {
                Assert.fail("Should not have have thrown Exception for removing non-existent partition");
            }
        }
    });
}
Also used: DataSetException (io.cdap.cdap.api.dataset.DataSetException), TransactionAware (org.apache.tephra.TransactionAware), TransactionExecutor (org.apache.tephra.TransactionExecutor), TimePartitionedFileSet (io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), TransactionFailureException (org.apache.tephra.TransactionFailureException), UnauthorizedException (io.cdap.cdap.security.spi.authorization.UnauthorizedException), DatasetManagementException (io.cdap.cdap.api.dataset.DatasetManagementException), IOException (java.io.IOException), Test (org.junit.Test)
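
The duplicate-add behavior this test pins down suggests a small guard pattern. Below is a minimal sketch, not part of the test above: the class and method names are hypothetical, but addPartition(long, String) and the DataSetException it throws for an already-mapped time are exactly the APIs the test exercises. Like the test body, it must run inside a transaction.

import io.cdap.cdap.api.dataset.DataSetException;
import io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet;

public final class TpfsHelpers {

    private TpfsHelpers() {
    }

    /**
     * Adds a partition unless one already exists for the given time.
     * Returns true if the partition was added, false if it already existed.
     * Hypothetical helper wrapping the behavior verified by the test above.
     */
    public static boolean addPartitionIfAbsent(TimePartitionedFileSet fileSet, long time, String path) {
        try {
            fileSet.addPartition(time, path);
            return true;
        } catch (DataSetException e) {
            // thrown when a partition for this time is already mapped (per the test above);
            // keep the existing partition untouched
            return false;
        }
    }
}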

Example 72 with DataSetException

Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.

The class TimePartitionedFileSetTest, method testPartitionMetadata.

@Test
public void testPartitionMetadata() throws Exception {
    final TimePartitionedFileSet tpfs = dsFrameworkUtil.getInstance(TPFS_INSTANCE);
    TransactionAware txAware = (TransactionAware) tpfs;
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAware).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // make sure the dataset has no partitions
            validateTimePartitions(tpfs, 0L, MAX, Collections.<Long, String>emptyMap());
            Date date = DATE_FORMAT.parse("6/4/12 10:00 am");
            long time = date.getTime();
            // keep track of all the metadata added
            Map<String, String> allMetadata = Maps.newHashMap();
            Map<String, String> metadata = ImmutableMap.of("key1", "value1", "key2", "value3", "key100", "value4");
            tpfs.addPartition(time, "file", metadata);
            allMetadata.putAll(metadata);
            TimePartitionDetail partitionByTime = tpfs.getPartitionByTime(time);
            Assert.assertNotNull(partitionByTime);
            Assert.assertEquals(metadata, partitionByTime.getMetadata().asMap());
            tpfs.addMetadata(time, "key3", "value4");
            allMetadata.put("key3", "value4");
            // setMetadata() overwrites the previous value when the key already exists
            tpfs.setMetadata(time, Collections.singletonMap("key3", "value5"));
            allMetadata.put("key3", "value5");
            Map<String, String> newMetadata = ImmutableMap.of("key4", "value4", "key5", "value5");
            tpfs.addMetadata(time, newMetadata);
            allMetadata.putAll(newMetadata);
            try {
                // addMetadata() for a key that already exists throws a DataSetException
                tpfs.addMetadata(time, "key3", "value5");
                Assert.fail("Expected not to be able to update an existing metadata entry");
            } catch (DataSetException expected) {
            }
            partitionByTime = tpfs.getPartitionByTime(time);
            Assert.assertNotNull(partitionByTime);
            Assert.assertEquals(allMetadata, partitionByTime.getMetadata().asMap());
            // remove metadata entries; specifying a key that does not exist ('key6') does not cause an error
            tpfs.removeMetadata(time, ImmutableSet.of("key4", "key5", "key6"));
            allMetadata.remove("key4");
            allMetadata.remove("key5");
            partitionByTime = tpfs.getPartitionByTime(time);
            Assert.assertNotNull(partitionByTime);
            Assert.assertEquals(allMetadata, partitionByTime.getMetadata().asMap());
        }
    });
}
Also used: DataSetException (io.cdap.cdap.api.dataset.DataSetException), TransactionAware (org.apache.tephra.TransactionAware), TransactionExecutor (org.apache.tephra.TransactionExecutor), TimePartitionDetail (io.cdap.cdap.api.dataset.lib.TimePartitionDetail), TimePartitionedFileSet (io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), TransactionFailureException (org.apache.tephra.TransactionFailureException), UnauthorizedException (io.cdap.cdap.security.spi.authorization.UnauthorizedException), DatasetManagementException (io.cdap.cdap.api.dataset.DatasetManagementException), IOException (java.io.IOException), Date (java.util.Date), Test (org.junit.Test)
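
The test draws a line between addMetadata(), which throws a DataSetException for a key that already exists, and setMetadata(), which overwrites. Below is a hedged sketch of an "upsert" built from those two calls; the class and method names are hypothetical, the two API calls are the ones used above, and like the test it must run inside a transaction.

import java.util.Collections;

import io.cdap.cdap.api.dataset.DataSetException;
import io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet;

public final class TpfsMetadataHelpers {

    private TpfsMetadataHelpers() {
    }

    /**
     * Writes a metadata entry for the partition at the given time,
     * overwriting any existing value. Hypothetical helper.
     */
    public static void upsertMetadata(TimePartitionedFileSet tpfs, long time, String key, String value) {
        try {
            tpfs.addMetadata(time, key, value);
        } catch (DataSetException e) {
            // key already present: setMetadata() replaces the previous value, as the test shows
            tpfs.setMetadata(time, Collections.singletonMap(key, value));
        }
    }
}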

Example 73 with DataSetException

Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.

The class PartitionRollbackTestRun, method testPFSRollback.

/*
   * This tests all the following cases:
   *
   *  1. addPartition(location) fails because partition already exists
   *  2. addPartition(location) fails because Hive partition already exists
   *  3. addPartition(location) succeeds but transaction fails
   *  4. getPartitionOutput() fails because partition already exists
   *  5. partitionOutput.addPartition() fails because Hive partition already exists
   *  6. partitionOutput.addPartition() succeeds but transaction fails
   *  7. mapreduce writing partition fails because location already exists
   *  8. mapreduce writing partition fails because partition already exists
   *  9. mapreduce writing partition fails because Hive partition already exists
   *  10. mapreduce writing dynamic partition fails because location already exists
   *  11. mapreduce writing dynamic partition fails because partition already exists
   *  12. mapreduce writing dynamic partition fails because Hive partition already exists
   *  13. multi-output mapreduce writing partition fails because location already exists
   *  13a. first output fails, other output must roll back partitions 0 and 5
   *  13b. second output fails, first output must roll back partitions 0 and 5
   *  14. multi-output mapreduce writing partition fails because partition already exists
   *  14a. first output fails, other output must roll back partition 5
   *  14b. second output fails, first output must roll back partition 5
   *  15. multi-output mapreduce writing partition fails because Hive partition already exists
   *  15a. first output fails, other output must roll back partitions 0 and 5
   *  15b. second output fails, first output must roll back partitions 0 and 5
   *
   * For all these cases, we validate that existing files and partitions are preserved, and newly
   * added files and partitions are rolled back.
   */
@Test
public void testPFSRollback() throws Exception {
    ApplicationManager appManager = deployApplication(AppWritingToPartitioned.class);
    MapReduceManager mrManager = appManager.getMapReduceManager(MAPREDUCE);
    int numRuns = 0;
    Validator pfsValidator = new Validator(PFS);
    Validator otherValidator = new Validator(OTHER);
    final UnitTestManager.UnitTestDatasetManager<PartitionedFileSet> pfsManager = pfsValidator.getPfsManager();
    final PartitionedFileSet pfs = pfsManager.get();
    final PartitionedFileSet other = otherValidator.getPfsManager().get();
    final String path3 = pfsValidator.getRelativePath3();
    // 1. addPartition(location) fails because partition already exists
    try {
        pfsManager.execute(new Runnable() {

            @Override
            public void run() {
                pfs.addPartition(KEY_1, path3);
            }
        });
        Assert.fail("Expected tx to fail because partition for number=1 already exists");
    } catch (TransactionFailureException e) {
    // expected
    }
    pfsValidator.validate();
    // 2. addPartition(location) fails because Hive partition already exists
    try {
        pfsManager.execute(new Runnable() {

            @Override
            public void run() {
                pfs.addPartition(KEY_4, path3);
            }
        });
        Assert.fail("Expected tx to fail because hive partition for number=1 already exists");
    } catch (TransactionFailureException e) {
    // expected
    }
    pfsValidator.validate();
    // 3. addPartition(location) succeeds but transaction fails
    try {
        pfsManager.execute(new Runnable() {

            @Override
            public void run() {
                pfs.addPartition(KEY_3, path3);
                throw new RuntimeException("fail the tx");
            }
        });
        Assert.fail("Expected tx to fail because it threw a runtime exception");
    } catch (TransactionFailureException e) {
    // expected
    }
    pfsValidator.validate();
    // 4. getPartitionOutput() fails because partition already exists
    try {
        pfs.getPartitionOutput(KEY_1);
        Assert.fail("Expected getPartitionOutput to fail, because the partition already exists.");
    } catch (DataSetException expected) {
    }
    pfsValidator.validate();
    // 5. partitionOutput.addPartition() fails because Hive partition already exists
    final PartitionOutput output4x = pfs.getPartitionOutput(KEY_4);
    final Location location4x = output4x.getLocation();
    try (Writer writer = new OutputStreamWriter(location4x.append("file").getOutputStream())) {
        writer.write("4x,4x\n");
    }
    try {
        pfsManager.execute(new Runnable() {

            @Override
            public void run() {
                output4x.addPartition();
            }
        });
        Assert.fail("Expected tx to fail because hive partition for number=4 already exists");
    } catch (TransactionFailureException e) {
    // expected
    }
    pfsValidator.validate();
    Assert.assertFalse(location4x.exists());
    // 6. partitionOutput.addPartition() succeeds but transaction fails
    final PartitionOutput output5x = pfs.getPartitionOutput(KEY_5);
    final Location location5x = output5x.getLocation();
    try (Writer writer = new OutputStreamWriter(location5x.append("file").getOutputStream())) {
        writer.write("5x,5x\n");
    }
    try {
        pfsManager.execute(new Runnable() {

            @Override
            public void run() {
                output5x.addPartition();
                throw new RuntimeException("fail the tx");
            }
        });
        Assert.fail("Expected tx to fail because it threw a runtime exception");
    } catch (TransactionFailureException e) {
    // expected
    }
    pfsValidator.validate();
    Assert.assertFalse(location5x.exists());
    // 7. mapreduce writing partition fails because location already exists
    mrManager.start(ImmutableMap.of(PFS_OUT, "1", "input.text", "1x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    // 8. mapreduce writing partition fails because partition already exists
    mrManager.start(ImmutableMap.of(PFS_OUT, "2", "input.text", "2x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    Assert.assertFalse(pfs.getPartitionOutput(KEY_2).getLocation().exists());
    // 9. mapreduce writing partition fails because Hive partition already exists
    mrManager.start(ImmutableMap.of(PFS_OUT, "4", "input.text", "4x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    Assert.assertFalse(pfs.getPartitionOutput(KEY_4).getLocation().exists());
    // 10. mapreduce writing dynamic partition fails because location already exists
    mrManager.start(ImmutableMap.of("input.text", "3x 5x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
    // 11. mapreduce writing dynamic partition fails because partition already exists
    mrManager.start(ImmutableMap.of("input.text", "2x 5x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    Assert.assertFalse(pfs.getPartitionOutput(KEY_2).getLocation().exists());
    Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
    // 12. mapreduce writing dynamic partition fails because Hive partition already exists
    mrManager.start(ImmutableMap.of("input.text", "0x 4x 5x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    Assert.assertFalse(pfs.getPartitionOutput(KEY_0).getLocation().exists());
    Assert.assertFalse(pfs.getPartitionOutput(KEY_4).getLocation().exists());
    Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
    // 13. multi-output mapreduce writing partition fails because location already exists
    // 13a. first output fails, other output must roll back partitions 0 and 5
    mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "1", "input.text", "0x 5x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    otherValidator.validate();
    Assert.assertFalse(other.getPartitionOutput(KEY_0).getLocation().exists());
    Assert.assertFalse(other.getPartitionOutput(KEY_5).getLocation().exists());
    // 13b. second output fails, first output must roll back partitions 0 and 5
    mrManager.start(ImmutableMap.of("output.datasets", BOTH, OTHER_OUT, "1", "input.text", "0x 5x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    otherValidator.validate();
    Assert.assertFalse(pfs.getPartitionOutput(KEY_0).getLocation().exists());
    Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
    // 14. multi-output mapreduce writing partition fails because partition already exists
    // 14a. first output fails, other output must roll back partition 5
    mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "2", OTHER_OUT, "5", "input.text", "2x 5x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    otherValidator.validate();
    Assert.assertFalse(other.getPartitionOutput(KEY_5).getLocation().exists());
    // 14b. second output fails, first output must roll back partition 5
    mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "5", OTHER_OUT, "2", "input.text", "2x 5x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    otherValidator.validate();
    Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
    // 15. multi-output mapreduce writing partition fails because Hive partition already exists
    // 15a. first output fails, other output must roll back partitions 0 and 5
    mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "4", "input.text", "0x 5x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    otherValidator.validate();
    Assert.assertFalse(pfs.getPartitionOutput(KEY_4).getLocation().exists());
    Assert.assertFalse(other.getPartitionOutput(KEY_0).getLocation().exists());
    Assert.assertFalse(other.getPartitionOutput(KEY_5).getLocation().exists());
    // 15b. second output fails, first output must roll back partitions 0 and 5
    mrManager.start(ImmutableMap.of("output.datasets", BOTH, OTHER_OUT, "4", "input.text", "0x 5x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    otherValidator.validate();
    Assert.assertFalse(other.getPartitionOutput(KEY_4).getLocation().exists());
    Assert.assertFalse(pfs.getPartitionOutput(KEY_0).getLocation().exists());
    Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
}
Also used: ApplicationManager (io.cdap.cdap.test.ApplicationManager), MapReduceManager (io.cdap.cdap.test.MapReduceManager), PartitionedFileSet (io.cdap.cdap.api.dataset.lib.PartitionedFileSet), TransactionFailureException (org.apache.tephra.TransactionFailureException), DataSetException (io.cdap.cdap.api.dataset.DataSetException), PartitionOutput (io.cdap.cdap.api.dataset.lib.PartitionOutput), UnitTestManager (io.cdap.cdap.test.UnitTestManager), OutputStreamWriter (java.io.OutputStreamWriter), Writer (java.io.Writer), Location (org.apache.twill.filesystem.Location), Test (org.junit.Test)
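
Every numbered case above repeats one shape: run a body in a transaction, require the transaction to fail, then validate that files and partitions were rolled back. Below is a sketch of that shape factored into a helper; the name expectTxFailure is hypothetical, and it assumes execute() declares the checked TransactionFailureException, as its use in the test implies.

import org.apache.tephra.TransactionFailureException;
import org.junit.Assert;

import io.cdap.cdap.api.dataset.lib.PartitionedFileSet;
import io.cdap.cdap.test.UnitTestManager;

public final class RollbackAssertions {

    private RollbackAssertions() {
    }

    /**
     * Runs the body in a transaction and asserts that the transaction fails.
     * Hypothetical helper mirroring the try/execute/fail/catch blocks above.
     */
    public static void expectTxFailure(UnitTestManager.UnitTestDatasetManager<PartitionedFileSet> manager,
                                       Runnable body, String failureMessage) throws Exception {
        try {
            manager.execute(body);
            Assert.fail(failureMessage);
        } catch (TransactionFailureException e) {
            // expected: the failed transaction undoes any partitions or locations the body created
        }
    }
}

With such a helper, case 1 above collapses to expectTxFailure(pfsManager, () -> pfs.addPartition(KEY_1, path3), "Expected tx to fail because partition for number=1 already exists") followed by pfsValidator.validate().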

Example 74 with DataSetException

Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.

The class AppWithCustomTx, method recordTransaction.

/**
 * If in a transaction, records the timeout that the current transaction was given, or "default" if no explicit
 * timeout was given. Otherwise, verifies that a write without a transaction fails and records nothing.
 *
 * Note: we know whether and what explicit timeout was given, because we inject a {@link RevealingTxSystemClient},
 *       which returns a {@link RevealingTransaction} for {@link TransactionSystemClient#startShort(int)} only.
 */
static void recordTransaction(DatasetContext context, String row, String column) {
    TransactionCapturingTable capture = context.getDataset(CAPTURE);
    Transaction tx = capture.getTx();
    // we cannot cast to RevealingTransaction because it is not visible to the program class loader
    String value = DEFAULT;
    if (tx == null) {
        try {
            capture.getTable().put(new Put(row, column, value));
            throw new RuntimeException("put to table without transaction should have failed.");
        } catch (DataSetException e) {
        // expected
        }
        return;
    }
    if ("RevealingTransaction".equals(tx.getClass().getSimpleName())) {
        int txTimeout;
        try {
            txTimeout = (int) tx.getClass().getField("timeout").get(tx);
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
        value = String.valueOf(txTimeout);
    }
    capture.getTable().put(new Put(row, column, value));
}
Also used: Transaction (org.apache.tephra.Transaction), RevealingTransaction (io.cdap.cdap.test.RevealingTxSystemClient.RevealingTransaction), DataSetException (io.cdap.cdap.api.dataset.DataSetException), Put (io.cdap.cdap.api.dataset.table.Put), TransactionFailureException (org.apache.tephra.TransactionFailureException), IOException (java.io.IOException)
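
The reflection in recordTransaction() exists because RevealingTransaction is loaded by a different class loader, so an instanceof check or a cast cannot succeed; only the simple class name and public fields are reachable. Below is a standalone sketch of that technique with a hypothetical helper name; the class and field names ("RevealingTransaction", "timeout") are the ones the method above reads.

/**
 * Returns the public "timeout" field of a RevealingTransaction, matched by
 * simple class name because the concrete class is not visible to this class
 * loader, or null for any other transaction. Hypothetical helper.
 */
static Integer readExplicitTimeout(Object tx) {
    if (tx == null || !"RevealingTransaction".equals(tx.getClass().getSimpleName())) {
        return null;  // not an explicit-timeout transaction
    }
    try {
        return (Integer) tx.getClass().getField("timeout").get(tx);
    } catch (ReflectiveOperationException e) {
        // covers NoSuchFieldException and IllegalAccessException
        throw new IllegalStateException("cannot read timeout field", e);
    }
}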

Aggregations

DataSetException (io.cdap.cdap.api.dataset.DataSetException) 74
IOException (java.io.IOException) 54
ReadOnly (io.cdap.cdap.api.annotation.ReadOnly) 14
Map (java.util.Map) 12
TransactionFailureException (org.apache.tephra.TransactionFailureException) 12
Location (org.apache.twill.filesystem.Location) 12
PartitionKey (io.cdap.cdap.api.dataset.lib.PartitionKey) 10
Result (io.cdap.cdap.api.dataset.table.Result) 10
NavigableMap (java.util.NavigableMap) 10
Test (org.junit.Test) 10
PartitionAlreadyExistsException (io.cdap.cdap.api.dataset.lib.PartitionAlreadyExistsException) 8
TimePartitionedFileSet (io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) 8
Put (org.apache.hadoop.hbase.client.Put) 8
ImmutableMap (com.google.common.collect.ImmutableMap) 6
WriteOnly (io.cdap.cdap.api.annotation.WriteOnly) 6
DatasetManagementException (io.cdap.cdap.api.dataset.DatasetManagementException) 6
PartitionedFileSet (io.cdap.cdap.api.dataset.lib.PartitionedFileSet) 6
Put (io.cdap.cdap.api.dataset.table.Put) 6
Row (io.cdap.cdap.api.dataset.table.Row) 6
UnauthorizedException (io.cdap.cdap.security.spi.authorization.UnauthorizedException) 6