
Example 36 with TransactionFailureException

Use of org.apache.tephra.TransactionFailureException in project cdap by caskdata.

The class HiveExploreServiceFileSetTestRun, method testPartitionedFileSet.

private void testPartitionedFileSet(@Nullable String dbName, @Nullable String tableName) throws Exception {
    DatasetId datasetInstanceId = NAMESPACE_ID.dataset("parted");
    String hiveTableName = getDatasetHiveName(datasetInstanceId);
    String showTablesCommand = "show tables";
    FileSetProperties.Builder props = PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addStringField("str").addIntField("num").build())
        .setBasePath("parted")
        .setEnableExploreOnCreate(true)
        .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
        .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
        .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
        .setTableProperty("avro.schema.literal", SCHEMA.toString());
    if (tableName != null) {
        props.setExploreTableName(tableName);
        hiveTableName = tableName;
    }
    String queryTableName = hiveTableName;
    if (dbName != null) {
        props.setExploreDatabaseName(dbName);
        runCommand(NAMESPACE_ID, "create database " + dbName, false, null, null);
        showTablesCommand += " in " + dbName;
        queryTableName = dbName + "." + queryTableName;
    }
    // create a partitioned file set
    datasetFramework.addInstance("partitionedFileSet", datasetInstanceId, props.build());
    // verify that the hive table was created for this file set
    runCommand(NAMESPACE_ID, showTablesCommand, true, null, Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(hiveTableName))));
    // Accessing dataset instance to perform data operations
    final PartitionedFileSet partitioned = datasetFramework.getDataset(datasetInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(partitioned);
    FileSet fileSet = partitioned.getEmbeddedFileSet();
    // add some partitions. Beware that Hive expects a partition to be a directory, so we create dirs with one file
    Location locationX1 = fileSet.getLocation("fileX1/nn");
    Location locationY1 = fileSet.getLocation("fileY1/nn");
    Location locationX2 = fileSet.getLocation("fileX2/nn");
    Location locationY2 = fileSet.getLocation("fileY2/nn");
    FileWriterHelper.generateAvroFile(locationX1.getOutputStream(), "x", 1, 2);
    FileWriterHelper.generateAvroFile(locationY1.getOutputStream(), "y", 1, 2);
    FileWriterHelper.generateAvroFile(locationX2.getOutputStream(), "x", 2, 3);
    FileWriterHelper.generateAvroFile(locationY2.getOutputStream(), "y", 2, 3);
    final PartitionKey keyX1 = PartitionKey.builder().addStringField("str", "x").addIntField("num", 1).build();
    PartitionKey keyY1 = PartitionKey.builder().addStringField("str", "y").addIntField("num", 1).build();
    final PartitionKey keyX2 = PartitionKey.builder().addStringField("str", "x").addIntField("num", 2).build();
    PartitionKey keyY2 = PartitionKey.builder().addStringField("str", "y").addIntField("num", 2).build();
    addPartition(partitioned, keyX1, "fileX1");
    addPartition(partitioned, keyY1, "fileY1");
    addPartition(partitioned, keyX2, "fileX2");
    addPartition(partitioned, keyY2, "fileY2");
    // verify that the partitions were added to Hive
    validatePartitions(queryTableName, partitioned, ImmutableList.of(keyX1, keyX2, keyY1, keyY2));
    // verify that count() and where... work in Hive
    runCommand(NAMESPACE_ID, "SELECT count(*) AS count FROM " + queryTableName, true, Lists.newArrayList(new ColumnDesc("count", "BIGINT", 1, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(4L))));
    runCommand(NAMESPACE_ID, "SELECT * FROM " + queryTableName + " WHERE num = 2 ORDER BY key, value", true, Lists.newArrayList(new ColumnDesc(hiveTableName + ".key", "STRING", 1, null), new ColumnDesc(hiveTableName + ".value", "STRING", 2, null), new ColumnDesc(hiveTableName + ".str", "STRING", 3, null), new ColumnDesc(hiveTableName + ".num", "INT", 4, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x2", "#2", "x", 2)), new QueryResult(Lists.<Object>newArrayList("y2", "#2", "y", 2))));
    // drop a partition and query again
    dropPartition(partitioned, keyX2);
    validatePartitions(queryTableName, partitioned, ImmutableSet.of(keyX1, keyY1, keyY2));
    // attempt a transaction that drops one partition, adds another, and then fails
    try {
        doTransaction(partitioned, new Runnable() {

            @Override
            public void run() {
                partitioned.dropPartition(keyX1);
                partitioned.addPartition(keyX2, "fileX2");
                Assert.fail("fail tx");
            }
        });
    } catch (TransactionFailureException e) {
    // expected
    }
    // validate that both the drop and addPartition were undone
    validatePartitions(queryTableName, partitioned, ImmutableSet.of(keyX1, keyY1, keyY2));
    // attempt a transaction that tries to add an existing partition and therefore fails
    try {
        doTransaction(partitioned, new Runnable() {

            @Override
            public void run() {
                partitioned.addPartition(keyX1, "fileX1");
                throw new RuntimeException("on purpose");
            }
        });
    } catch (TransactionFailureException e) {
        // expected: adding the existing partition fails with a DataSetException, not with the "on purpose" RuntimeException
        Assert.assertTrue(e.getCause() instanceof DataSetException);
    }
    // validate that the failed addPartition was rolled back
    validatePartitions(queryTableName, partitioned, ImmutableSet.of(keyX1, keyY1, keyY2));
    // drop a partition directly from hive
    runCommand(NAMESPACE_ID, "ALTER TABLE " + queryTableName + " DROP PARTITION (str='y', num=2)", false, null, null);
    // verify that one more partition, namely y2, is now gone from Hive, but the PFS still has it
    validatePartitionsInHive(queryTableName, ImmutableSet.of(keyX1, keyY1));
    validatePartitionsInPFS(partitioned, ImmutableSet.of(keyX1, keyY1, keyY2));
    // make sure the partition can still be dropped from the PFS dataset
    dropPartition(partitioned, keyY2);
    validatePartitions(queryTableName, partitioned, ImmutableSet.of(keyX1, keyY1));
    // change the explore schema by updating the props
    datasetFramework.updateInstance(datasetInstanceId, props.setTableProperty("avro.schema.literal", K_SCHEMA.toString()).build());
    // validate that the schema was updated
    validatePartitions(queryTableName, partitioned, ImmutableSet.of(keyX1, keyY1), true);
    // disable explore by updating the props
    datasetFramework.updateInstance(datasetInstanceId, props.setEnableExploreOnCreate(false).build());
    // verify the Hive table is gone
    runCommand(NAMESPACE_ID, showTablesCommand, false, null, Collections.<QueryResult>emptyList());
    // re-enable explore by updating the props
    datasetFramework.updateInstance(datasetInstanceId, props.setEnableExploreOnCreate(true).build());
    // verify the Hive table is back
    runCommand(NAMESPACE_ID, showTablesCommand, true, null, Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(hiveTableName))));
    // drop the dataset
    datasetFramework.deleteInstance(datasetInstanceId);
    // verify the Hive table is gone
    runCommand(NAMESPACE_ID, "show tables", false, null, Collections.<QueryResult>emptyList());
}
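The doTransaction helper used above is not shown on this page. As a rough sketch of the pattern it presumably wraps, the body can be driven through Tephra's TransactionContext directly, with the PartitionedFileSet participating as a TransactionAware; the helper name and the txClient parameter below are assumptions of this sketch, not part of the test class.

private void doTransactionSketch(TransactionSystemClient txClient, PartitionedFileSet partitioned,
                                 Runnable body) throws TransactionFailureException {
    // One Tephra transaction around the body: on success the partition changes are committed,
    // on any failure abort() rolls them back and a TransactionFailureException reaches the caller.
    TransactionContext txContext = new TransactionContext(txClient, (TransactionAware) partitioned);
    txContext.start();
    try {
        body.run();
    } catch (Throwable t) {
        // abort(cause) undoes the uncommitted dropPartition/addPartition calls and rethrows the cause
        txContext.abort(new TransactionFailureException("Transaction body failed", t));
    }
    // reached only if the body succeeded; commits the partition changes
    txContext.finish();
}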
Also used: TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet), FileSet(co.cask.cdap.api.dataset.lib.FileSet), PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet), ColumnDesc(co.cask.cdap.proto.ColumnDesc), DatasetId(co.cask.cdap.proto.id.DatasetId), FileSetProperties(co.cask.cdap.api.dataset.lib.FileSetProperties), PartitionedFileSetProperties(co.cask.cdap.api.dataset.lib.PartitionedFileSetProperties), QueryResult(co.cask.cdap.proto.QueryResult), TransactionFailureException(org.apache.tephra.TransactionFailureException), DataSetException(co.cask.cdap.api.dataset.DataSetException), PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey), Location(org.apache.twill.filesystem.Location)

Example 37 with TransactionFailureException

Use of org.apache.tephra.TransactionFailureException in project cdap by caskdata.

The class TimePartitionedFileSetTest, method validateInputPaths.

/**
 * Validates that the input format configuration of the tpfs, when instantiated with (time + start * minutes) as
 * the input start time and (time + end * minutes) as the input end time, contains the expected list of paths.
 */
private void validateInputPaths(long time, long start, long end, final String... expected) throws IOException, DatasetManagementException, InterruptedException, TransactionFailureException {
    Map<String, String> arguments = Maps.newHashMap();
    TimePartitionedFileSetArguments.setInputStartTime(arguments, time + start * MINUTE);
    TimePartitionedFileSetArguments.setInputEndTime(arguments, time + end * MINUTE);
    final TimePartitionedFileSet tpfs = dsFrameworkUtil.getInstance(TPFS_INSTANCE, arguments);
    TransactionAware txAwareDataset = (TransactionAware) tpfs;
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Map<String, String> inputConfig = tpfs.getInputFormatConfiguration();
            String inputs = inputConfig.get(FileInputFormat.INPUT_DIR);
            Assert.assertNotNull(inputs);
            if (expected.length == 0) {
                Assert.assertTrue(inputs.isEmpty());
                return;
            }
            String[] inputPaths = inputs.split(",");
            Assert.assertEquals(expected.length, inputPaths.length);
            // order is not guaranteed.
            Arrays.sort(expected);
            Arrays.sort(inputPaths);
            for (int i = 0; i < expected.length; i++) {
                // every input path is absolute, whereas expected paths are relative
                Assert.assertTrue("path #" + i + " does not match", inputPaths[i].endsWith(expected[i]));
            }
        }
    });
}
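For context, a hypothetical invocation of this helper; the offsets and the relative paths below are illustrative assumptions, not values taken from the test:

    // Assuming partitions were added at 'time' and at 'time + 2 * MINUTE' under the relative
    // paths "file0" and "file2", a window from (time - 5 min) to (time + 5 min) covers both,
    // while a window from (time + 3 min) to (time + 5 min) covers neither.
    validateInputPaths(time, -5, 5, "file0", "file2");
    validateInputPaths(time, 3, 5);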
Also used: TransactionAware(org.apache.tephra.TransactionAware), TransactionExecutor(org.apache.tephra.TransactionExecutor), TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet), Map(java.util.Map), ImmutableMap(com.google.common.collect.ImmutableMap), TransactionFailureException(org.apache.tephra.TransactionFailureException), DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException), IOException(java.io.IOException), DataSetException(co.cask.cdap.api.dataset.DataSetException)

Example 38 with TransactionFailureException

Use of org.apache.tephra.TransactionFailureException in project cdap by caskdata.

The class MetadataDatasetTest, method testSearchIncludesSystemEntities.

@Test
public void testSearchIncludesSystemEntities() throws InterruptedException, TransactionFailureException {
    // Use the same artifact in two different namespaces - system and ns2
    final ArtifactId sysArtifact = NamespaceId.SYSTEM.artifact("artifact", "1.0");
    final ArtifactId ns2Artifact = new ArtifactId("ns2", "artifact", "1.0");
    final String multiWordKey = "multiword";
    final String multiWordValue = "aV1 av2 ,  -  ,  av3 - av4_av5 av6";
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            dataset.setProperty(flow1, multiWordKey, multiWordValue);
            dataset.setProperty(sysArtifact, multiWordKey, multiWordValue);
            dataset.setProperty(ns2Artifact, multiWordKey, multiWordValue);
        }
    });
    // perform a multi-word search in the 'ns1' namespace. It should return the system artifact along with
    // the matched entities in the 'ns1' namespace
    final MetadataEntry flowMultiWordEntry = new MetadataEntry(flow1, multiWordKey, multiWordValue);
    final MetadataEntry systemArtifactEntry = new MetadataEntry(sysArtifact, multiWordKey, multiWordValue);
    final MetadataEntry ns2ArtifactEntry = new MetadataEntry(ns2Artifact, multiWordKey, multiWordValue);
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            List<MetadataEntry> results = searchByDefaultIndex("ns1", "aV5", ImmutableSet.of(EntityTypeSimpleName.ALL));
            Assert.assertEquals(Sets.newHashSet(flowMultiWordEntry, systemArtifactEntry), Sets.newHashSet(results));
            // search only programs - should only return flow
            results = searchByDefaultIndex("ns1", multiWordKey + MetadataDataset.KEYVALUE_SEPARATOR + "aV5", ImmutableSet.of(EntityTypeSimpleName.PROGRAM));
            Assert.assertEquals(ImmutableList.of(flowMultiWordEntry), results);
            // search only artifacts - should only return system artifact
            results = searchByDefaultIndex("ns1", multiWordKey + MetadataDataset.KEYVALUE_SEPARATOR + multiWordValue, ImmutableSet.of(EntityTypeSimpleName.ARTIFACT));
            // this query returns the system artifact 4 times, since the dataset returns a list with duplicates for scoring
            // purposes. Convert to a Set for comparison.
            Assert.assertEquals(Sets.newHashSet(systemArtifactEntry), Sets.newHashSet(results));
            // search all entities in namespace 'ns2' - should return the system artifact and the same artifact in ns2
            results = searchByDefaultIndex("ns2", multiWordKey + MetadataDataset.KEYVALUE_SEPARATOR + "aV4", ImmutableSet.of(EntityTypeSimpleName.ALL));
            Assert.assertEquals(Sets.newHashSet(systemArtifactEntry, ns2ArtifactEntry), Sets.newHashSet(results));
            // search only programs in namespace 'ns2'. Should return empty
            results = searchByDefaultIndex("ns2", "aV*", ImmutableSet.of(EntityTypeSimpleName.PROGRAM));
            Assert.assertTrue(results.isEmpty());
            // search all entities in namespace 'ns3'. Should return only the system artifact
            results = searchByDefaultIndex("ns3", "av*", ImmutableSet.of(EntityTypeSimpleName.ALL));
            Assert.assertEquals(Sets.newHashSet(systemArtifactEntry), Sets.newHashSet(results));
            // search the system namespace for all entities. Should return only the system artifact
            results = searchByDefaultIndex(NamespaceId.SYSTEM.getEntityName(), "av*", ImmutableSet.of(EntityTypeSimpleName.ALL));
            Assert.assertEquals(Sets.newHashSet(systemArtifactEntry), Sets.newHashSet(results));
        }
    });
    // clean up
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            dataset.removeProperties(flow1);
            dataset.removeProperties(sysArtifact);
        }
    });
}
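The assertions above run inside TransactionExecutor.Subroutine bodies, so nothing leaves the transaction. When the results are needed by the caller, Tephra's TransactionExecutor also declares a Callable overload; a minimal sketch against the same txnl executor and search helper (whether that overload is available in the version used here is an assumption of this sketch):

    // Sketch: return the matching entries to the caller instead of asserting inside the transaction
    // (requires java.util.concurrent.Callable).
    List<MetadataEntry> matches = txnl.execute(new Callable<List<MetadataEntry>>() {

        @Override
        public List<MetadataEntry> call() throws Exception {
            return searchByDefaultIndex("ns1", "aV5", ImmutableSet.of(EntityTypeSimpleName.ALL));
        }
    });
    Assert.assertFalse(matches.isEmpty());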
Also used: ArtifactId(co.cask.cdap.proto.id.ArtifactId), TransactionExecutor(org.apache.tephra.TransactionExecutor), ImmutableList(com.google.common.collect.ImmutableList), List(java.util.List), TransactionFailureException(org.apache.tephra.TransactionFailureException), BadRequestException(co.cask.cdap.common.BadRequestException), Test(org.junit.Test)

Example 39 with TransactionFailureException

Use of org.apache.tephra.TransactionFailureException in project cdap by caskdata.

The class MetadataDatasetTest, method testSearchDifferentEntityScope.

@Test
public void testSearchDifferentEntityScope() throws InterruptedException, TransactionFailureException {
    final ArtifactId sysArtifact = NamespaceId.SYSTEM.artifact("artifact", "1.0");
    final ArtifactId nsArtifact = new ArtifactId("ns1", "artifact", "1.0");
    final String multiWordKey = "multiword";
    final String multiWordValue = "aV1 av2 ,  -  ,  av3 - av4_av5 av6";
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            dataset.setProperty(nsArtifact, multiWordKey, multiWordValue);
            dataset.setProperty(sysArtifact, multiWordKey, multiWordValue);
        }
    });
    final MetadataEntry systemArtifactEntry = new MetadataEntry(sysArtifact, multiWordKey, multiWordValue);
    final MetadataEntry nsArtifactEntry = new MetadataEntry(nsArtifact, multiWordKey, multiWordValue);
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            List<MetadataEntry> results = dataset.search("ns1", "aV5", ImmutableSet.of(EntityTypeSimpleName.ALL), SortInfo.DEFAULT, 0, Integer.MAX_VALUE, 1, null, false, EnumSet.of(EntityScope.USER)).getResults();
            // the result should not contain system entities
            Assert.assertEquals(Sets.newHashSet(nsArtifactEntry), Sets.newHashSet(results));
            results = dataset.search("ns1", "aV5", ImmutableSet.of(EntityTypeSimpleName.ALL), SortInfo.DEFAULT, 0, Integer.MAX_VALUE, 1, null, false, EnumSet.of(EntityScope.SYSTEM)).getResults();
            // the result should not contain user entities
            Assert.assertEquals(Sets.newHashSet(systemArtifactEntry), Sets.newHashSet(results));
            results = dataset.search("ns1", "aV5", ImmutableSet.of(EntityTypeSimpleName.ALL), SortInfo.DEFAULT, 0, Integer.MAX_VALUE, 1, null, false, EnumSet.allOf(EntityScope.class)).getResults();
            // the result should contain both entity scopes
            Assert.assertEquals(Sets.newHashSet(nsArtifactEntry, systemArtifactEntry), Sets.newHashSet(results));
        }
    });
    // clean up
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            dataset.removeProperties(nsArtifact);
            dataset.removeProperties(sysArtifact);
        }
    });
}
Also used: ArtifactId(co.cask.cdap.proto.id.ArtifactId), TransactionExecutor(org.apache.tephra.TransactionExecutor), ImmutableList(com.google.common.collect.ImmutableList), List(java.util.List), TransactionFailureException(org.apache.tephra.TransactionFailureException), BadRequestException(co.cask.cdap.common.BadRequestException), Test(org.junit.Test)

Example 40 with TransactionFailureException

Use of org.apache.tephra.TransactionFailureException in project cdap by caskdata.

The class QueueTest, method enqueueDequeue.

private void enqueueDequeue(final QueueName queueName, int preEnqueueCount, int concurrentCount, int enqueueBatchSize, int consumerSize, DequeueStrategy dequeueStrategy, final int dequeueBatchSize) throws Exception {
    ConsumerGroupConfig groupConfig = new ConsumerGroupConfig(0L, consumerSize, dequeueStrategy, "key");
    configureGroups(queueName, ImmutableList.of(groupConfig));
    Preconditions.checkArgument(preEnqueueCount % enqueueBatchSize == 0, "Count must be divisible by enqueueBatchSize");
    Preconditions.checkArgument(concurrentCount % enqueueBatchSize == 0, "Count must be divisible by enqueueBatchSize");
    final List<ConsumerConfig> consumerConfigs = Lists.newArrayList();
    for (int i = 0; i < consumerSize; i++) {
        consumerConfigs.add(new ConsumerConfig(groupConfig, i));
    }
    createEnqueueRunnable(queueName, preEnqueueCount, enqueueBatchSize, null).run();
    final CyclicBarrier startBarrier = new CyclicBarrier(consumerSize + 2);
    ExecutorService executor = Executors.newFixedThreadPool(consumerSize + 1);
    // Enqueue thread
    executor.submit(createEnqueueRunnable(queueName, concurrentCount, enqueueBatchSize, startBarrier));
    // Dequeue
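    // Assuming each enqueue runnable writes the values 0 .. count-1, the expected total is the
    // sum of those values, count * (count - 1) / 2, computed here for both enqueue phases.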
    final long expectedSum = ((long) preEnqueueCount / 2 * ((long) preEnqueueCount - 1)) + ((long) concurrentCount / 2 * ((long) concurrentCount - 1));
    final AtomicLong valueSum = new AtomicLong();
    final CountDownLatch completeLatch = new CountDownLatch(consumerSize);
    for (int i = 0; i < consumerSize; i++) {
        final int instanceId = i;
        executor.submit(new Runnable() {

            @Override
            public void run() {
                try {
                    startBarrier.await();
                    LOG.info("Consumer {} starts consuming {}", instanceId, queueName.getSimpleName());
                    try (QueueConsumer consumer = queueClientFactory.createConsumer(queueName, consumerConfigs.get(instanceId), 1)) {
                        TransactionContext txContext = createTxContext(consumer);
                        Stopwatch stopwatch = new Stopwatch();
                        stopwatch.start();
                        int dequeueCount = 0;
                        while (valueSum.get() < expectedSum) {
                            txContext.start();
                            try {
                                DequeueResult<byte[]> result = consumer.dequeue(dequeueBatchSize);
                                txContext.finish();
                                if (result.isEmpty()) {
                                    continue;
                                }
                                for (byte[] data : result) {
                                    valueSum.addAndGet(Bytes.toInt(data));
                                    dequeueCount++;
                                }
                            } catch (TransactionFailureException e) {
                                LOG.error("Operation error", e);
                                txContext.abort();
                                throw Throwables.propagate(e);
                            }
                        }
                        long elapsed = stopwatch.elapsedTime(TimeUnit.MILLISECONDS);
                        LOG.info("Dequeue {} entries in {} ms for {}", dequeueCount, elapsed, queueName.getSimpleName());
                        LOG.info("Dequeue avg {} entries per seconds for {}", (double) dequeueCount * 1000 / elapsed, queueName.getSimpleName());
                        consumer.close();
                        completeLatch.countDown();
                    }
                } catch (Exception e) {
                    LOG.error(e.getMessage(), e);
                }
            }
        });
    }
    startBarrier.await();
    completeLatch.await();
    Assert.assertEquals(expectedSum, valueSum.get());
    // Only check eviction (that the queue is empty) for queues; streams are skipped.
    if (!queueName.isStream()) {
        verifyQueueIsEmpty(queueName, consumerConfigs);
    }
    executor.shutdownNow();
}
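A hypothetical call into this helper; the queue name, counts, and batch sizes below are illustrative assumptions only (the counts must be divisible by the batch size, as the preconditions above require):

    // 300 pre-enqueued entries plus 300 concurrent ones, written in batches of 10,
    // consumed by 3 consumers using HASH partitioning on "key", dequeuing 50 entries at a time.
    enqueueDequeue(queueName, 300, 300, 10, 3, DequeueStrategy.HASH, 50);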
Also used: Stopwatch(com.google.common.base.Stopwatch), CountDownLatch(java.util.concurrent.CountDownLatch), TransactionFailureException(org.apache.tephra.TransactionFailureException), CyclicBarrier(java.util.concurrent.CyclicBarrier), AtomicLong(java.util.concurrent.atomic.AtomicLong), QueueConsumer(co.cask.cdap.data2.queue.QueueConsumer), DequeueResult(co.cask.cdap.data2.queue.DequeueResult), TransactionContext(org.apache.tephra.TransactionContext), ExecutorService(java.util.concurrent.ExecutorService), ConsumerConfig(co.cask.cdap.data2.queue.ConsumerConfig), ConsumerGroupConfig(co.cask.cdap.data2.queue.ConsumerGroupConfig)

Aggregations

TransactionFailureException (org.apache.tephra.TransactionFailureException): 55
Test (org.junit.Test): 19
TransactionContext (org.apache.tephra.TransactionContext): 17
IOException (java.io.IOException): 16
TransactionExecutor (org.apache.tephra.TransactionExecutor): 12
TransactionConflictException (org.apache.tephra.TransactionConflictException): 8
TxRunnable (co.cask.cdap.api.TxRunnable): 6
DatasetContext (co.cask.cdap.api.data.DatasetContext): 6
Location (org.apache.twill.filesystem.Location): 6
TransactionAware (org.apache.tephra.TransactionAware): 5
DataSetException (co.cask.cdap.api.dataset.DataSetException): 4
DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException): 4
Table (co.cask.cdap.api.dataset.table.Table): 4
ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig): 4
List (java.util.List): 4
Map (java.util.Map): 4
ArrayList (java.util.ArrayList): 3
Collection (java.util.Collection): 3
TimeoutException (java.util.concurrent.TimeoutException): 3
Transaction (org.apache.tephra.Transaction): 3