
Example 36 with TransactionAware

Use of org.apache.tephra.TransactionAware in project cdap by caskdata.

The class DynamicDatasetCacheTest, method getTxAwares.

private List<TestDataset> getTxAwares() {
    SortedSet<TestDataset> set = new TreeSet<>();
    for (TransactionAware txAware : cache.getTransactionAwares()) {
        TestDataset dataset = (TestDataset) txAware;
        set.add(dataset);
    }
    return ImmutableList.copyOf(set);
}
Also used : TreeSet(java.util.TreeSet) TransactionAware(org.apache.tephra.TransactionAware)
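
For reference, the lifecycle that a cached dataset such as TestDataset participates in is defined by Tephra's TransactionAware interface. Below is a minimal sketch (not part of the test) of driving that lifecycle by hand over the datasets returned by cache.getTransactionAwares(); the txClient variable is an assumed org.apache.tephra.TransactionSystemClient.

// Sketch only: manually pushing a Tephra transaction through every cached TransactionAware.
// txClient is an assumed TransactionSystemClient; cache is the dataset cache from the test above.
Transaction tx = txClient.startShort();
for (TransactionAware txAware : cache.getTransactionAwares()) {
    // bind each dataset to the new transaction before any data operations
    txAware.startTx(tx);
}
// ... perform dataset operations here ...
for (TransactionAware txAware : cache.getTransactionAwares()) {
    // flush buffered writes so the transaction system can commit them
    txAware.commitTx();
}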

Example 37 with TransactionAware

Use of org.apache.tephra.TransactionAware in project cdap by caskdata.

The class InMemoryStreamFileWriterFactory, method create.

@Override
public FileWriter<StreamEvent> create(StreamConfig config, int generation) throws IOException {
    final QueueProducer producer = queueClientFactory.createProducer(QueueName.fromStream(config.getStreamId()));
    final List<TransactionAware> txAwares = Lists.newArrayList();
    if (producer instanceof TransactionAware) {
        txAwares.add((TransactionAware) producer);
    }
    final TransactionExecutor txExecutor = executorFactory.createExecutor(txAwares);
    // Adapt the QueueProducer into the FileWriter interface
    return new FileWriter<StreamEvent>() {

        private final List<StreamEvent> events = Lists.newArrayList();

        @Override
        public void append(StreamEvent event) throws IOException {
            events.add(event);
        }

        @Override
        public void appendAll(Iterator<? extends StreamEvent> events) throws IOException {
            Iterators.addAll(this.events, events);
        }

        @Override
        public void close() throws IOException {
            producer.close();
        }

        @Override
        public void flush() throws IOException {
            try {
                txExecutor.execute(new TransactionExecutor.Subroutine() {

                    @Override
                    public void apply() throws Exception {
                        for (StreamEvent event : events) {
                            producer.enqueue(new QueueEntry(STREAM_EVENT_CODEC.encodePayload(event)));
                        }
                        events.clear();
                    }
                });
            } catch (TransactionFailureException e) {
                throw new IOException(e);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new InterruptedIOException();
            }
        }
    };
}
Also used : InterruptedIOException(java.io.InterruptedIOException) FileWriter(co.cask.cdap.data.file.FileWriter) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) TransactionExecutor(org.apache.tephra.TransactionExecutor) IOException(java.io.IOException) QueueEntry(co.cask.cdap.data2.queue.QueueEntry) TransactionFailureException(org.apache.tephra.TransactionFailureException) QueueProducer(co.cask.cdap.data2.queue.QueueProducer) TransactionAware(org.apache.tephra.TransactionAware) Iterator(java.util.Iterator) List(java.util.List)
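
The writer returned above buffers appended events in memory and only enqueues them to the stream's queue, inside a single transaction, when flush() is called. A hedged usage sketch follows; writerFactory and config are hypothetical local variables, and the single-argument ByteBuffer constructor of StreamEvent is assumed.

// Hypothetical usage of InMemoryStreamFileWriterFactory: appends are buffered,
// flush() enqueues everything transactionally, close() closes the underlying QueueProducer.
FileWriter<StreamEvent> writer = writerFactory.create(config, 0);
try {
    writer.append(new StreamEvent(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8))));
    writer.flush();
} finally {
    writer.close();
}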

Example 38 with TransactionAware

Use of org.apache.tephra.TransactionAware in project cdap by caskdata.

The class HiveExploreServiceFileSetTestRun, method testTimePartitionedFileSet.

@Test
public void testTimePartitionedFileSet() throws Exception {
    final DatasetId datasetInstanceId = NAMESPACE_ID.dataset("parts");
    final String tableName = getDatasetHiveName(datasetInstanceId);
    // create a time partitioned file set
    datasetFramework.addInstance("timePartitionedFileSet", datasetInstanceId, FileSetProperties.builder().setBasePath("somePath").setEnableExploreOnCreate(true).setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe").setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat").setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat").setTableProperty("avro.schema.literal", SCHEMA.toString()).build());
    // verify that the hive table was created for this file set
    runCommand(NAMESPACE_ID, "show tables", true, Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(tableName))));
    // Accessing dataset instance to perform data operations
    TimePartitionedFileSet tpfs = datasetFramework.getDataset(datasetInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(tpfs);
    Assert.assertTrue(tpfs instanceof TransactionAware);
    // add some partitions. Beware that Hive expects a partition to be a directory, so we create dirs with one file
    long time1 = DATE_FORMAT.parse("12/10/14 1:00 am").getTime();
    long time2 = DATE_FORMAT.parse("12/10/14 2:00 am").getTime();
    long time3 = DATE_FORMAT.parse("12/10/14 3:00 am").getTime();
    Location location1 = tpfs.getEmbeddedFileSet().getLocation("file1/nn");
    Location location2 = tpfs.getEmbeddedFileSet().getLocation("file2/nn");
    Location location3 = tpfs.getEmbeddedFileSet().getLocation("file3/nn");
    FileWriterHelper.generateAvroFile(location1.getOutputStream(), "x", 1, 2);
    FileWriterHelper.generateAvroFile(location2.getOutputStream(), "y", 2, 3);
    FileWriterHelper.generateAvroFile(location3.getOutputStream(), "x", 3, 4);
    addTimePartition(tpfs, time1, "file1");
    addTimePartition(tpfs, time2, "file2");
    addTimePartition(tpfs, time3, "file3");
    // verify that the partitions were added to Hive
    runCommand(NAMESPACE_ID, "show partitions " + tableName, true, Lists.newArrayList(new ColumnDesc("partition", "STRING", 1, "from deserializer")), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=1/minute=0")), new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=2/minute=0")), new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=3/minute=0"))));
    // verify that we can query the key-values in the file with Hive
    runCommand(NAMESPACE_ID, "SELECT key, value FROM " + tableName + " ORDER BY key, value", true, Lists.newArrayList(new ColumnDesc("key", "STRING", 1, null), new ColumnDesc("value", "STRING", 2, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", "#1")), new QueryResult(Lists.<Object>newArrayList("x3", "#3")), new QueryResult(Lists.<Object>newArrayList("y2", "#2"))));
    // verify that we can query the key-values in the file with Hive
    runCommand(NAMESPACE_ID, "SELECT key, value FROM " + tableName + " WHERE hour = 2 ORDER BY key, value", true, Lists.newArrayList(new ColumnDesc("key", "STRING", 1, null), new ColumnDesc("value", "STRING", 2, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("y2", "#2"))));
    // remove a partition
    dropTimePartition(tpfs, time2);
    // verify that we can query the key-values in the file with Hive
    runCommand(NAMESPACE_ID, "SELECT key, value FROM " + tableName + " ORDER BY key, value", true, Lists.newArrayList(new ColumnDesc("key", "STRING", 1, null), new ColumnDesc("value", "STRING", 2, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", "#1")), new QueryResult(Lists.<Object>newArrayList("x3", "#3"))));
    // verify the partition was removed from Hive
    runCommand(NAMESPACE_ID, "show partitions " + tableName, true, Lists.newArrayList(new ColumnDesc("partition", "STRING", 1, "from deserializer")), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=1/minute=0")), new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=3/minute=0"))));
    // drop the dataset
    datasetFramework.deleteInstance(datasetInstanceId);
    // verify the Hive table is gone
    runCommand(NAMESPACE_ID, "show tables", false, Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")), Collections.<QueryResult>emptyList());
    datasetFramework.addInstance("timePartitionedFileSet", datasetInstanceId, FileSetProperties.builder().setBasePath("somePath").setEnableExploreOnCreate(true).setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe").setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat").setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat").setTableProperty("avro.schema.literal", SCHEMA.toString()).build());
    // verify that the hive table was created for this file set
    runCommand(NAMESPACE_ID, "show tables", true, Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(tableName))));
}
Also used : QueryResult(co.cask.cdap.proto.QueryResult) TransactionAware(org.apache.tephra.TransactionAware) ColumnDesc(co.cask.cdap.proto.ColumnDesc) TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet) DatasetId(co.cask.cdap.proto.id.DatasetId) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)
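
The addTimePartition and dropTimePartition helpers are not shown in this snippet. Because the TimePartitionedFileSet is itself TransactionAware, a plausible sketch wraps the partition mutation in a TransactionExecutor; the executorFactory used below is an assumption (mirroring Example 37), and addPartition(long, String) is taken from the CDAP TimePartitionedFileSet API.

// Sketch of what the addTimePartition helper could look like; not the test's actual code.
private void addTimePartition(final TimePartitionedFileSet tpfs, final long time, final String path)
    throws Exception {
    // run the mutation inside a transaction, since the TPFS is a TransactionAware dataset
    TransactionExecutor executor =
        executorFactory.createExecutor(ImmutableList.of((TransactionAware) tpfs));
    executor.execute(new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            tpfs.addPartition(time, path);
        }
    });
}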

Example 39 with TransactionAware

Use of org.apache.tephra.TransactionAware in project cdap by caskdata.

The class HiveExploreTableTestRun, method testNonAsciiStrings.

@Test
public void testNonAsciiStrings() throws Exception {
    DatasetId ttId = NAMESPACE_ID.dataset("tt");
    datasetFramework.addInstance(Table.class.getName(), ttId, TableProperties.builder()
        .setSchema(Schema.recordOf("record",
                                   Schema.Field.of("a", Schema.of(Schema.Type.STRING)),
                                   Schema.Field.of("b", Schema.of(Schema.Type.STRING))))
        .setRowFieldName("a")
        .setExploreTableName("tt")
        .build());
    try {
        // Accessing dataset instance to perform data operations
        Table tt = datasetFramework.getDataset(ttId, DatasetDefinition.NO_ARGUMENTS, null);
        Assert.assertNotNull(tt);
        Transaction tx = transactionManager.startShort(100);
        ((TransactionAware) tt).startTx(tx);
        tt.put(new Put("a", "b", "c"));
        // row key and column value are non-ASCII
        tt.put(new Put("ä", "b", "ç"));
        ((TransactionAware) tt).commitTx();
        transactionManager.canCommit(tx.getTransactionId(), ((TransactionAware) tt).getTxChanges());
        transactionManager.commit(tx.getTransactionId(), tx.getWritePointer());
        ((TransactionAware) tt).postTxCommit();
        ExploreExecutionResult results = exploreClient.submit(NAMESPACE_ID, "select * from tt").get();
        List<Object> columns = results.next().getColumns();
        Assert.assertEquals(2, columns.size());
        Assert.assertEquals("a", columns.get(0));
        Assert.assertEquals("c", columns.get(1));
        columns = results.next().getColumns();
        Assert.assertEquals(2, columns.size());
        Assert.assertEquals("ä", columns.get(0));
        Assert.assertEquals("ç", columns.get(1));
    } finally {
        datasetFramework.deleteInstance(ttId);
    }
}
Also used : Table(co.cask.cdap.api.dataset.table.Table) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult) Put(co.cask.cdap.api.dataset.table.Put) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)
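
The sequence startTx, commitTx, canCommit, commit, postTxCommit used above is the standard client-side Tephra protocol, but the test has no failure handling. The sketch below adds the abort path; it reuses tt and transactionManager from the test, and assumes TransactionManager exposes abort(Transaction) and TransactionAware exposes rollbackTx(), as in Tephra.

// Sketch: the same TransactionAware protocol as the test, with an illustrative rollback path.
Transaction tx = transactionManager.startShort(100);
TransactionAware txAware = (TransactionAware) tt;
txAware.startTx(tx);
try {
    tt.put(new Put("row", "col", "value"));
    // flush buffered writes, then let the transaction manager check conflicts and commit
    txAware.commitTx();
    transactionManager.canCommit(tx.getTransactionId(), txAware.getTxChanges());
    transactionManager.commit(tx.getTransactionId(), tx.getWritePointer());
    txAware.postTxCommit();
} catch (Exception e) {
    // undo any persisted changes and invalidate the transaction on failure
    txAware.rollbackTx();
    transactionManager.abort(tx);
    throw e;
}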

Example 40 with TransactionAware

Use of org.apache.tephra.TransactionAware in project cdap by caskdata.

The class HiveExploreObjectMappedTableTestRun, method setupTable.

private void setupTable(@Nullable String dbName, @Nullable String tableName) throws Exception {
    if (dbName != null) {
        runCommand(NAMESPACE_ID, "create database if not exists " + dbName, false, null, null);
    }
    datasetFramework.addInstance(ObjectMappedTable.class.getName(), MY_TABLE, setupProperties(dbName, tableName, "row_key"));
    // Accessing dataset instance to perform data operations
    ObjectMappedTable<Record> table = datasetFramework.getDataset(MY_TABLE, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(table);
    TransactionAware txTable = (TransactionAware) table;
    Transaction tx1 = transactionManager.startShort(100);
    txTable.startTx(tx1);
    record1 = new Record(123, 1234567890L, 3.14159f, 3.1415926535, "foobar", new byte[] { 1, 2, 3 });
    record2 = new Record(0 - 987, 9876543210L, 2.71f, 2.71112384, "hello world", new byte[] { 4, 5, 6 });
    table.write("123", record1);
    table.write("456", record2);
    Assert.assertTrue(txTable.commitTx());
    transactionManager.canCommit(tx1.getTransactionId(), txTable.getTxChanges());
    transactionManager.commit(tx1.getTransactionId(), tx1.getWritePointer());
    txTable.postTxCommit();
}
Also used : Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) Record(co.cask.cdap.explore.service.datasets.Record) ObjectMappedTable(co.cask.cdap.api.dataset.lib.ObjectMappedTable)
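
A short follow-up sketch for reading the rows back in a fresh transaction; it reuses table, txTable and transactionManager from setupTable above and assumes ObjectMappedTable exposes read(String key).

// Sketch: verify the two records written above by reading them back in a new transaction.
Transaction tx2 = transactionManager.startShort(100);
txTable.startTx(tx2);
Record read1 = table.read("123");
Record read2 = table.read("456");
Assert.assertNotNull(read1);
Assert.assertNotNull(read2);
// complete the protocol the same way the setup method does
txTable.commitTx();
transactionManager.canCommit(tx2.getTransactionId(), txTable.getTxChanges());
transactionManager.commit(tx2.getTransactionId(), tx2.getWritePointer());
txTable.postTxCommit();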

Aggregations

TransactionAware (org.apache.tephra.TransactionAware): 97 usages
Test (org.junit.Test): 65 usages
TransactionExecutor (org.apache.tephra.TransactionExecutor): 48 usages
Table (co.cask.cdap.api.dataset.table.Table): 39 usages
Transaction (org.apache.tephra.Transaction): 34 usages
IOException (java.io.IOException): 28 usages
PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet): 24 usages
DatasetAdmin (co.cask.cdap.api.dataset.DatasetAdmin): 23 usages
HBaseTable (co.cask.cdap.data2.dataset2.lib.table.hbase.HBaseTable): 21 usages
PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey): 19 usages
DataSetException (co.cask.cdap.api.dataset.DataSetException): 16 usages
List (java.util.List): 13 usages
Put (co.cask.cdap.api.dataset.table.Put): 12 usages
DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException): 11 usages
TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet): 11 usages
Row (co.cask.cdap.api.dataset.table.Row): 11 usages
ArrayList (java.util.ArrayList): 11 usages
HashSet (java.util.HashSet): 11 usages
TransactionFailureException (org.apache.tephra.TransactionFailureException): 11 usages
PartitionNotFoundException (co.cask.cdap.api.dataset.PartitionNotFoundException): 10 usages