Use of org.apache.tephra.TransactionAware in project cdap by caskdata.
The class DynamicDatasetCacheTest, method getTxAwares:
private List<TestDataset> getTxAwares() {
  SortedSet<TestDataset> set = new TreeSet<>();
  for (TransactionAware txAware : cache.getTransactionAwares()) {
    TestDataset dataset = (TestDataset) txAware;
    set.add(dataset);
  }
  return ImmutableList.copyOf(set);
}
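The cast and the TreeSet above rely on two things the snippet does not show: every object returned by cache.getTransactionAwares() is a TestDataset, and TestDataset has a natural ordering. Below is a minimal, illustrative stub of such a dataset; the class name, the name field, and ordering by name are assumptions, and only the overridden methods come from the Tephra TransactionAware interface.

import java.util.Collection;
import java.util.Collections;
import org.apache.tephra.Transaction;
import org.apache.tephra.TransactionAware;

// Illustrative stub only; the real TestDataset in cdap is not shown on this page.
class SortableTestDataset implements TransactionAware, Comparable<SortableTestDataset> {
  private final String name;
  private Transaction currentTx;

  SortableTestDataset(String name) {
    this.name = name;
  }

  @Override public void startTx(Transaction tx) { currentTx = tx; }
  @Override public void updateTx(Transaction tx) { currentTx = tx; }
  // no changes are tracked in this stub, so there is nothing to report or commit
  @Override public Collection<byte[]> getTxChanges() { return Collections.<byte[]>emptyList(); }
  @Override public boolean commitTx() { return true; }
  @Override public void postTxCommit() { currentTx = null; }
  @Override public boolean rollbackTx() { currentTx = null; return true; }
  @Override public String getTransactionAwareName() { return name; }

  // gives the TreeSet in getTxAwares() a deterministic order
  @Override public int compareTo(SortableTestDataset other) { return name.compareTo(other.name); }
}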
Use of org.apache.tephra.TransactionAware in project cdap by caskdata.
The class InMemoryStreamFileWriterFactory, method create:
@Override
public FileWriter<StreamEvent> create(StreamConfig config, int generation) throws IOException {
  final QueueProducer producer = queueClientFactory.createProducer(QueueName.fromStream(config.getStreamId()));
  final List<TransactionAware> txAwares = Lists.newArrayList();
  if (producer instanceof TransactionAware) {
    txAwares.add((TransactionAware) producer);
  }
  final TransactionExecutor txExecutor = executorFactory.createExecutor(txAwares);

  // Adapt the FileWriter interface onto the QueueProducer
  return new FileWriter<StreamEvent>() {

    private final List<StreamEvent> events = Lists.newArrayList();

    @Override
    public void append(StreamEvent event) throws IOException {
      events.add(event);
    }

    @Override
    public void appendAll(Iterator<? extends StreamEvent> events) throws IOException {
      Iterators.addAll(this.events, events);
    }

    @Override
    public void close() throws IOException {
      producer.close();
    }

    @Override
    public void flush() throws IOException {
      try {
        // Enqueue all buffered events within a single transaction; the executor drives
        // startTx/commitTx/rollback on the TransactionAware producer.
        txExecutor.execute(new TransactionExecutor.Subroutine() {
          @Override
          public void apply() throws Exception {
            for (StreamEvent event : events) {
              producer.enqueue(new QueueEntry(STREAM_EVENT_CODEC.encodePayload(event)));
            }
            events.clear();
          }
        });
      } catch (TransactionFailureException e) {
        throw new IOException(e);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new InterruptedIOException();
      }
    }
  };
}
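A caller of this factory only sees the FileWriter facade; the transaction happens inside flush(). A minimal usage sketch follows; the writerFactory and config variables, and building the StreamEvent from a ByteBuffer body, are assumptions for illustration, not part of the snippet above (java.nio.ByteBuffer and StandardCharsets imports assumed).

FileWriter<StreamEvent> writer = writerFactory.create(config, 0);
try {
  // buffered in memory only; nothing has been enqueued yet
  writer.append(new StreamEvent(ByteBuffer.wrap("event-1".getBytes(StandardCharsets.UTF_8))));
  // flush() runs all buffered events through the TransactionExecutor, so the enqueue
  // on the TransactionAware producer is committed or rolled back atomically
  writer.flush();
} finally {
  writer.close();
}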
Use of org.apache.tephra.TransactionAware in project cdap by caskdata.
The class HiveExploreServiceFileSetTestRun, method testTimePartitionedFileSet:
@Test
public void testTimePartitionedFileSet() throws Exception {
  final DatasetId datasetInstanceId = NAMESPACE_ID.dataset("parts");
  final String tableName = getDatasetHiveName(datasetInstanceId);
  // create a time partitioned file set
  datasetFramework.addInstance("timePartitionedFileSet", datasetInstanceId, FileSetProperties.builder()
    .setBasePath("somePath")
    .setEnableExploreOnCreate(true)
    .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
    .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
    .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
    .setTableProperty("avro.schema.literal", SCHEMA.toString())
    .build());
  // verify that the hive table was created for this file set
  runCommand(NAMESPACE_ID, "show tables", true,
             Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")),
             Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(tableName))));
  // Accessing dataset instance to perform data operations
  TimePartitionedFileSet tpfs = datasetFramework.getDataset(datasetInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
  Assert.assertNotNull(tpfs);
  Assert.assertTrue(tpfs instanceof TransactionAware);
  // add some partitions. Beware that Hive expects a partition to be a directory, so we create dirs with one file
  long time1 = DATE_FORMAT.parse("12/10/14 1:00 am").getTime();
  long time2 = DATE_FORMAT.parse("12/10/14 2:00 am").getTime();
  long time3 = DATE_FORMAT.parse("12/10/14 3:00 am").getTime();
  Location location1 = tpfs.getEmbeddedFileSet().getLocation("file1/nn");
  Location location2 = tpfs.getEmbeddedFileSet().getLocation("file2/nn");
  Location location3 = tpfs.getEmbeddedFileSet().getLocation("file3/nn");
  FileWriterHelper.generateAvroFile(location1.getOutputStream(), "x", 1, 2);
  FileWriterHelper.generateAvroFile(location2.getOutputStream(), "y", 2, 3);
  FileWriterHelper.generateAvroFile(location3.getOutputStream(), "x", 3, 4);
  addTimePartition(tpfs, time1, "file1");
  addTimePartition(tpfs, time2, "file2");
  addTimePartition(tpfs, time3, "file3");
  // verify that the partitions were added to Hive
  runCommand(NAMESPACE_ID, "show partitions " + tableName, true,
             Lists.newArrayList(new ColumnDesc("partition", "STRING", 1, "from deserializer")),
             Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=1/minute=0")),
                                new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=2/minute=0")),
                                new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=3/minute=0"))));
  // verify that we can query the key-values in the files with Hive
  runCommand(NAMESPACE_ID, "SELECT key, value FROM " + tableName + " ORDER BY key, value", true,
             Lists.newArrayList(new ColumnDesc("key", "STRING", 1, null), new ColumnDesc("value", "STRING", 2, null)),
             Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", "#1")),
                                new QueryResult(Lists.<Object>newArrayList("x3", "#3")),
                                new QueryResult(Lists.<Object>newArrayList("y2", "#2"))));
  // verify that we can restrict the query to a single partition
  runCommand(NAMESPACE_ID, "SELECT key, value FROM " + tableName + " WHERE hour = 2 ORDER BY key, value", true,
             Lists.newArrayList(new ColumnDesc("key", "STRING", 1, null), new ColumnDesc("value", "STRING", 2, null)),
             Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("y2", "#2"))));
  // remove a partition
  dropTimePartition(tpfs, time2);
  // verify that the remaining key-values are still queryable with Hive
  runCommand(NAMESPACE_ID, "SELECT key, value FROM " + tableName + " ORDER BY key, value", true,
             Lists.newArrayList(new ColumnDesc("key", "STRING", 1, null), new ColumnDesc("value", "STRING", 2, null)),
             Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", "#1")),
                                new QueryResult(Lists.<Object>newArrayList("x3", "#3"))));
  // verify the partition was removed from Hive
  runCommand(NAMESPACE_ID, "show partitions " + tableName, true,
             Lists.newArrayList(new ColumnDesc("partition", "STRING", 1, "from deserializer")),
             Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=1/minute=0")),
                                new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=3/minute=0"))));
  // drop the dataset
  datasetFramework.deleteInstance(datasetInstanceId);
  // verify the Hive table is gone
  runCommand(NAMESPACE_ID, "show tables", false,
             Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")),
             Collections.<QueryResult>emptyList());
  // recreate the dataset
  datasetFramework.addInstance("timePartitionedFileSet", datasetInstanceId, FileSetProperties.builder()
    .setBasePath("somePath")
    .setEnableExploreOnCreate(true)
    .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
    .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
    .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
    .setTableProperty("avro.schema.literal", SCHEMA.toString())
    .build());
  // verify that the hive table was created for this file set
  runCommand(NAMESPACE_ID, "show tables", true,
             Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")),
             Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(tableName))));
}
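The addTimePartition and dropTimePartition helpers are not part of the snippet above. A plausible sketch of addTimePartition, assuming it drives the TransactionAware lifecycle manually the same way the other tests on this page do:

private void addTimePartition(TimePartitionedFileSet tpfs, long time, String path) throws Exception {
  TransactionAware txAware = (TransactionAware) tpfs;
  Transaction tx = transactionManager.startShort(100);
  txAware.startTx(tx);
  // register the directory written above as the partition for this timestamp
  tpfs.addPartition(time, path);
  Assert.assertTrue(txAware.commitTx());
  transactionManager.canCommit(tx.getTransactionId(), txAware.getTxChanges());
  transactionManager.commit(tx.getTransactionId(), tx.getWritePointer());
  txAware.postTxCommit();
}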
Use of org.apache.tephra.TransactionAware in project cdap by caskdata.
The class HiveExploreTableTestRun, method testNonAsciiStrings:
@Test
public void testNonAsciiStrings() throws Exception {
  DatasetId ttId = NAMESPACE_ID.dataset("tt");
  datasetFramework.addInstance(Table.class.getName(), ttId, TableProperties.builder()
    .setSchema(Schema.recordOf("record",
                               Schema.Field.of("a", Schema.of(Schema.Type.STRING)),
                               Schema.Field.of("b", Schema.of(Schema.Type.STRING))))
    .setRowFieldName("a")
    .setExploreTableName("tt")
    .build());
  try {
    // Accessing dataset instance to perform data operations
    Table tt = datasetFramework.getDataset(ttId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(tt);
    // write two rows in a short transaction, driving the TransactionAware lifecycle manually
    Transaction tx = transactionManager.startShort(100);
    ((TransactionAware) tt).startTx(tx);
    tt.put(new Put("a", "b", "c"));
    // row key and column value are non-ASCII
    tt.put(new Put("ä", "b", "ç"));
    ((TransactionAware) tt).commitTx();
    transactionManager.canCommit(tx.getTransactionId(), ((TransactionAware) tt).getTxChanges());
    transactionManager.commit(tx.getTransactionId(), tx.getWritePointer());
    ((TransactionAware) tt).postTxCommit();
    // read the rows back through Explore (Hive)
    ExploreExecutionResult results = exploreClient.submit(NAMESPACE_ID, "select * from tt").get();
    List<Object> columns = results.next().getColumns();
    Assert.assertEquals(2, columns.size());
    Assert.assertEquals("a", columns.get(0));
    Assert.assertEquals("c", columns.get(1));
    columns = results.next().getColumns();
    Assert.assertEquals(2, columns.size());
    Assert.assertEquals("ä", columns.get(0));
    Assert.assertEquals("ç", columns.get(1));
  } finally {
    datasetFramework.deleteInstance(ttId);
  }
}
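As a cross-check of the same data through the dataset API rather than through Hive, the non-ASCII row could also be read back in another short transaction. This is a sketch, not part of the original test; it assumes Table.get(Get) and Row.getString(...) from the CDAP table API and reuses the tt and transactionManager variables from the method above.

Transaction readTx = transactionManager.startShort(100);
((TransactionAware) tt).startTx(readTx);
Row row = tt.get(new Get("ä"));
Assert.assertEquals("ç", row.getString("b"));
// finish the read transaction with the same manual sequence used for the write
((TransactionAware) tt).commitTx();
transactionManager.canCommit(readTx.getTransactionId(), ((TransactionAware) tt).getTxChanges());
transactionManager.commit(readTx.getTransactionId(), readTx.getWritePointer());
((TransactionAware) tt).postTxCommit();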
Use of org.apache.tephra.TransactionAware in project cdap by caskdata.
The class HiveExploreObjectMappedTableTestRun, method setupTable:
private void setupTable(@Nullable String dbName, @Nullable String tableName) throws Exception {
  if (dbName != null) {
    runCommand(NAMESPACE_ID, "create database if not exists " + dbName, false, null, null);
  }
  datasetFramework.addInstance(ObjectMappedTable.class.getName(), MY_TABLE, setupProperties(dbName, tableName, "row_key"));
  // Accessing dataset instance to perform data operations
  ObjectMappedTable<Record> table = datasetFramework.getDataset(MY_TABLE, DatasetDefinition.NO_ARGUMENTS, null);
  Assert.assertNotNull(table);
  TransactionAware txTable = (TransactionAware) table;
  Transaction tx1 = transactionManager.startShort(100);
  txTable.startTx(tx1);
  record1 = new Record(123, 1234567890L, 3.14159f, 3.1415926535, "foobar", new byte[] { 1, 2, 3 });
  record2 = new Record(-987, 9876543210L, 2.71f, 2.71112384, "hello world", new byte[] { 4, 5, 6 });
  table.write("123", record1);
  table.write("456", record2);
  Assert.assertTrue(txTable.commitTx());
  transactionManager.canCommit(tx1.getTransactionId(), txTable.getTxChanges());
  transactionManager.commit(tx1.getTransactionId(), tx1.getWritePointer());
  txTable.postTxCommit();
}
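The startTx / commitTx / canCommit / commit / postTxCommit sequence recurs in several of the tests on this page. A small helper like the following could factor it out; it is an illustration built only from the calls already shown above, not an existing cdap utility.

private void commitShortTx(TransactionAware txAware, Transaction tx) throws Exception {
  // flush the dataset's in-memory changes and make sure they are committable
  Assert.assertTrue(txAware.commitTx());
  // report the change set for conflict detection, then make the write pointer visible
  transactionManager.canCommit(tx.getTransactionId(), txAware.getTxChanges());
  transactionManager.commit(tx.getTransactionId(), tx.getWritePointer());
  // let the dataset clean up its per-transaction state
  txAware.postTxCommit();
}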