use of org.apache.tephra.TransactionCodec in project cdap by caskdata.
the class HBaseTableTest method testCachedEncodedTransaction.
@Test
public void testCachedEncodedTransaction() throws Exception {
  String tableName = "testEncodedTxTable";
  DatasetProperties props = DatasetProperties.EMPTY;
  getTableAdmin(CONTEXT1, tableName, props).create();
  DatasetSpecification tableSpec = DatasetSpecification.builder(tableName, HBaseTable.class.getName()).build();
  // use a transaction codec that counts the number of times encode() is called
  final AtomicInteger encodeCount = new AtomicInteger();
  final TransactionCodec codec = new TransactionCodec() {
    @Override
    public byte[] encode(Transaction tx) throws IOException {
      encodeCount.incrementAndGet();
      return super.encode(tx);
    }
  };
  // use a table util that creates an HTable that validates the encoded tx on each get
  final AtomicReference<Transaction> txRef = new AtomicReference<>();
  HBaseTableUtil util = new DelegatingHBaseTableUtil(hBaseTableUtil) {
    @Override
    public HTable createHTable(Configuration conf, TableId tableId) throws IOException {
      HTable htable = super.createHTable(conf, tableId);
      return new MinimalDelegatingHTable(htable) {
        @Override
        public Result get(org.apache.hadoop.hbase.client.Get get) throws IOException {
          Assert.assertEquals(txRef.get().getTransactionId(),
                              codec.decode(get.getAttribute(TxConstants.TX_OPERATION_ATTRIBUTE_KEY)).getTransactionId());
          return super.get(get);
        }

        @Override
        public Result[] get(List<org.apache.hadoop.hbase.client.Get> gets) throws IOException {
          for (org.apache.hadoop.hbase.client.Get get : gets) {
            Assert.assertEquals(txRef.get().getTransactionId(),
                                codec.decode(get.getAttribute(TxConstants.TX_OPERATION_ATTRIBUTE_KEY)).getTransactionId());
          }
          return super.get(gets);
        }

        @Override
        public ResultScanner getScanner(org.apache.hadoop.hbase.client.Scan scan) throws IOException {
          Assert.assertEquals(txRef.get().getTransactionId(),
                              codec.decode(scan.getAttribute(TxConstants.TX_OPERATION_ATTRIBUTE_KEY)).getTransactionId());
          return super.getScanner(scan);
        }
      };
    }
  };
  HBaseTable table = new HBaseTable(CONTEXT1, tableSpec, Collections.<String, String>emptyMap(),
                                    cConf, TEST_HBASE.getConfiguration(), util, codec);
  DetachedTxSystemClient txSystemClient = new DetachedTxSystemClient();
  // test all operations: only the first one encodes
  Transaction tx = txSystemClient.startShort();
  txRef.set(tx);
  table.startTx(tx);
  table.put(b("row1"), b("col1"), b("val1"));
  Assert.assertEquals(0, encodeCount.get());
  table.get(b("row"));
  Assert.assertEquals(1, encodeCount.get());
  table.get(ImmutableList.of(new Get("a"), new Get("b")));
  Assert.assertEquals(1, encodeCount.get());
  Scanner scanner = table.scan(new Scan(null, null));
  Assert.assertEquals(1, encodeCount.get());
  scanner.close();
  table.increment(b("z"), b("z"), 0L);
  Assert.assertEquals(1, encodeCount.get());
  table.commitTx();
  table.postTxCommit();
  // test that for the next tx, we encode again
  tx = txSystemClient.startShort();
  txRef.set(tx);
  table.startTx(tx);
  table.get(b("row"));
  Assert.assertEquals(2, encodeCount.get());
  table.commitTx();
  // test that we encode again, even if postTxCommit was not called
  tx = txSystemClient.startShort();
  txRef.set(tx);
  table.startTx(tx);
  table.get(b("row"));
  Assert.assertEquals(3, encodeCount.get());
  table.commitTx();
  table.rollbackTx();
  // test that rollback does not encode the tx
  Assert.assertEquals(3, encodeCount.get());
  // test that we encode again if the previous tx rolled back
  tx = txSystemClient.startShort();
  txRef.set(tx);
  table.startTx(tx);
  table.get(b("row"));
  Assert.assertEquals(4, encodeCount.get());
  table.commitTx();
  table.close();
  Assert.assertEquals(4, encodeCount.get());
}
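The test above exercises HBaseTable's caching of the encoded transaction; the underlying TransactionCodec API is just an encode/decode pair. A minimal standalone sketch of that round trip, not part of the CDAP test, assuming Tephra's DetachedTxSystemClient is available under the import path shown:

import java.io.IOException;
import org.apache.tephra.Transaction;
import org.apache.tephra.TransactionCodec;
import org.apache.tephra.inmemory.DetachedTxSystemClient; // assumed package path

public class TransactionCodecRoundTrip {
  public static void main(String[] args) throws IOException {
    TransactionCodec codec = new TransactionCodec();
    // start a short transaction without a transaction service, as the test does
    Transaction tx = new DetachedTxSystemClient().startShort();
    byte[] encoded = codec.encode(tx);            // serialize the transaction state
    Transaction decoded = codec.decode(encoded);  // deserialize it again
    System.out.println(decoded.getTransactionId() == tx.getTransactionId()); // expected: true
  }
}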
use of org.apache.tephra.TransactionCodec in project cdap by caskdata.
the class HBaseTableExporter method createSubmittableJob.
/**
 * Sets up the actual MapReduce job.
 * @param tx The transaction which needs to be passed to the Scan instance. This transaction is used by
 *           coprocessors to filter out the data corresponding to the invalid transactions.
 * @param tableName Name of the table which needs to be exported as HFiles.
 * @return the configured job
 * @throws IOException
 */
public Job createSubmittableJob(Transaction tx, String tableName) throws IOException {
  Job job = Job.getInstance(hConf, "HBaseTableExporter");
  job.setJarByClass(HBaseTableExporter.class);
  Scan scan = new Scan();
  scan.setCacheBlocks(false);
  // Set the transaction attribute for the scan.
  scan.setAttribute(TxConstants.TX_OPERATION_ATTRIBUTE_KEY, new TransactionCodec().encode(tx));
  job.setNumReduceTasks(0);
  TableMapReduceUtil.initTableMapperJob(tableName, scan, Import.KeyValueImporter.class, null, null, job);
  FileSystem fs = FileSystem.get(hConf);
  Random rand = new Random();
  Path root = new Path(fs.getWorkingDirectory(), "hbasetableexporter");
  fs.mkdirs(root);
  while (true) {
    bulkloadDir = new Path(root, "" + rand.nextLong());
    if (!fs.exists(bulkloadDir)) {
      break;
    }
  }
  HFileOutputFormat2.setOutputPath(job, bulkloadDir);
  HTable hTable = new HTable(hConf, tableName);
  HFileOutputFormat2.configureIncrementalLoad(job, hTable);
  return job;
}
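On the read path, the transaction travels with the Scan as the TxConstants.TX_OPERATION_ATTRIBUTE_KEY attribute and is decoded again, which is exactly what the assertions in the test above check. A minimal sketch of that decoding step; the helper method itself is hypothetical, while the codec and attribute key are the ones used above:

// Hypothetical helper, for illustration only: recover the transaction attached to a Scan.
private static Transaction decodeScanTx(org.apache.hadoop.hbase.client.Scan scan) throws IOException {
  byte[] encodedTx = scan.getAttribute(TxConstants.TX_OPERATION_ATTRIBUTE_KEY);
  return encodedTx == null ? null : new TransactionCodec().decode(encodedTx);
}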
use of org.apache.tephra.TransactionCodec in project cdap by caskdata.
the class MapReduceTaskContextProvider method createCacheLoader.
/**
* Creates a {@link CacheLoader} for the task context cache.
*/
private CacheLoader<ContextCacheKey, BasicMapReduceTaskContext> createCacheLoader(final Injector injector) {
  final DiscoveryServiceClient discoveryServiceClient = injector.getInstance(DiscoveryServiceClient.class);
  final DatasetFramework datasetFramework = injector.getInstance(DatasetFramework.class);
  final SecureStore secureStore = injector.getInstance(SecureStore.class);
  final SecureStoreManager secureStoreManager = injector.getInstance(SecureStoreManager.class);
  final MessagingService messagingService = injector.getInstance(MessagingService.class);
  // Multiple instances of BasicMapReduceTaskContext can share the same program.
  final AtomicReference<Program> programRef = new AtomicReference<>();
  return new CacheLoader<ContextCacheKey, BasicMapReduceTaskContext>() {
    @Override
    public BasicMapReduceTaskContext load(ContextCacheKey key) throws Exception {
      TaskAttemptID taskAttemptId = key.getTaskAttemptID();
      // taskAttemptId could be null if used from an org.apache.hadoop.mapreduce.Partitioner or
      // from an org.apache.hadoop.io.RawComparator, in which case we can get the JobId from the conf.
      // Note that the JobId isn't in the conf for the OutputCommitter#setupJob method, in which case
      // we use the taskAttemptId.
      Path txFile = MainOutputCommitter.getTxFile(key.getConfiguration(),
                                                  taskAttemptId != null ? taskAttemptId.getJobID() : null);
      FileSystem fs = txFile.getFileSystem(key.getConfiguration());
      Preconditions.checkArgument(fs.exists(txFile));
      Transaction tx;
      try (FSDataInputStream txFileInputStream = fs.open(txFile)) {
        byte[] txByteArray = ByteStreams.toByteArray(txFileInputStream);
        tx = new TransactionCodec().decode(txByteArray);
      }
      MapReduceContextConfig contextConfig = new MapReduceContextConfig(key.getConfiguration());
      MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(key.getConfiguration());
      Program program = programRef.get();
      if (program == null) {
        // Creation of the program is relatively cheap, so just create and do compare-and-set.
        programRef.compareAndSet(null, createProgram(contextConfig, classLoader.getProgramClassLoader()));
        program = programRef.get();
      }
      WorkflowProgramInfo workflowInfo = contextConfig.getWorkflowProgramInfo();
      DatasetFramework programDatasetFramework = workflowInfo == null
        ? datasetFramework
        : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo,
                                                                   program.getApplicationSpecification());
      // Setup dataset framework context, if required
      if (programDatasetFramework instanceof ProgramContextAware) {
        ProgramRunId programRunId = program.getId().run(ProgramRunners.getRunId(contextConfig.getProgramOptions()));
        ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programRunId));
      }
      MapReduceSpecification spec = program.getApplicationSpecification().getMapReduce().get(program.getName());
      MetricsCollectionService metricsCollectionService = null;
      MapReduceMetrics.TaskType taskType = null;
      String taskId = null;
      ProgramOptions options = contextConfig.getProgramOptions();
      // taskAttemptId is null when this is called from a Partitioner or a RawComparator (see comment above)
      if (taskAttemptId != null) {
        taskId = taskAttemptId.getTaskID().toString();
        if (MapReduceMetrics.TaskType.hasType(taskAttemptId.getTaskType())) {
          taskType = MapReduceMetrics.TaskType.from(taskAttemptId.getTaskType());
          // the metrics collection service is only needed for mappers and reducers
          metricsCollectionService = injector.getInstance(MetricsCollectionService.class);
          options = new SimpleProgramOptions(options.getProgramId(), options.getArguments(),
                                             new BasicArguments(RuntimeArguments.extractScope(
                                               "task", taskType.toString().toLowerCase(),
                                               contextConfig.getProgramOptions().getUserArguments().asMap())),
                                             options.isDebug());
        }
      }
      CConfiguration cConf = injector.getInstance(CConfiguration.class);
      TransactionSystemClient txClient = injector.getInstance(TransactionSystemClient.class);
      return new BasicMapReduceTaskContext(program, options, cConf, taskType, taskId, spec, workflowInfo,
                                           discoveryServiceClient, metricsCollectionService, txClient, tx,
                                           programDatasetFramework, classLoader.getPluginInstantiator(),
                                           contextConfig.getLocalizedResources(), secureStore, secureStoreManager,
                                           authorizationEnforcer, authenticationContext, messagingService,
                                           mapReduceClassLoader);
    }
  };
}
use of org.apache.tephra.TransactionCodec in project cdap by caskdata.
the class MainOutputCommitter method setupJob.
@Override
public void setupJob(JobContext jobContext) throws IOException {
  Configuration configuration = jobContext.getConfiguration();
  MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
  MapReduceTaskContextProvider taskContextProvider = classLoader.getTaskContextProvider();
  Injector injector = taskContextProvider.getInjector();
  cConf = injector.getInstance(CConfiguration.class);
  MapReduceContextConfig contextConfig = new MapReduceContextConfig(jobContext.getConfiguration());
  ProgramId programId = contextConfig.getProgramId();
  LOG.info("Setting up for MapReduce job: namespaceId={}, applicationId={}, program={}, runid={}",
           programId.getNamespace(), programId.getApplication(), programId.getProgram(),
           ProgramRunners.getRunId(contextConfig.getProgramOptions()));
  RetryStrategy retryStrategy =
    SystemArguments.getRetryStrategy(contextConfig.getProgramOptions().getUserArguments().asMap(),
                                     contextConfig.getProgramId().getType(), cConf);
  this.txClient = new RetryingLongTransactionSystemClient(injector.getInstance(TransactionSystemClient.class),
                                                          retryStrategy);
  // We start a long-running tx to be used by the mapreduce job tasks.
  this.transaction = txClient.startLong();
  // Write the tx somewhere, so that we can re-use it in the mapreduce tasks
  Path txFile = getTxFile(configuration, jobContext.getJobID());
  FileSystem fs = txFile.getFileSystem(configuration);
  try (FSDataOutputStream fsDataOutputStream = fs.create(txFile, false)) {
    fsDataOutputStream.write(new TransactionCodec().encode(transaction));
  }
  // we can instantiate the TaskContext only after the tx is set above; it's used by the operations below
  taskContext = taskContextProvider.get(taskAttemptContext);
  this.outputs = Outputs.transform(contextConfig.getOutputs(), taskContext);
  super.setupJob(jobContext);
}
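The last two snippets are the two halves of one handshake: MainOutputCommitter.setupJob encodes the long-running transaction into a file on the job's file system, and MapReduceTaskContextProvider's cache loader reads that file back and decodes it in each task. A condensed, hedged sketch of that pattern; the helper class and file path are hypothetical, while the codec, FileSystem, and ByteStreams calls are the APIs used above:

import java.io.IOException;
import com.google.common.io.ByteStreams;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.tephra.Transaction;
import org.apache.tephra.TransactionCodec;

// Hypothetical helper, for illustration only.
final class TxFileExchange {
  private static final TransactionCodec CODEC = new TransactionCodec();

  // Producer side (what setupJob does): encode the tx and write it to a file.
  static void write(Configuration conf, Path txFile, Transaction tx) throws IOException {
    FileSystem fs = txFile.getFileSystem(conf);
    try (FSDataOutputStream out = fs.create(txFile, false)) {
      out.write(CODEC.encode(tx));
    }
  }

  // Consumer side (what the cache loader does): read the file back and decode the tx.
  static Transaction read(Configuration conf, Path txFile) throws IOException {
    FileSystem fs = txFile.getFileSystem(conf);
    try (FSDataInputStream in = fs.open(txFile)) {
      return CODEC.decode(ByteStreams.toByteArray(in));
    }
  }
}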