Use of org.apache.hadoop.mapreduce.TaskAttemptID in project jena by apache.
From the class AbstractNodeTupleOutputFormatTests, method testOutput:
/**
 * Tests output
 *
 * @param f
 *            File to output to
 * @param num
 *            Number of tuples to output
 * @throws IOException
 * @throws InterruptedException
 */
protected final void testOutput(File f, int num) throws IOException, InterruptedException {
    // Prepare configuration
    Configuration config = this.prepareConfiguration();
    // Set up fake job
    OutputFormat<NullWritable, T> outputFormat = this.getOutputFormat();
    Job job = Job.getInstance(config);
    job.setOutputFormatClass(outputFormat.getClass());
    this.addOutputPath(f, job.getConfiguration(), job);
    JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
    Assert.assertNotNull(FileOutputFormat.getOutputPath(context));
    // Output the data
    TaskAttemptID id = new TaskAttemptID("outputTest", 1, TaskType.MAP, 1, 1);
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), id);
    RecordWriter<NullWritable, T> writer = outputFormat.getRecordWriter(taskContext);
    Iterator<T> tuples = this.generateTuples(num);
    while (tuples.hasNext()) {
        writer.write(NullWritable.get(), tuples.next());
    }
    writer.close(taskContext);
    // Check output
    File outputFile = this.findOutputFile(this.folder.getRoot(), context);
    Assert.assertNotNull(outputFile);
    this.checkTuples(outputFile, num);
}
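As a side note (not part of the Jena test suite), a TaskAttemptID built as above round-trips through Hadoop's canonical string form; the literal shown in the comment is illustrative only.

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskAttemptIdRoundTrip {
    public static void main(String[] args) {
        TaskAttemptID id = new TaskAttemptID("outputTest", 1, TaskType.MAP, 1, 1);
        String text = id.toString();                    // e.g. attempt_outputTest_0001_m_000001_1
        TaskAttemptID parsed = TaskAttemptID.forName(text);
        System.out.println(parsed.equals(id));          // true
        System.out.println(parsed.getTaskID());         // the enclosing TaskID
        System.out.println(parsed.getId());             // the attempt number
    }
}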
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project jena by apache.
From the class AbstractNodeTupleInputFormatTests, method testSingleInput:
/**
 * Runs a test with a single input
 *
 * @param config
 *            Configuration
 * @param input
 *            Input
 * @param expectedSplits
 *            Expected number of splits
 * @param expectedTuples
 *            Expected tuples
 * @throws IOException
 * @throws InterruptedException
 */
protected final void testSingleInput(Configuration config, File input, int expectedSplits, int expectedTuples) throws IOException, InterruptedException {
    // Set up fake job
    InputFormat<LongWritable, T> inputFormat = this.getInputFormat();
    Job job = Job.getInstance(config);
    job.setInputFormatClass(inputFormat.getClass());
    this.addInputPath(input, job.getConfiguration(), job);
    JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
    Assert.assertEquals(1, FileInputFormat.getInputPaths(context).length);
    NLineInputFormat.setNumLinesPerSplit(job, LARGE_SIZE);
    // Check splits
    List<InputSplit> splits = inputFormat.getSplits(context);
    Assert.assertEquals(expectedSplits, splits.size());
    // Check tuples
    for (InputSplit split : splits) {
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
        RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext);
        reader.initialize(split, taskContext);
        this.checkTuples(reader, expectedTuples);
    }
}
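The checkTuples helper used above is defined elsewhere in the abstract test class and is not shown here. A minimal sketch of what such a check typically does (the class and method names below are hypothetical, not the Jena implementation):

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.RecordReader;
import org.junit.Assert;

// Hypothetical helper: drains a record reader and asserts how many tuples it produced.
public final class TupleCounting {
    public static <T> void countTuples(RecordReader<LongWritable, T> reader, int expected)
            throws IOException, InterruptedException {
        int count = 0;
        while (reader.nextKeyValue()) {
            Assert.assertNotNull(reader.getCurrentValue());
            count++;
        }
        Assert.assertEquals(expected, count);
    }
}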
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project cdap by caskdata.
From the class MapReduceTaskContextProvider, method createCacheLoader:
/**
 * Creates a {@link CacheLoader} for the task context cache.
 */
private CacheLoader<ContextCacheKey, BasicMapReduceTaskContext> createCacheLoader(final Injector injector) {
    final DiscoveryServiceClient discoveryServiceClient = injector.getInstance(DiscoveryServiceClient.class);
    final DatasetFramework datasetFramework = injector.getInstance(DatasetFramework.class);
    final SecureStore secureStore = injector.getInstance(SecureStore.class);
    final SecureStoreManager secureStoreManager = injector.getInstance(SecureStoreManager.class);
    final MessagingService messagingService = injector.getInstance(MessagingService.class);
    // Multiple instances of BasicMapReduceTaskContext can share the same program.
    final AtomicReference<Program> programRef = new AtomicReference<>();
    return new CacheLoader<ContextCacheKey, BasicMapReduceTaskContext>() {

        @Override
        public BasicMapReduceTaskContext load(ContextCacheKey key) throws Exception {
            MapReduceContextConfig contextConfig = new MapReduceContextConfig(key.getConfiguration());
            MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(key.getConfiguration());
            Program program = programRef.get();
            if (program == null) {
                // Creation of program is relatively cheap, so just create and do compare and set.
                programRef.compareAndSet(null, createProgram(contextConfig, classLoader.getProgramClassLoader()));
                program = programRef.get();
            }
            WorkflowProgramInfo workflowInfo = contextConfig.getWorkflowProgramInfo();
            DatasetFramework programDatasetFramework = workflowInfo == null
                ? datasetFramework
                : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo,
                                                                           program.getApplicationSpecification());
            // Setup dataset framework context, if required
            if (programDatasetFramework instanceof ProgramContextAware) {
                ProgramRunId programRunId = program.getId().run(ProgramRunners.getRunId(contextConfig.getProgramOptions()));
                ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programRunId));
            }
            MapReduceSpecification spec = program.getApplicationSpecification().getMapReduce().get(program.getName());
            MetricsCollectionService metricsCollectionService = null;
            MapReduceMetrics.TaskType taskType = null;
            String taskId = null;
            TaskAttemptID taskAttemptId = key.getTaskAttemptID();
            // taskAttemptId can be null, for example when invoked from a org.apache.hadoop.io.RawComparator
            if (taskAttemptId != null) {
                taskId = taskAttemptId.getTaskID().toString();
                if (MapReduceMetrics.TaskType.hasType(taskAttemptId.getTaskType())) {
                    taskType = MapReduceMetrics.TaskType.from(taskAttemptId.getTaskType());
                    // if this is not for a mapper or a reducer, we don't need the metrics collection service
                    metricsCollectionService = injector.getInstance(MetricsCollectionService.class);
                }
            }
            CConfiguration cConf = injector.getInstance(CConfiguration.class);
            TransactionSystemClient txClient = injector.getInstance(TransactionSystemClient.class);
            // authorizationEnforcer and authenticationContext are defined elsewhere in the enclosing class (not shown in this snippet)
            return new BasicMapReduceTaskContext(program, contextConfig.getProgramOptions(), cConf, taskType, taskId,
                spec, workflowInfo, discoveryServiceClient, metricsCollectionService, txClient, contextConfig.getTx(),
                programDatasetFramework, classLoader.getPluginInstantiator(), contextConfig.getLocalizedResources(),
                secureStore, secureStoreManager, authorizationEnforcer, authenticationContext, messagingService);
        }
    };
}
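For comparison, a minimal sketch (not CDAP code) of reading the same identifiers from inside a running task, where Hadoop exposes the TaskAttemptID directly on the task context:

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.TaskAttemptID;

// Sketch only: logs the task id, task type and attempt number during setup.
public class IdLoggingMapper extends Mapper<LongWritable, Text, NullWritable, NullWritable> {
    @Override
    protected void setup(Context context) {
        TaskAttemptID attemptId = context.getTaskAttemptID();
        System.out.println("task=" + attemptId.getTaskID()
            + " type=" + attemptId.getTaskType()
            + " attempt=" + attemptId.getId());
    }
}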
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hive by apache.
From the class TestRCFileMapReduceInputFormat, method writeThenReadByRecordReader:
private void writeThenReadByRecordReader(int intervalRecordCount, int writeCount, int splitNumber, long maxSplitSize, CompressionCodec codec) throws IOException, InterruptedException {
    Path testDir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred/testsmallfirstsplit");
    Path testFile = new Path(testDir, "test_rcfile");
    fs.delete(testFile, true);
    Configuration cloneConf = new Configuration(conf);
    RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
    cloneConf.setInt(HiveConf.ConfVars.HIVE_RCFILE_RECORD_INTERVAL.varname, intervalRecordCount);
    RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec);
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length);
    for (int i = 0; i < bytesArray.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length);
        bytes.set(i, cu);
    }
    for (int i = 0; i < writeCount; i++) {
        writer.append(bytes);
    }
    writer.close();
    RCFileMapReduceInputFormat<LongWritable, BytesRefArrayWritable> inputFormat = new RCFileMapReduceInputFormat<LongWritable, BytesRefArrayWritable>();
    Configuration jonconf = new Configuration(cloneConf);
    jonconf.set("mapred.input.dir", testDir.toString());
    JobContext context = new Job(jonconf);
    HiveConf.setLongVar(context.getConfiguration(), HiveConf.ConfVars.MAPREDMAXSPLITSIZE, maxSplitSize);
    List<InputSplit> splits = inputFormat.getSplits(context);
    assertEquals("splits length should be " + splitNumber, splitNumber, splits.size());
    int readCount = 0;
    for (int i = 0; i < splits.size(); i++) {
        TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(jonconf, new TaskAttemptID());
        RecordReader<LongWritable, BytesRefArrayWritable> rr = inputFormat.createRecordReader(splits.get(i), tac);
        rr.initialize(splits.get(i), tac);
        while (rr.nextKeyValue()) {
            readCount++;
        }
    }
    assertEquals("readCount should be equal to writeCount", readCount, writeCount);
}
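The shim call above hides Hadoop version differences. On Hadoop 2.x the equivalent context can be built directly; a sketch (not part of the Hive test, class name assumed):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class DirectTaskAttemptContext {
    public static TaskAttemptContext create(Configuration conf) {
        // An empty TaskAttemptID is enough for driving a RecordReader outside a real job.
        return new TaskAttemptContextImpl(conf, new TaskAttemptID());
    }
}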
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hive by apache.
From the class LlapBaseInputFormat, method getRecordReader:
@SuppressWarnings("unchecked")
@Override
public RecordReader<NullWritable, V> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
    LlapInputSplit llapSplit = (LlapInputSplit) split;
    // Set conf to use LLAP user rather than current user for LLAP Zk registry.
    HiveConf.setVar(job, HiveConf.ConfVars.LLAP_ZK_REGISTRY_USER, llapSplit.getLlapUser());
    SubmitWorkInfo submitWorkInfo = SubmitWorkInfo.fromBytes(llapSplit.getPlanBytes());
    LlapServiceInstance serviceInstance = getServiceInstance(job, llapSplit);
    String host = serviceInstance.getHost();
    int llapSubmitPort = serviceInstance.getRpcPort();
    LOG.info("Found service instance for host " + host + " with rpc port " + llapSubmitPort
        + " and outputformat port " + serviceInstance.getOutputFormatPort());
    byte[] llapTokenBytes = llapSplit.getTokenBytes();
    Token<LlapTokenIdentifier> llapToken = null;
    if (llapTokenBytes != null) {
        DataInputBuffer in = new DataInputBuffer();
        in.reset(llapTokenBytes, 0, llapTokenBytes.length);
        llapToken = new Token<LlapTokenIdentifier>();
        llapToken.readFields(in);
    }
    LlapRecordReaderTaskUmbilicalExternalResponder umbilicalResponder = new LlapRecordReaderTaskUmbilicalExternalResponder();
    LlapTaskUmbilicalExternalClient llapClient = new LlapTaskUmbilicalExternalClient(job,
        submitWorkInfo.getTokenIdentifier(), submitWorkInfo.getToken(), umbilicalResponder, llapToken);
    int attemptNum = 0;
    // Use task attempt number from conf if provided
    TaskAttemptID taskAttemptId = TaskAttemptID.forName(job.get(MRJobConfig.TASK_ATTEMPT_ID));
    if (taskAttemptId != null) {
        attemptNum = taskAttemptId.getId();
        if (LOG.isDebugEnabled()) {
            LOG.debug("Setting attempt number to " + attemptNum + " from task attempt ID in conf: "
                + job.get(MRJobConfig.TASK_ATTEMPT_ID));
        }
    }
    SubmitWorkRequestProto request = constructSubmitWorkRequestProto(submitWorkInfo, llapSplit.getSplitNum(),
        attemptNum, llapClient.getAddress(), submitWorkInfo.getToken(), llapSplit.getFragmentBytes(),
        llapSplit.getFragmentBytesSignature(), job);
    llapClient.submitWork(request, host, llapSubmitPort);
    Socket socket = new Socket(host, serviceInstance.getOutputFormatPort());
    LOG.debug("Socket connected");
    SignableVertexSpec vertex = SignableVertexSpec.parseFrom(submitWorkInfo.getVertexBinary());
    String fragmentId = Converters.createTaskAttemptId(vertex.getQueryIdentifier(), vertex.getVertexIndex(),
        request.getFragmentNumber(), request.getAttemptNumber()).toString();
    OutputStream socketStream = socket.getOutputStream();
    LlapOutputSocketInitMessage.Builder builder = LlapOutputSocketInitMessage.newBuilder().setFragmentId(fragmentId);
    if (llapSplit.getTokenBytes() != null) {
        builder.setToken(ByteString.copyFrom(llapSplit.getTokenBytes()));
    }
    builder.build().writeDelimitedTo(socketStream);
    socketStream.flush();
    LOG.info("Registered id: " + fragmentId);
    @SuppressWarnings("rawtypes")
    LlapBaseRecordReader recordReader = new LlapBaseRecordReader(socket.getInputStream(), llapSplit.getSchema(),
        Text.class, job, llapClient, (java.io.Closeable) socket);
    umbilicalResponder.setRecordReader(recordReader);
    return recordReader;
}
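The attempt-number lookup above relies on TaskAttemptID.forName parsing the canonical attempt string that Hadoop stores under mapreduce.task.attempt.id. A minimal sketch of that parsing in isolation (class name assumed, literal in the comment illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.TaskAttemptID;

public class AttemptNumberFromConf {
    public static int attemptNumber(Configuration conf) {
        // Returns 0 when the configuration does not carry a task attempt id.
        String raw = conf.get(MRJobConfig.TASK_ATTEMPT_ID); // e.g. "attempt_1526578123456_0004_m_000002_0"
        return raw == null ? 0 : TaskAttemptID.forName(raw).getId();
    }
}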