Use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.
Class TestFileOutputCommitter, method testCommitterRetryInternal.
// Retry the committer twice: the first commit is expected to fail.
private void testCommitterRetryInternal(int version) throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
  // allow only one failure attempt
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_FAILURE_ATTEMPTS, 1);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new CommitterWithFailedThenSucceed(outDir, tContext);
  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);
  // write output
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeOutput(theRecordWriter, tContext);
  // do commit
  committer.commitTask(tContext);
  try {
    committer.commitJob(jContext);
    Assert.fail("Commit successful: wrong behavior for the first time commit.");
  } catch (IOException e) {
    // commit again
    try {
      committer.commitJob(jContext);
      // version 1 should not reach this point
      if (version == 1) {
        Assert.fail("Commit successful after retry: wrong behavior for version 1.");
      }
    } catch (FileNotFoundException ex) {
      if (version == 2) {
        Assert.fail("Commit failed after retry: wrong behavior for version 2.");
      }
      assertTrue(ex.getMessage().contains(
          committer.getJobAttemptPath(jContext).toString() + " does not exist"));
    }
  }
  FileUtil.fullyDelete(new File(outDir.toString()));
}
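The writeOutput helper referenced by these tests is not reproduced on this page. A minimal, hedged sketch of what such a helper could look like follows; the specific keys and values are assumptions for illustration, not the test's actual data.

// Hedged sketch of a writeOutput-style helper (assumed imports:
// org.apache.hadoop.io.Text, org.apache.hadoop.mapreduce.RecordWriter,
// org.apache.hadoop.mapreduce.TaskAttemptContext, java.io.IOException).
// It pushes a few key/value pairs through the RecordWriter and always closes
// it, so the committer has some task output to promote.
private void writeOutput(RecordWriter theRecordWriter, TaskAttemptContext context)
    throws IOException, InterruptedException {
  try {
    theRecordWriter.write(new Text("key1"), new Text("val1"));
    theRecordWriter.write(new Text("key2"), new Text("val2"));
  } finally {
    theRecordWriter.close(context);
  }
}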
Use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.
Class TestFileOutputCommitter, method testCommitterInternal.
private void testCommitterInternal(int version) throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);
  // write output
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeOutput(theRecordWriter, tContext);
  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);
  // validate output
  validateContent(outDir);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
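validateContent is likewise defined elsewhere in the test class. A hedged sketch of such a check, assuming the committed output ends up in a single part-m-00000 file and contains the records the writeOutput sketch above would produce (assumed imports: java.io.File, java.nio.charset.StandardCharsets, org.apache.hadoop.fs.Path):

// Hedged sketch of a validateContent-style assertion. The part file name and
// the expected record text are assumptions for illustration.
private void validateContent(Path dir) throws IOException {
  File expectedFile = new File(new Path(dir, "part-m-00000").toString());
  assertTrue("Committed output file is missing", expectedFile.exists());
  String output = new String(
      java.nio.file.Files.readAllBytes(expectedFile.toPath()), StandardCharsets.UTF_8);
  assertTrue("Unexpected committed contents", output.contains("key1\tval1"));
}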
Use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.
Class TestFileOutputCommitter, method testConcurrentCommitTaskWithSubDir.
private void testConcurrentCommitTaskWithSubDir(int version) throws Exception {
  final Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  final Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
  conf.setClass("fs.file.impl", RLFS.class, FileSystem.class);
  FileSystem.closeAll();
  final JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  final FileOutputCommitter amCommitter = new FileOutputCommitter(outDir, jContext);
  amCommitter.setupJob(jContext);
  final TaskAttemptContext[] taCtx = new TaskAttemptContextImpl[2];
  taCtx[0] = new TaskAttemptContextImpl(conf, taskID);
  taCtx[1] = new TaskAttemptContextImpl(conf, taskID1);
  final TextOutputFormat[] tof = new TextOutputFormat[2];
  for (int i = 0; i < tof.length; i++) {
    tof[i] = new TextOutputFormat() {
      @Override
      public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException {
        final FileOutputCommitter foc = (FileOutputCommitter) getOutputCommitter(context);
        return new Path(new Path(foc.getWorkPath(), SUB_DIR),
            getUniqueFile(context, getOutputName(context), extension));
      }
    };
  }
  final ExecutorService executor = HadoopExecutors.newFixedThreadPool(2);
  try {
    for (int i = 0; i < taCtx.length; i++) {
      final int taskIdx = i;
      executor.submit(new Callable<Void>() {
        @Override
        public Void call() throws IOException, InterruptedException {
          final OutputCommitter outputCommitter = tof[taskIdx].getOutputCommitter(taCtx[taskIdx]);
          outputCommitter.setupTask(taCtx[taskIdx]);
          final RecordWriter rw = tof[taskIdx].getRecordWriter(taCtx[taskIdx]);
          writeOutput(rw, taCtx[taskIdx]);
          outputCommitter.commitTask(taCtx[taskIdx]);
          return null;
        }
      });
    }
  } finally {
    executor.shutdown();
    while (!executor.awaitTermination(1, TimeUnit.SECONDS)) {
      LOG.info("Awaiting thread termination!");
    }
  }
  amCommitter.commitJob(jContext);
  final RawLocalFileSystem lfs = new RawLocalFileSystem();
  lfs.setConf(conf);
  assertFalse("Must not end up with sub_dir/sub_dir", lfs.exists(new Path(OUT_SUB_DIR, SUB_DIR)));
  // validate output
  validateContent(OUT_SUB_DIR);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
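The RLFS class wired in via conf.setClass("fs.file.impl", RLFS.class, FileSystem.class) is not reproduced here. A minimal stand-in consistent with that call would be a trivial RawLocalFileSystem subclass, sketched below; the real test helper may add more behavior.

// Hedged sketch: a bare RawLocalFileSystem subclass is enough to satisfy the
// fs.file.impl binding above; FileSystem.closeAll() forces the filesystem
// cache to pick up this implementation for file:// URIs.
private static class RLFS extends RawLocalFileSystem {
  public RLFS() {
  }
}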
Use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.
Class Chain, method addMapper.
/**
 * Adds the first mapper in the chain: it reads its input from the task's
 * input context and writes its output to the given queue.
 */
@SuppressWarnings("unchecked")
void addMapper(TaskInputOutputContext inputContext, ChainBlockingQueue<KeyValuePair<?, ?>> output, int index)
    throws IOException, InterruptedException {
  Configuration conf = getConf(index);
  Class<?> keyOutClass = conf.getClass(MAPPER_OUTPUT_KEY_CLASS, Object.class);
  Class<?> valueOutClass = conf.getClass(MAPPER_OUTPUT_VALUE_CLASS, Object.class);
  RecordReader rr = new ChainRecordReader(inputContext);
  RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass, output, conf);
  Mapper.Context mapperContext = createMapContext(rr, rw, (MapContext) inputContext, getConf(index));
  MapRunner runner = new MapRunner(mappers.get(index), mapperContext, rr, rw);
  threads.add(runner);
}
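ChainRecordWriter itself is not shown on this page. To illustrate the pattern, here is a hedged sketch of a queue-backed RecordWriter: write() enqueues each key/value pair so the next mapper in the chain can consume it. This is an illustration only, not Hadoop's ChainRecordWriter.

import java.io.IOException;
import java.util.AbstractMap;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Sketch of a queue-backed RecordWriter: every record written by the current
// mapper is placed on a blocking queue for the next stage to read.
class QueueRecordWriter<K, V> extends RecordWriter<K, V> {
  private final BlockingQueue<Map.Entry<K, V>> queue;

  QueueRecordWriter(BlockingQueue<Map.Entry<K, V>> queue) {
    this.queue = queue;
  }

  @Override
  public void write(K key, V value) throws IOException, InterruptedException {
    queue.put(new AbstractMap.SimpleImmutableEntry<>(key, value));
  }

  @Override
  public void close(TaskAttemptContext context) {
    // nothing to close; the consumer drains the queue
  }
}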
Use of org.apache.hadoop.mapreduce.RecordWriter in project phoenix by apache.
Class MultiHfileOutputFormat, method createRecordWriter.
/**
 * Creates a RecordWriter that writes HFiles under the task's work path,
 * one directory per table and column family.
 *
 * @param context the task attempt context
 * @return a RecordWriter for TableRowkeyPair/Cell pairs
 * @throws IOException if the output filesystem cannot be accessed
 */
static <V extends Cell> RecordWriter<TableRowkeyPair, V> createRecordWriter(final TaskAttemptContext context)
    throws IOException {
  // Get the path of the temporary output file
  final Path outputPath = FileOutputFormat.getOutputPath(context);
  final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
  final Configuration conf = context.getConfiguration();
  final FileSystem fs = outputdir.getFileSystem(conf);
  final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
  // Invented config. Add to hbase-*.xml if other than default compression.
  final String defaultCompressionStr = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
  final Algorithm defaultCompression = AbstractHFileWriter.compressionByName(defaultCompressionStr);
  final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", false);
  return new RecordWriter<TableRowkeyPair, V>() {

    // Map of families to writers and how much has been output on the writer.
    private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(Bytes.BYTES_COMPARATOR);
    private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
    private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
    private boolean rollRequested = false;

    @Override
    public void write(TableRowkeyPair row, V cell) throws IOException {
      KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
      // null input == user explicitly wants to flush
      if (row == null && kv == null) {
        rollWriters();
        return;
      }
      // phoenix-2216: start : extract table name from the rowkey
      String tableName = row.getTableName();
      byte[] rowKey = row.getRowkey().get();
      long length = kv.getLength();
      byte[] family = CellUtil.cloneFamily(kv);
      byte[] tableAndFamily = join(tableName, Bytes.toString(family));
      WriterLength wl = this.writers.get(tableAndFamily);
      // If this is a new column family, verify that the directory exists
      if (wl == null) {
        // phoenix-2216: start : create a directory for table and family within the output dir
        Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
        fs.mkdirs(new Path(tableOutputPath, Bytes.toString(family)));
        // phoenix-2216: end
      }
      // If any of the HFiles for the column families has reached maxsize, we need to roll all the writers
      if (wl != null && wl.written + length >= maxsize) {
        this.rollRequested = true;
      }
      // This can only happen once a row is finished though
      if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
        rollWriters();
      }
      // create a new HFile writer, if necessary
      if (wl == null || wl.writer == null) {
        // phoenix-2216: start : pass the table name as well
        wl = getNewWriter(tableName, family, conf);
        // phoenix-2216: end
      }
      // we now have the proper HFile writer. full steam ahead
      kv.updateLatestStamp(this.now);
      wl.writer.append(kv);
      wl.written += length;
      // Copy the row so we know when a row transition happens.
      this.previousRow = rowKey;
    }

    private void rollWriters() throws IOException {
      for (WriterLength wl : this.writers.values()) {
        if (wl.writer != null) {
          LOG.info("Writer=" + wl.writer.getPath() + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
          close(wl.writer);
        }
        wl.writer = null;
        wl.written = 0;
      }
      this.rollRequested = false;
    }

    /*
     * Create a new StoreFile.Writer.
     * @param family
     * @return A WriterLength, containing a new StoreFile.Writer.
     * @throws IOException
     */
    @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED", justification = "Not important")
    private WriterLength getNewWriter(final String tableName, byte[] family, Configuration conf) throws IOException {
      WriterLength wl = new WriterLength();
      Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
      Path familydir = new Path(tableOutputPath, Bytes.toString(family));
      // phoenix-2216: start : fetch the configuration properties that were set on the table
      // and build per-family maps for compression, bloom type, and block size.
      final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf, tableName);
      final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf, tableName);
      final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf, tableName);
      // phoenix-2216: end
      String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
      final Map<byte[], DataBlockEncoding> datablockEncodingMap = createFamilyDataBlockEncodingMap(conf, tableName);
      final DataBlockEncoding overriddenEncoding;
      if (dataBlockEncodingStr != null) {
        overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
      } else {
        overriddenEncoding = null;
      }
      Algorithm compression = compressionMap.get(family);
      compression = compression == null ? defaultCompression : compression;
      BloomType bloomType = bloomTypeMap.get(family);
      bloomType = bloomType == null ? BloomType.NONE : bloomType;
      Integer blockSize = blockSizeMap.get(family);
      blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
      DataBlockEncoding encoding = overriddenEncoding;
      encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
      encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
      Configuration tempConf = new Configuration(conf);
      tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
      HFileContextBuilder contextBuilder = new HFileContextBuilder()
          .withCompression(compression)
          .withChecksumType(HStore.getChecksumType(conf))
          .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf))
          .withBlockSize(blockSize);
      contextBuilder.withDataBlockEncoding(encoding);
      HFileContext hFileContext = contextBuilder.build();
      wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
          .withOutputDir(familydir)
          .withBloomType(bloomType)
          .withComparator(KeyValue.COMPARATOR)
          .withFileContext(hFileContext)
          .build();
      // phoenix-2216: start : the writers map is keyed by a joined byte array of table name and family.
      byte[] tableAndFamily = join(tableName, Bytes.toString(family));
      this.writers.put(tableAndFamily, wl);
      // phoenix-2216: end
      return wl;
    }

    private void close(final StoreFile.Writer w) throws IOException {
      if (w != null) {
        w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
        w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString()));
        w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
        w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
        w.appendTrackedTimestampsToMetadata();
        w.close();
      }
    }

    @Override
    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
      for (WriterLength wl : this.writers.values()) {
        close(wl.writer);
      }
    }
  };
}
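Two helpers referenced above, WriterLength and join(...), are not shown on this page. The following are hedged sketches consistent with how they are used; the field names and the separator are assumptions for illustration.

// Sketch of the WriterLength holder: pairs an HFile writer with the number of
// bytes written to it since the last roll.
static class WriterLength {
  long written = 0;
  StoreFile.Writer writer = null;
}

// Sketch of join(...): builds the writers-map key from the table name and the
// column family name; the actual separator used by Phoenix is an assumption.
private static byte[] join(String tableName, String family) {
  return Bytes.toBytes(tableName + ";" + family);
}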