Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hive by apache.
The class MultiOutputFormat, method getTaskAttemptContext.
/**
* Get the TaskAttemptContext with the related OutputFormat configuration populated given the alias
* and the actual TaskAttemptContext
* @param alias the name given to the OutputFormat configuration
* @param context the Mapper or Reducer Context
* @return a copy of the TaskAttemptContext with the alias configuration populated
*/
public static TaskAttemptContext getTaskAttemptContext(String alias, TaskAttemptContext context) {
  String aliasConf = context.getConfiguration().get(getAliasConfName(alias));
  TaskAttemptContext aliasContext = ShimLoader.getHadoopShims().getHCatShim()
      .createTaskAttemptContext(context.getConfiguration(), context.getTaskAttemptID());
  addToConfig(aliasConf, aliasContext.getConfiguration());
  return aliasContext;
}
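A caller is typically a Mapper or Reducer that writes through several aliased OutputFormats: the task's own Context (which is itself a TaskAttemptContext) is passed in, and the returned copy carries the alias-specific configuration. A minimal sketch, assuming an alias named "textOutput", an illustrative Reducer, and the org.apache.hive.hcatalog.mapreduce package path for MultiOutputFormat:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hive.hcatalog.mapreduce.MultiOutputFormat;

public class AliasAwareReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    // "textOutput" is a hypothetical alias registered when the job was configured.
    TaskAttemptContext aliasContext =
        MultiOutputFormat.getTaskAttemptContext("textOutput", context);
    // aliasContext.getConfiguration() now carries that alias's OutputFormat settings and
    // can be handed to that alias's RecordWriter or OutputCommitter.
  }
}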
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hbase by apache.
The class TestHFileOutputFormat2, method testColumnFamilySettings.
/**
* Test that {@link HFileOutputFormat2} RecordWriter uses compression and
* bloom filter settings from the column family descriptor
*/
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testColumnFamilySettings() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  RecordWriter<ImmutableBytesWritable, Cell> writer = null;
  TaskAttemptContext context = null;
  Path dir = util.getDataTestDir("testColumnFamilySettings");
  // Setup table descriptor
  Table table = Mockito.mock(Table.class);
  RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
  HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
  Mockito.doReturn(htd).when(table).getTableDescriptor();
  for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) {
    htd.addFamily(hcd);
  }
  // set up the table to return some mock keys
  setupMockStartKeys(regionLocator);
  try {
    // partial map red setup to get an operational writer for testing
    // We turn off the sequence file compression, because DefaultCodec
    // pollutes the GZip codec pool with an incompatible compressor.
    conf.set("io.seqfile.compression.type", "NONE");
    conf.set("hbase.fs.tmp.dir", dir.toString());
    // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
    setupRandomGeneratorMapper(job, false);
    HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
    FileOutputFormat.setOutputPath(job, dir);
    context = createTestTaskAttemptContext(job);
    HFileOutputFormat2 hof = new HFileOutputFormat2();
    writer = hof.getRecordWriter(context);
    // write out random rows
    writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
    writer.close(context);
    // Make sure that a directory was created for every CF
    FileSystem fs = dir.getFileSystem(conf);
    // commit so that the filesystem has one directory per column family
    hof.getOutputCommitter(context).commitTask(context);
    hof.getOutputCommitter(context).commitJob(context);
    FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
    assertEquals(htd.getFamilies().size(), families.length);
    for (FileStatus f : families) {
      String familyStr = f.getPath().getName();
      HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
      // verify that the compression on this file matches the configured compression
      Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
      Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
      Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
      byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
      if (bloomFilter == null) {
        bloomFilter = Bytes.toBytes("NONE");
      }
      assertEquals("Incorrect bloom filter used for column family " + familyStr + "(reader: " + reader + ")",
          hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
      assertEquals("Incorrect compression used for column family " + familyStr + "(reader: " + reader + ")",
          hcd.getCompressionType(), reader.getFileContext().getCompression());
    }
  } finally {
    dir.getFileSystem(conf).delete(dir, true);
  }
}
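The helper createTestTaskAttemptContext(job) is defined elsewhere in the test class and not shown here. With the plain MapReduce API, a stand-alone context can be assembled roughly as below; this is a minimal sketch under that assumption, not the actual HBase helper, and the class name, the "test" identifier, and the zero task/attempt ids are arbitrary illustrative values:

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

final class TaskContexts {
  private TaskContexts() {
  }

  // Builds a stand-alone TaskAttemptContext from a Job's configuration, suitable for
  // driving an OutputFormat in a test without a running MapReduce job.
  static TaskAttemptContext newTestTaskAttemptContext(Job job) {
    TaskAttemptID attemptId = new TaskAttemptID("test", 1, TaskType.MAP, 0, 0);
    return new TaskAttemptContextImpl(job.getConfiguration(), attemptId);
  }
}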
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hbase by apache.
The class TestHFileOutputFormat2, method test_LATEST_TIMESTAMP_isReplaced.
/**
* Test that {@link HFileOutputFormat2} RecordWriter amends timestamps if
* passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
* @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
*/
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void test_LATEST_TIMESTAMP_isReplaced() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  RecordWriter<ImmutableBytesWritable, Cell> writer = null;
  TaskAttemptContext context = null;
  Path dir = util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
  try {
    Job job = new Job(conf);
    FileOutputFormat.setOutputPath(job, dir);
    context = createTestTaskAttemptContext(job);
    HFileOutputFormat2 hof = new HFileOutputFormat2();
    writer = hof.getRecordWriter(context);
    final byte[] b = Bytes.toBytes("b");
    // Test 1. Pass a KV that has a ts of LATEST_TIMESTAMP. It should be changed by the
    // call to write. Check that everything in the kv is the same except the ts.
    KeyValue kv = new KeyValue(b, b, b);
    KeyValue original = kv.clone();
    writer.write(new ImmutableBytesWritable(), kv);
    assertFalse(original.equals(kv));
    assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv)));
    assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv)));
    assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv)));
    assertNotSame(original.getTimestamp(), kv.getTimestamp());
    assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());
    // Test 2. Now pass a kv that has an explicit ts. It should not be changed by the
    // call to write.
    kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
    original = kv.clone();
    writer.write(new ImmutableBytesWritable(), kv);
    assertTrue(original.equals(kv));
  } finally {
    if (writer != null && context != null) {
      writer.close(context);
    }
    dir.getFileSystem(conf).delete(dir, true);
  }
}
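The invariant the test relies on can be restated in a few lines: a KeyValue built without an explicit timestamp carries the HConstants.LATEST_TIMESTAMP sentinel, and the RecordWriter must replace that sentinel on write while leaving explicit timestamps untouched. A small illustrative sketch of that premise (the class name and the 42L value are arbitrary assumptions):

import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;

public class LatestTimestampSketch {
  public static void main(String[] args) {
    byte[] b = Bytes.toBytes("b");
    // Built without a timestamp: carries the LATEST_TIMESTAMP sentinel, which the
    // writer in the test above is expected to replace.
    KeyValue defaulted = new KeyValue(b, b, b);
    assert defaulted.getTimestamp() == HConstants.LATEST_TIMESTAMP;
    // Built with an explicit timestamp: must pass through the writer unchanged.
    KeyValue explicit = new KeyValue(b, b, b, 42L, b);
    assert explicit.getTimestamp() == 42L;
  }
}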
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hive by apache.
The class TestE2EScenarios, method createTaskAttemptContext.
private TaskAttemptContext createTaskAttemptContext(Configuration tconf) {
  Configuration conf = (tconf == null) ? (new Configuration()) : tconf;
  TaskAttemptID taskId = HCatMapRedUtil.createTaskAttemptID(new JobID("200908190029", 1), false, 1, 1);
  conf.setInt("mapred.task.partition", taskId.getId());
  conf.set("mapred.task.id", taskId.toString());
  TaskAttemptContext rtaskContext = HCatMapRedUtil.createTaskAttemptContext(conf, taskId);
  return rtaskContext;
}
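A hand-built context like this lets a test drive an OutputFormat directly, with no running job. A hedged usage sketch; the local output directory, the TextOutputFormat choice, and the test method name are illustrative assumptions rather than what TestE2EScenarios itself does:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

// ...inside the same test class that defines createTaskAttemptContext(Configuration)...
@Test
public void writesThroughHandBuiltContext() throws Exception {
  Configuration conf = new Configuration();
  // Hypothetical local output directory for this sketch.
  conf.set("mapreduce.output.fileoutputformat.outputdir", "/tmp/e2e-sketch-out");
  TaskAttemptContext ctx = createTaskAttemptContext(conf);
  TextOutputFormat<Text, Text> format = new TextOutputFormat<>();
  RecordWriter<Text, Text> writer = format.getRecordWriter(ctx);
  writer.write(new Text("key"), new Text("value"));
  writer.close(ctx);
}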
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hive by apache.
The class TestRCFileMapReduceInputFormat, method writeThenReadByRecordReader.
private void writeThenReadByRecordReader(int intervalRecordCount, int writeCount, int splitNumber,
    long maxSplitSize, CompressionCodec codec) throws IOException, InterruptedException {
  Path testDir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred/testsmallfirstsplit");
  Path testFile = new Path(testDir, "test_rcfile");
  fs.delete(testFile, true);
  Configuration cloneConf = new Configuration(conf);
  RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
  cloneConf.setInt(HiveConf.ConfVars.HIVE_RCFILE_RECORD_INTERVAL.varname, intervalRecordCount);
  RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec);
  BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length);
  for (int i = 0; i < bytesArray.length; i++) {
    BytesRefWritable cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length);
    bytes.set(i, cu);
  }
  for (int i = 0; i < writeCount; i++) {
    writer.append(bytes);
  }
  writer.close();
  RCFileMapReduceInputFormat<LongWritable, BytesRefArrayWritable> inputFormat =
      new RCFileMapReduceInputFormat<LongWritable, BytesRefArrayWritable>();
  Configuration jonconf = new Configuration(cloneConf);
  jonconf.set("mapred.input.dir", testDir.toString());
  JobContext context = new Job(jonconf);
  HiveConf.setLongVar(context.getConfiguration(), HiveConf.ConfVars.MAPREDMAXSPLITSIZE, maxSplitSize);
  List<InputSplit> splits = inputFormat.getSplits(context);
  assertEquals("splits length should be " + splitNumber, splits.size(), splitNumber);
  int readCount = 0;
  for (int i = 0; i < splits.size(); i++) {
    TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim()
        .createTaskAttemptContext(jonconf, new TaskAttemptID());
    RecordReader<LongWritable, BytesRefArrayWritable> rr = inputFormat.createRecordReader(splits.get(i), tac);
    rr.initialize(splits.get(i), tac);
    while (rr.nextKeyValue()) {
      readCount++;
    }
  }
  assertEquals("readCount should be equal to writeCount", readCount, writeCount);
}
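A typical caller is a @Test method that picks a record interval, a row count, an expected split count, and a codec. A hedged usage sketch; the parameter values and the method name are illustrative assumptions, not the combinations the real test class uses:

// ...inside TestRCFileMapReduceInputFormat...
// 10-row sync interval, 100 rows written, one expected split under a generous max split
// size, and no compression codec (null).
@Test
public void testReadBackSingleSplitUncompressed() throws Exception {
  writeThenReadByRecordReader(10, 100, 1, 1024 * 1024, null);
}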