
Example 31 with TaskAttemptContext

Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hive by apache.

Class MultiOutputFormat, method getTaskAttemptContext.

/**
   * Get the TaskAttemptContext with the related OutputFormat configuration populated given the alias
   * and the actual TaskAttemptContext
   * @param alias the name given to the OutputFormat configuration
   * @param context the Mapper or Reducer Context
   * @return a copy of the TaskAttemptContext with the alias configuration populated
   */
public static TaskAttemptContext getTaskAttemptContext(String alias, TaskAttemptContext context) {
    String aliasConf = context.getConfiguration().get(getAliasConfName(alias));
    TaskAttemptContext aliasContext = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(context.getConfiguration(), context.getTaskAttemptID());
    addToConfig(aliasConf, aliasContext.getConfiguration());
    return aliasContext;
}
Also used : TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext)
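
For reference, a minimal shim-free sketch of the same idea on Hadoop 2.x, where the concrete class behind the shim is org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl. The helper name withProperty is illustrative and not part of MultiOutputFormat:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public final class AliasContexts {

    private AliasContexts() {
    }

    /** Clone a TaskAttemptContext, adding one extra property without mutating the original. */
    public static TaskAttemptContext withProperty(TaskAttemptContext context, String key, String value) {
        // Copy the configuration so the caller's context stays untouched.
        Configuration copy = new Configuration(context.getConfiguration());
        copy.set(key, value);
        // Re-wrap the copied configuration with the same task attempt id.
        return new TaskAttemptContextImpl(copy, context.getTaskAttemptID());
    }
}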

Example 32 with TaskAttemptContext

Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hbase by apache.

Class TestHFileOutputFormat2, method testColumnFamilySettings.

/**
   * Test that {@link HFileOutputFormat2} RecordWriter uses compression and
   * bloom filter settings from the column family descriptor
   */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testColumnFamilySettings() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("testColumnFamilySettings");
    // Setup table descriptor
    Table table = Mockito.mock(Table.class);
    RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
    HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
    Mockito.doReturn(htd).when(table).getTableDescriptor();
    for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) {
        htd.addFamily(hcd);
    }
    // set up the table to return some mock keys
    setupMockStartKeys(regionLocator);
    try {
        // partial map red setup to get an operational writer for testing
        // We turn off the sequence file compression, because DefaultCodec
        // pollutes the GZip codec pool with an incompatible compressor.
        conf.set("io.seqfile.compression.type", "NONE");
        conf.set("hbase.fs.tmp.dir", dir.toString());
        // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
        conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
        Job job = new Job(conf, "testLocalMRIncrementalLoad");
        job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
        setupRandomGeneratorMapper(job, false);
        HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
        FileOutputFormat.setOutputPath(job, dir);
        context = createTestTaskAttemptContext(job);
        HFileOutputFormat2 hof = new HFileOutputFormat2();
        writer = hof.getRecordWriter(context);
        // write out random rows
        writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
        writer.close(context);
        // Make sure that a directory was created for every CF
        FileSystem fs = dir.getFileSystem(conf);
        // commit so that the filesystem has one directory per column family
        hof.getOutputCommitter(context).commitTask(context);
        hof.getOutputCommitter(context).commitJob(context);
        FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
        assertEquals(htd.getFamilies().size(), families.length);
        for (FileStatus f : families) {
            String familyStr = f.getPath().getName();
            HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
            // verify that the compression on this file matches the configured
            // compression
            Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
            Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
            Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
            byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
            if (bloomFilter == null)
                bloomFilter = Bytes.toBytes("NONE");
            assertEquals("Incorrect bloom filter used for column family " + familyStr + "(reader: " + reader + ")", hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
            assertEquals("Incorrect compression used for column family " + familyStr + "(reader: " + reader + ")", hcd.getCompressionType(), reader.getFileContext().getCompression());
        }
    } finally {
        dir.getFileSystem(conf).delete(dir, true);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) Table(org.apache.hadoop.hbase.client.Table) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) HdfsFileStatus(org.apache.hadoop.hdfs.protocol.HdfsFileStatus) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) HColumnDescriptor(org.apache.hadoop.hbase.HColumnDescriptor) Reader(org.apache.hadoop.hbase.io.hfile.HFile.Reader) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) Job(org.apache.hadoop.mapreduce.Job) Cell(org.apache.hadoop.hbase.Cell) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) FSUtils(org.apache.hadoop.hbase.util.FSUtils) Ignore(org.junit.Ignore) Test(org.junit.Test)
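
The helper createTestTaskAttemptContext(job) is not shown in this snippet. On plain Hadoop 2.x a map-task attempt context for a Job can be built roughly as below; this is a sketch, not the HBase test utility, and the "jt" identifier, job number, and task/attempt indices are arbitrary:

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

// Build an attempt context for map task 0, attempt 0, backed by the job's configuration.
static TaskAttemptContext newMapAttemptContext(Job job) {
    TaskAttemptID attemptId = new TaskAttemptID("jt", 1, TaskType.MAP, 0, 0);
    return new TaskAttemptContextImpl(job.getConfiguration(), attemptId);
}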

Example 33 with TaskAttemptContext

Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hbase by apache.

Class TestHFileOutputFormat2, method test_LATEST_TIMESTAMP_isReplaced.

/**
   * Test that {@link HFileOutputFormat2} RecordWriter amends timestamps if
   * passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
   * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
   */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void test_LATEST_TIMESTAMP_isReplaced() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
    try {
        Job job = new Job(conf);
        FileOutputFormat.setOutputPath(job, dir);
        context = createTestTaskAttemptContext(job);
        HFileOutputFormat2 hof = new HFileOutputFormat2();
        writer = hof.getRecordWriter(context);
        final byte[] b = Bytes.toBytes("b");
        // Test 1.  Pass a KV that has a ts of LATEST_TIMESTAMP.  It should be
        // changed by call to write.  Check all in kv is same but ts.
        KeyValue kv = new KeyValue(b, b, b);
        KeyValue original = kv.clone();
        writer.write(new ImmutableBytesWritable(), kv);
        assertFalse(original.equals(kv));
        assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv)));
        assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv)));
        assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv)));
        assertNotSame(original.getTimestamp(), kv.getTimestamp());
        assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());
        // Test 2. Now test passing a kv that has explicit ts.  It should not be
        // changed by call to record write.
        kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
        original = kv.clone();
        writer.write(new ImmutableBytesWritable(), kv);
        assertTrue(original.equals(kv));
    } finally {
        if (writer != null && context != null)
            writer.close(context);
        dir.getFileSystem(conf).delete(dir, true);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) KeyValue(org.apache.hadoop.hbase.KeyValue) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) Job(org.apache.hadoop.mapreduce.Job) Cell(org.apache.hadoop.hbase.Cell) Ignore(org.junit.Ignore) Test(org.junit.Test)
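
The rule the test exercises can be summarized in a few lines. This is an illustrative sketch of the timestamp-rewrite behaviour only, not the actual HFileOutputFormat2 writer code, and the helper name amendLatestTimestamp is made up for this example:

import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;

// Cells stamped LATEST_TIMESTAMP receive the write time; explicitly stamped cells pass through.
static KeyValue amendLatestTimestamp(KeyValue kv, long now) {
    if (kv.getTimestamp() != HConstants.LATEST_TIMESTAMP) {
        return kv;
    }
    return new KeyValue(
        CellUtil.cloneRow(kv),
        CellUtil.cloneFamily(kv),
        CellUtil.cloneQualifier(kv),
        now,
        CellUtil.cloneValue(kv));
}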

Example 34 with TaskAttemptContext

Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hive by apache.

Class TestE2EScenarios, method createTaskAttemptContext.

private TaskAttemptContext createTaskAttemptContext(Configuration tconf) {
    Configuration conf = (tconf == null) ? (new Configuration()) : tconf;
    // Build a synthetic task attempt id under job 200908190029_0001 through the HCatalog utility.
    TaskAttemptID taskId = HCatMapRedUtil.createTaskAttemptID(new JobID("200908190029", 1), false, 1, 1);
    // Mirror the id into the configuration the way the MR framework does for a running task.
    conf.setInt("mapred.task.partition", taskId.getId());
    conf.set("mapred.task.id", taskId.toString());
    return HCatMapRedUtil.createTaskAttemptContext(conf, taskId);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) JobID(org.apache.hadoop.mapreduce.JobID)
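
A roughly equivalent shim-free construction on Hadoop 2.x might look like the following sketch. It is not part of HCatMapRedUtil, and it assumes the boolean passed to createTaskAttemptID above is the map/reduce flag, so TaskType.REDUCE stands in for false here:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

// Hand-build a task attempt id and wrap it with the stock TaskAttemptContextImpl.
static TaskAttemptContext newTaskAttemptContext(Configuration conf) {
    TaskAttemptID taskId = new TaskAttemptID("200908190029", 1, TaskType.REDUCE, 1, 1);
    conf.setInt("mapred.task.partition", taskId.getId());
    conf.set("mapred.task.id", taskId.toString());
    return new TaskAttemptContextImpl(conf, taskId);
}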

Example 35 with TaskAttemptContext

Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hive by apache.

Class TestRCFileMapReduceInputFormat, method writeThenReadByRecordReader.

private void writeThenReadByRecordReader(int intervalRecordCount, int writeCount, int splitNumber, long maxSplitSize, CompressionCodec codec) throws IOException, InterruptedException {
    Path testDir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred/testsmallfirstsplit");
    Path testFile = new Path(testDir, "test_rcfile");
    fs.delete(testFile, true);
    // Write an RCFile with the requested record interval and one column per entry in bytesArray.
    Configuration cloneConf = new Configuration(conf);
    RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
    cloneConf.setInt(HiveConf.ConfVars.HIVE_RCFILE_RECORD_INTERVAL.varname, intervalRecordCount);
    RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec);
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length);
    for (int i = 0; i < bytesArray.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length);
        bytes.set(i, cu);
    }
    for (int i = 0; i < writeCount; i++) {
        writer.append(bytes);
    }
    writer.close();
    // Read the file back through RCFileMapReduceInputFormat and count the records.
    RCFileMapReduceInputFormat<LongWritable, BytesRefArrayWritable> inputFormat = new RCFileMapReduceInputFormat<LongWritable, BytesRefArrayWritable>();
    Configuration jobConf = new Configuration(cloneConf);
    jobConf.set("mapred.input.dir", testDir.toString());
    JobContext context = new Job(jobConf);
    HiveConf.setLongVar(context.getConfiguration(), HiveConf.ConfVars.MAPREDMAXSPLITSIZE, maxSplitSize);
    List<InputSplit> splits = inputFormat.getSplits(context);
    assertEquals("splits length should be " + splitNumber, splits.size(), splitNumber);
    int readCount = 0;
    for (int i = 0; i < splits.size(); i++) {
        // Each split gets its own TaskAttemptContext, created through the Hadoop shim layer.
        TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(jobConf, new TaskAttemptID());
        RecordReader<LongWritable, BytesRefArrayWritable> rr = inputFormat.createRecordReader(splits.get(i), tac);
        rr.initialize(splits.get(i), tac);
        while (rr.nextKeyValue()) {
            readCount++;
        }
        rr.close();
    }
    assertEquals("readCount should be equal to writeCount", readCount, writeCount);
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) RCFile(org.apache.hadoop.hive.ql.io.RCFile) LongWritable(org.apache.hadoop.io.LongWritable) JobContext(org.apache.hadoop.mapreduce.JobContext) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit) BytesRefWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)
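
The same read loop works for any mapreduce InputFormat without the Hive shim. Below is a generic sketch using the stock TaskAttemptContextImpl; the method name countRecords is illustrative and not from the Hive test:

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

// Enumerate the splits of any mapreduce InputFormat and count the records they yield.
static <K, V> long countRecords(InputFormat<K, V> inputFormat, Job job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    long count = 0;
    List<InputSplit> splits = inputFormat.getSplits(job);
    for (InputSplit split : splits) {
        // A fresh TaskAttemptContext per split, built with the stock Hadoop 2.x implementation.
        TaskAttemptContext tac = new TaskAttemptContextImpl(conf, new TaskAttemptID());
        RecordReader<K, V> reader = inputFormat.createRecordReader(split, tac);
        try {
            reader.initialize(split, tac);
            while (reader.nextKeyValue()) {
                count++;
            }
        } finally {
            reader.close();
        }
    }
    return count;
}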

Aggregations

TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext) 110
Configuration (org.apache.hadoop.conf.Configuration) 58
Job (org.apache.hadoop.mapreduce.Job) 44
Path (org.apache.hadoop.fs.Path) 39
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) 38
InputSplit (org.apache.hadoop.mapreduce.InputSplit) 36
Test (org.junit.Test) 35
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID) 33
JobContext (org.apache.hadoop.mapreduce.JobContext) 28
IOException (java.io.IOException) 27
File (java.io.File) 22
LongWritable (org.apache.hadoop.io.LongWritable) 22
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl) 21
RecordWriter (org.apache.hadoop.mapreduce.RecordWriter) 19
MapContextImpl (org.apache.hadoop.mapreduce.task.MapContextImpl) 17
FileSystem (org.apache.hadoop.fs.FileSystem) 16
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter) 12
ArrayList (java.util.ArrayList) 11
BytesWritable (org.apache.hadoop.io.BytesWritable) 10
MapFile (org.apache.hadoop.io.MapFile) 10