Search in sources :

Example 1 with StringableMap

use of org.apache.hadoop.hive.common.StringableMap in project hive by apache.

From the class CompactorMR, the method overrideTblProps:

/**
 * Parse tblproperties specified on "ALTER TABLE ... COMPACT ... WITH OVERWRITE TBLPROPERTIES ..."
 * and override two categories of properties:
 * 1. properties of the compactor MR job (with prefix "compactor.")
 * 2. general hive properties (with prefix "tblprops.")
 * @param job the compactor MR job
 * @param tblproperties existing tblproperties
 * @param properties table properties
 */
/**
 * Parses tblproperties specified on "ALTER TABLE ... COMPACT ... WITH OVERWRITE TBLPROPERTIES ..."
 * and overrides two categories of properties:
 * 1. properties of the compactor MR job (with prefix "compactor.")
 * 2. general hive properties (with prefix "tblprops.")
 * @param job the compactor MR job whose configuration may be overridden
 * @param tblproperties existing tblproperties; matching entries are overwritten in place
 * @param properties serialized (StringableMap) properties from the ALTER TABLE command
 */
private void overrideTblProps(JobConf job, Map<String, String> tblproperties, String properties) {
    StringableMap stringableMap = new StringableMap(properties);
    overrideMRProps(job, stringableMap);
    // mingle existing tblproperties with those specified on the ALTER TABLE command
    for (Map.Entry<String, String> entry : stringableMap.entrySet()) {
        String key = entry.getKey();
        // StringableMap may contain a null key (see TestWorker.stringableMap); guard before startsWith
        if (key != null && key.startsWith(TBLPROPS_PREFIX)) {
            // Strip the "tblprops." prefix; use TBLPROPS_PREFIX.length() rather than a
            // hard-coded 9 so this stays correct if the prefix constant ever changes.
            String propKey = key.substring(TBLPROPS_PREFIX.length());
            tblproperties.put(propKey, entry.getValue());
        }
    }
    // re-set TABLE_PROPS with reloaded tblproperties
    job.set(TABLE_PROPS, new StringableMap(tblproperties).toString());
}
Also used : StringableMap(org.apache.hadoop.hive.common.StringableMap)

Example 2 with StringableMap

use of org.apache.hadoop.hive.common.StringableMap in project hive by apache.

From the class TestCompactorMRJobQueueConfiguration, the method testCreateBaseJobConfHasCorrectJobQueue:

@ParameterizedTest
@MethodSource("generateBaseJobConfSetup")
void testCreateBaseJobConfHasCorrectJobQueue(ConfSetup input) {
    // Table carrying the table-level properties under test.
    Table table = createPersonTable();
    table.setParameters(input.tableProperties);
    // Compaction request with its own (possibly overriding) properties.
    CompactionInfo compactionInfo = new CompactionInfo(table.getDbName(), table.getTableName(), null, CompactionType.MAJOR);
    compactionInfo.properties = new StringableMap(input.compactionProperties).toString();
    // Session-level configuration properties.
    HiveConf hiveConf = new HiveConf();
    input.confProperties.forEach(hiveConf::set);
    JobConf jobConf = new CompactorMR().createBaseJobConf(hiveConf, "test-job", table, table.getSd(), new ValidReaderWriteIdList(), compactionInfo);
    // The effective queue must match whichever property source should win.
    assertEquals(input.expectedQueue, jobConf.getQueueName(), "Test failed for the following input:" + input);
}
Also used : Table(org.apache.hadoop.hive.metastore.api.Table) CompactionInfo(org.apache.hadoop.hive.metastore.txn.CompactionInfo) HiveConf(org.apache.hadoop.hive.conf.HiveConf) StringableMap(org.apache.hadoop.hive.common.StringableMap) ValidReaderWriteIdList(org.apache.hadoop.hive.common.ValidReaderWriteIdList) JobConf(org.apache.hadoop.mapred.JobConf) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource)

Example 3 with StringableMap

use of org.apache.hadoop.hive.common.StringableMap in project hive by apache.

From the class CompactorMR, the method createBaseJobConf:

/**
 * Builds the base MR job configuration for a compaction run: a map-only job with no
 * conventional output, configured from the table's storage descriptor and properties.
 */
private JobConf createBaseJobConf(HiveConf conf, String jobName, Table t, StorageDescriptor sd, ValidTxnList txns, CompactionInfo ci) {
    JobConf mrJob = new JobConf(conf);
    mrJob.setJobName(jobName);
    // The compactor writes files directly; the MR output channel carries nothing.
    mrJob.setOutputKeyClass(NullWritable.class);
    mrJob.setOutputValueClass(NullWritable.class);
    mrJob.setJarByClass(CompactorMR.class);
    LOG.debug("User jar set to " + mrJob.getJar());
    mrJob.setMapperClass(CompactorMap.class);
    // Map-only job: all the work happens in the mappers.
    mrJob.setNumReduceTasks(0);
    mrJob.setInputFormat(CompactorInputFormat.class);
    mrJob.setOutputFormat(NullOutputFormat.class);
    mrJob.setOutputCommitter(CompactorOutputCommitter.class);
    String queueName = conf.getVar(HiveConf.ConfVars.COMPACTOR_JOB_QUEUE);
    if (queueName != null && !queueName.isEmpty()) {
        mrJob.setQueueName(queueName);
    }
    mrJob.set(FINAL_LOCATION, sd.getLocation());
    // Unique scratch dir under the table/partition location for in-flight output.
    mrJob.set(TMP_LOCATION, sd.getLocation() + "/" + TMPDIR + "_" + UUID.randomUUID().toString());
    mrJob.set(INPUT_FORMAT_CLASS_NAME, sd.getInputFormat());
    mrJob.set(OUTPUT_FORMAT_CLASS_NAME, sd.getOutputFormat());
    mrJob.setBoolean(IS_COMPRESSED, sd.isCompressed());
    mrJob.set(TABLE_PROPS, new StringableMap(t.getParameters()).toString());
    mrJob.setInt(NUM_BUCKETS, sd.getNumBuckets());
    mrJob.set(ValidTxnList.VALID_TXNS_KEY, txns.toString());
    // override MR properties from tblproperties if applicable
    overrideMRProps(mrJob, t.getParameters());
    if (ci.properties != null) {
        overrideTblProps(mrJob, t.getParameters(), ci.properties);
    }
    setColumnTypes(mrJob, sd.getCols());
    //with feature on, multiple tasks may get into conflict creating/using TMP_LOCATION and if we were
    //to generate the target dir in the Map task, there is no easy way to pass it to OutputCommitter
    //to do the final move
    mrJob.setBoolean("mapreduce.map.speculative", false);
    // Set appropriate Acid readers/writers based on the table properties.
    AcidUtils.setAcidOperationalProperties(mrJob, AcidUtils.getAcidOperationalProperties(t.getParameters()));
    return mrJob;
}
Also used : StringableMap(org.apache.hadoop.hive.common.StringableMap) JobConf(org.apache.hadoop.mapred.JobConf)

Example 4 with StringableMap

use of org.apache.hadoop.hive.common.StringableMap in project hive by apache.

From the class TestWorker, the method stringableMap:

@Test
public void stringableMap() throws Exception {
    // Round-trip of an empty map must produce the canonical "0:" encoding.
    StringableMap map = new StringableMap(new HashMap<String, String>());
    String serialized = map.toString();
    Assert.assertEquals("0:", serialized);
    map = new StringableMap(serialized);
    Assert.assertEquals(0, map.size());
    // Round-trip of a map containing a null value and a null key.
    Map<String, String> original = new HashMap<String, String>();
    original.put("mary", "poppins");
    original.put("bert", null);
    original.put(null, "banks");
    serialized = new StringableMap(original).toString();
    map = new StringableMap(serialized);
    Assert.assertEquals(3, map.size());
    // Track which keys the deserialized map produced.
    Map<String, Boolean> seen = new HashMap<String, Boolean>(3);
    seen.put("mary", false);
    seen.put("bert", false);
    seen.put(null, false);
    for (Map.Entry<String, String> entry : map.entrySet()) {
        String key = entry.getKey();
        seen.put(key, true);
        if ("mary".equals(key)) {
            Assert.assertEquals("poppins", entry.getValue());
        } else if ("bert".equals(key)) {
            Assert.assertNull(entry.getValue());
        } else if (key == null) {
            Assert.assertEquals("banks", entry.getValue());
        } else {
            Assert.fail("Unexpected value " + key);
        }
    }
    Assert.assertEquals(3, seen.size());
    Assert.assertTrue(seen.get("mary"));
    Assert.assertTrue(seen.get("bert"));
    Assert.assertTrue(seen.get(null));
}
Also used : HashMap(java.util.HashMap) StringableMap(org.apache.hadoop.hive.common.StringableMap) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Map(java.util.Map) StringableMap(org.apache.hadoop.hive.common.StringableMap) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 5 with StringableMap

use of org.apache.hadoop.hive.common.StringableMap in project hive by apache.

From the class CompactorMR, the method createBaseJobConf (write-id based variant):

/**
 * Builds the base MR job configuration for a compaction run: a map-only job with no
 * conventional output, configured from the table's storage descriptor and properties.
 * Exposed for tests via {@code @VisibleForTesting}.
 */
@VisibleForTesting
public JobConf createBaseJobConf(HiveConf conf, String jobName, Table t, StorageDescriptor sd, ValidWriteIdList writeIds, CompactionInfo ci) {
    JobConf mrJob = new JobConf(conf);
    mrJob.setJobName(jobName);
    // The compactor writes files directly; the MR output channel carries nothing.
    mrJob.setOutputKeyClass(NullWritable.class);
    mrJob.setOutputValueClass(NullWritable.class);
    mrJob.setJarByClass(CompactorMR.class);
    LOG.debug("User jar set to " + mrJob.getJar());
    mrJob.setMapperClass(CompactorMap.class);
    // Map-only job: all the work happens in the mappers.
    mrJob.setNumReduceTasks(0);
    mrJob.setInputFormat(CompactorInputFormat.class);
    mrJob.setOutputFormat(NullOutputFormat.class);
    mrJob.setOutputCommitter(CompactorOutputCommitter.class);
    mrJob.set(FINAL_LOCATION, sd.getLocation());
    mrJob.set(TMP_LOCATION, generateTmpPath(sd));
    mrJob.set(INPUT_FORMAT_CLASS_NAME, sd.getInputFormat());
    mrJob.set(OUTPUT_FORMAT_CLASS_NAME, sd.getOutputFormat());
    mrJob.setBoolean(IS_COMPRESSED, sd.isCompressed());
    mrJob.set(TABLE_PROPS, new StringableMap(t.getParameters()).toString());
    mrJob.setInt(NUM_BUCKETS, sd.getNumBuckets());
    mrJob.set(ValidWriteIdList.VALID_WRITEIDS_KEY, writeIds.toString());
    // override MR properties from tblproperties if applicable
    overrideMRProps(mrJob, t.getParameters());
    if (ci.properties != null) {
        overrideTblProps(mrJob, t.getParameters(), ci.properties);
    }
    // Queue resolution must come after property overrides so per-compaction settings win.
    String queueName = CompactorUtil.getCompactorJobQueueName(conf, ci, t);
    if (queueName.length() > 0) {
        mrJob.setQueueName(queueName);
    }
    // have to use table columns since partition SD isn't updated if these are altered
    setColumnTypes(mrJob, t.getSd().getCols());
    // with feature on, multiple tasks may get into conflict creating/using TMP_LOCATION and if we were
    // to generate the target dir in the Map task, there is no easy way to pass it to OutputCommitter
    // to do the final move
    mrJob.setBoolean("mapreduce.map.speculative", false);
    // Set appropriate Acid readers/writers based on the table properties.
    AcidUtils.setAcidOperationalProperties(mrJob, true, AcidUtils.getAcidOperationalProperties(t.getParameters()));
    return mrJob;
}
Also used : StringableMap(org.apache.hadoop.hive.common.StringableMap) JobConf(org.apache.hadoop.mapred.JobConf) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

StringableMap (org.apache.hadoop.hive.common.StringableMap)6 JobConf (org.apache.hadoop.mapred.JobConf)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 Function (java.util.function.Function)1 ValidReaderWriteIdList (org.apache.hadoop.hive.common.ValidReaderWriteIdList)1 HiveConf (org.apache.hadoop.hive.conf.HiveConf)1 Table (org.apache.hadoop.hive.metastore.api.Table)1 CompactionInfo (org.apache.hadoop.hive.metastore.txn.CompactionInfo)1 Test (org.junit.Test)1 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)1 MethodSource (org.junit.jupiter.params.provider.MethodSource)1