Example 86 with PartitionDesc

use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.

From the class NullScanTaskDispatcher, the method processPath, which replaces a partition's real input path with a synthetic null-scan path so that metadata-only queries skip reading the data:

private synchronized void processPath(MapWork work, Path path, Collection<String> aliasesToOptimize, boolean isEmpty) {
    // Clone the partition descriptor so the original plan entry is untouched.
    PartitionDesc partDesc = work.getPathToPartitionInfo().get(path).clone();
    // Empty partitions are replaced by a zero-row input; non-empty ones by a
    // single null row, so downstream operators that only need metadata still run.
    partDesc.setInputFileFormatClass(isEmpty ? ZeroRowsInputFormat.class : OneNullRowInputFormat.class);
    partDesc.setOutputFileFormatClass(HiveIgnoreKeyTextOutputFormat.class);
    partDesc.getProperties().setProperty(serdeConstants.SERIALIZATION_LIB, NullStructSerDe.class.getName());
    // Register a synthetic path on the null-scan filesystem in place of the real one.
    Path fakePath = new Path(NullScanFileSystem.getBase() + partDesc.getTableName() + "/part" + encode(partDesc.getPartSpec()));
    StringInternUtils.internUriStringsInPath(fakePath);
    work.addPathToPartitionInfo(fakePath, partDesc);
    work.addPathToAlias(fakePath, new ArrayList<>(aliasesToOptimize));
    // Drop the optimized aliases from the real path; remove it entirely once unused.
    Collection<String> aliasesContainingPath = work.getPathToAliases().get(path);
    aliasesContainingPath.removeAll(aliasesToOptimize);
    if (aliasesContainingPath.isEmpty()) {
        work.removePathToAlias(path);
        work.removePathToPartitionInfo(path);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ZeroRowsInputFormat(org.apache.hadoop.hive.ql.io.ZeroRowsInputFormat) NullStructSerDe(org.apache.hadoop.hive.serde2.NullStructSerDe) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) OneNullRowInputFormat(org.apache.hadoop.hive.ql.io.OneNullRowInputFormat)
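
To make the rewrite above concrete, here is a minimal, hypothetical sketch of the same transformation applied by hand to a freshly built MapWork. It uses only calls that appear in the snippets on this page; the table alias, path, and single-partition setup are assumptions for illustration, and the null-scan descriptor is built directly rather than via clone() to keep the sketch self-contained.

import java.util.LinkedHashMap;
import java.util.Properties;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.NullScanFileSystem;
import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;

public class NullScanSketch {
    public static void main(String[] args) {
        MapWork work = new MapWork();
        Path realPath = new Path("/warehouse/t/p=1");  // hypothetical partition location
        // Minimal descriptor state, matching what the test helper in Example 87 sets.
        PartitionDesc realDesc = new PartitionDesc();
        realDesc.setProperties(new Properties());
        realDesc.setPartSpec(new LinkedHashMap<>());
        work.addPathToPartitionInfo(realPath, realDesc);
        work.addPathToAlias(realPath, "t");
        // Swap in the single-null-row input format, as processPath does for a
        // non-empty partition, and register it under a synthetic null-scan path.
        PartitionDesc nullScanDesc = new PartitionDesc();
        nullScanDesc.setProperties(new Properties());
        nullScanDesc.setPartSpec(new LinkedHashMap<>());
        nullScanDesc.setInputFileFormatClass(OneNullRowInputFormat.class);
        Path fakePath = new Path(NullScanFileSystem.getBase() + "t/part1");
        work.addPathToPartitionInfo(fakePath, nullScanDesc);
        work.addPathToAlias(fakePath, "t");
    }
}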

Example 87 with PartitionDesc

use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.

From the test class TestNullScanTaskDispatcher, the helper method addPartitionPath, which registers a minimal partition descriptor for a path under a table alias:

private void addPartitionPath(MapWork mapWork, String table, Path path) {
    mapWork.addPathToAlias(path, table);
    PartitionDesc partitionDesc = new PartitionDesc();
    partitionDesc.setProperties(new Properties());
    partitionDesc.setPartSpec(new LinkedHashMap<>());
    partitionDesc.setTableDesc(mock(TableDesc.class));
    mapWork.addPathToPartitionInfo(path, partitionDesc);
}
Also used : PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) Properties(java.util.Properties)
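
A hypothetical use of this helper inside a test body, just to show the shape of the MapWork it builds (the table name and paths are made up; assertEquals is the usual JUnit assertion):

MapWork mapWork = new MapWork();
addPartitionPath(mapWork, "src", new Path("/warehouse/src/p=1"));
addPartitionPath(mapWork, "src", new Path("/warehouse/src/p=2"));
// Both paths are now registered under the "src" alias, each with its own PartitionDesc.
assertEquals(2, mapWork.getPathToPartitionInfo().size());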

Example 88 with PartitionDesc

use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.

From the test class TestIcebergInputFormats, the method testDeriveLlapSetsCacheAffinityForIcebergInputFormat, which checks that LLAP cache affinity for the Iceberg input format follows the vectorization setting:

@Test
public void testDeriveLlapSetsCacheAffinityForIcebergInputFormat() {
    MapWork mapWork = new MapWork();
    PartitionDesc partitionDesc = new PartitionDesc();
    partitionDesc.setInputFileFormatClass(HiveIcebergInputFormat.class);
    mapWork.addPathToPartitionInfo(new Path("/tmp"), partitionDesc);
    Configuration job = new Configuration(false);
    HiveConf.setVar(job, HiveConf.ConfVars.LLAP_IO_ENABLED, "true");
    HiveConf.setBoolVar(job, HiveConf.ConfVars.LLAP_IO_NONVECTOR_WRAPPER_ENABLED, true);
    mapWork.setVectorMode(true);
    mapWork.deriveLlap(job, false);
    assertTrue("Cache affinity should be set for HiveIcebergInputFormat when LLAP and vectorization is enabled", mapWork.getCacheAffinity());
    mapWork.setVectorMode(false);
    mapWork.deriveLlap(job, false);
    assertFalse("Cache affinity should be disabled for HiveIcebergInputFormat when LLAP is on, but vectorization not", mapWork.getCacheAffinity());
}
Also used : Path(org.apache.hadoop.fs.Path) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) Configuration(org.apache.hadoop.conf.Configuration) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) Test(org.junit.Test)
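
The derivation above depends on three inputs: LLAP_IO_ENABLED, LLAP_IO_NONVECTOR_WRAPPER_ENABLED, and the MapWork's vector mode. As a hedged counterpart to the test, this sketch turns only the LLAP switch off; the expected outcome is an assumption extrapolated from the assertions above, not a documented guarantee:

Configuration job = new Configuration(false);
HiveConf.setVar(job, HiveConf.ConfVars.LLAP_IO_ENABLED, "false");  // LLAP disabled this time
HiveConf.setBoolVar(job, HiveConf.ConfVars.LLAP_IO_NONVECTOR_WRAPPER_ENABLED, true);
MapWork mapWork = new MapWork();
PartitionDesc partitionDesc = new PartitionDesc();
partitionDesc.setInputFileFormatClass(HiveIcebergInputFormat.class);
mapWork.addPathToPartitionInfo(new Path("/tmp"), partitionDesc);
mapWork.setVectorMode(true);
mapWork.deriveLlap(job, false);
// With LLAP off, cache affinity should stay disabled even in vector mode.
assertFalse(mapWork.getCacheAffinity());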

Example 89 with PartitionDesc

use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.

From the test class TestHCatMultiOutputFormat, the method getTableData, which fetches a table's rows through FetchWork and FetchTask:

/**
 * Fetches the data of a table as a list of rows.
 *
 * @param table table name
 * @param database database the table lives in
 * @return list of rows, each with its columns comma-separated
 * @throws Exception if any error occurs
 */
private List<String> getTableData(String table, String database) throws Exception {
    QueryState queryState = new QueryState.Builder().build();
    HiveConf conf = queryState.getConf();
    conf.addResource("hive-site.xml");
    ArrayList<String> results = new ArrayList<String>();
    ArrayList<String> temp = new ArrayList<String>();
    Hive hive = Hive.get(conf);
    org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table);
    FetchWork work;
    if (!tbl.getPartCols().isEmpty()) {
        List<Partition> partitions = hive.getPartitions(tbl);
        List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
        List<Path> partLocs = new ArrayList<Path>();
        TableDesc tableDesc = Utilities.getTableDesc(tbl);
        for (Partition part : partitions) {
            partLocs.add(part.getDataLocation());
            partDesc.add(Utilities.getPartitionDescFromTableDesc(tableDesc, part, true));
        }
        work = new FetchWork(partLocs, partDesc, tableDesc);
        work.setLimit(100);
    } else {
        work = new FetchWork(tbl.getDataLocation(), Utilities.getTableDesc(tbl));
    }
    FetchTask task = new FetchTask();
    task.setWork(work);
    conf.set("_hive.hdfs.session.path", "path");
    conf.set("_hive.local.session.path", "path");
    task.initialize(queryState, null, null, new org.apache.hadoop.hive.ql.Context(conf));
    task.fetch(temp);
    for (String str : temp) {
        results.add(str.replace("\t", ","));
    }
    return results;
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) ArrayList(java.util.ArrayList) QueryState(org.apache.hadoop.hive.ql.QueryState) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) Hive(org.apache.hadoop.hive.ql.metadata.Hive) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) HiveConf(org.apache.hadoop.hive.conf.HiveConf) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc)
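
For an unpartitioned table, the method falls through to the single-path FetchWork constructor. A minimal hypothetical sketch of that branch with a row limit, reusing the identifiers from the method above:

FetchWork fetch = new FetchWork(tbl.getDataLocation(), Utilities.getTableDesc(tbl));
fetch.setLimit(10);  // stop after ten rows
FetchTask fetchTask = new FetchTask();
fetchTask.setWork(fetch);
fetchTask.initialize(queryState, null, null, new org.apache.hadoop.hive.ql.Context(conf));
List<String> rows = new ArrayList<String>();
fetchTask.fetch(rows);  // each entry is one tab-separated row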

Example 90 with PartitionDesc

use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project haivvreo by jghoman.

From the class AvroSerDe, the method determineCorrectProperties, which picks the right table-level Properties when Hive hands the SerDe partition-level ones:

// Hive passes different properties in at different times.  If we're in a MR job,
// we'll get properties for the partition rather than the table, which will give
// us old values for the schema (if it's evolved).  Therefore, in an MR job
// we need to extract the table properties.
// Also, in join queries, multiple properties will be included, so we need
// to extract out the one appropriate to the table we're serde'ing.
private Properties determineCorrectProperties(Configuration configuration, Properties properties) {
    if ((configuration instanceof JobConf) && HaivvreoUtils.insideMRJob((JobConf) configuration)) {
        LOG.info("In MR job, extracting table-level properties");
        MapWork mapWork = Utilities.getMapWork(configuration);
        LinkedHashMap<String, PartitionDesc> a = mapWork.getAliasToPartnInfo();
        if (a.size() == 1) {
            LOG.info("Only one PartitionDesc found.  Returning that Properties");
            PartitionDesc p = a.values().iterator().next();
            TableDesc tableDesc = p.getTableDesc();
            return tableDesc.getProperties();
        } else {
            String tableName = properties.getProperty("name");
            LOG.info("Multiple PartitionDescs.  Return properties for " + tableName);
            for (Map.Entry<String, PartitionDesc> partitionDescs : a.entrySet()) {
                Properties p = partitionDescs.getValue().getTableDesc().getProperties();
                if (p.get("name").equals(tableName)) {
                    // We've found the matching table partition
                    LOG.info("Matched table name against " + partitionDescs.getKey() + ", return its properties");
                    return p;
                }
            }
            // Didn't find anything in partitions to match on.  WARN, at least.
            LOG.warn("Couldn't find any matching properties for table: " + tableName + ". Returning original properties");
        }
    }
    return properties;
}
Also used : MapWork(org.apache.hadoop.hive.ql.plan.MapWork) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) Properties(java.util.Properties) JobConf(org.apache.hadoop.mapred.JobConf) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)
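
A hedged sketch of where this method typically sits: the SerDe's initialize entry point runs the incoming Properties through the correction before doing anything schema-dependent. The old-style initialize(Configuration, Properties) signature is assumed here, and the trailing comment stands in for the rest of the Avro setup:

@Override
public void initialize(Configuration configuration, Properties properties) throws SerDeException {
    // Normalize to table-level properties first, so an evolved schema in a
    // partition never shadows the table's current one.
    Properties tableProperties = determineCorrectProperties(configuration, properties);
    // ... continue Avro initialization (schema lookup, column parsing) from tableProperties
}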

Aggregations

PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 90 uses
Path (org.apache.hadoop.fs.Path): 67 uses
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 41 uses
ArrayList (java.util.ArrayList): 39 uses
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 27 uses
LinkedHashMap (java.util.LinkedHashMap): 24 uses
List (java.util.List): 23 uses
JobConf (org.apache.hadoop.mapred.JobConf): 21 uses
Map (java.util.Map): 18 uses
Properties (java.util.Properties): 18 uses
HashMap (java.util.HashMap): 17 uses
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 17 uses
IOException (java.io.IOException): 15 uses
Operator (org.apache.hadoop.hive.ql.exec.Operator): 15 uses
MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork): 14 uses
Configuration (org.apache.hadoop.conf.Configuration): 13 uses
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 13 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 11 uses
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 9 uses
HiveInputFormat (org.apache.hadoop.hive.ql.io.HiveInputFormat): 9 uses