use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.
the class NullScanTaskDispatcher method processPath.
private synchronized void processPath(MapWork work, Path path, Collection<String> aliasesToOptimize,
    boolean isEmpty) {
  // Clone the partition descriptor and rewire it to a metadata-only scan:
  // a zero-row or one-null-row input format backed by the null SerDe.
  PartitionDesc partDesc = work.getPathToPartitionInfo().get(path).clone();
  partDesc.setInputFileFormatClass(isEmpty ? ZeroRowsInputFormat.class : OneNullRowInputFormat.class);
  partDesc.setOutputFileFormatClass(HiveIgnoreKeyTextOutputFormat.class);
  partDesc.getProperties().setProperty(serdeConstants.SERIALIZATION_LIB, NullStructSerDe.class.getName());
  // Register a synthetic path on the null-scan file system for the optimized aliases.
  Path fakePath = new Path(NullScanFileSystem.getBase() + partDesc.getTableName()
      + "/part" + encode(partDesc.getPartSpec()));
  StringInternUtils.internUriStringsInPath(fakePath);
  work.addPathToPartitionInfo(fakePath, partDesc);
  work.addPathToAlias(fakePath, new ArrayList<>(aliasesToOptimize));
  // Drop the real path entirely once no remaining alias still reads it.
  Collection<String> aliasesContainingPath = work.getPathToAliases().get(path);
  aliasesContainingPath.removeAll(aliasesToOptimize);
  if (aliasesContainingPath.isEmpty()) {
    work.removePathToAlias(path);
    work.removePathToPartitionInfo(path);
  }
}
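For illustration, a minimal sketch of how the fake path is assembled. This is an assumption based on the snippet above: the real encode helper lives in NullScanTaskDispatcher, and NullScanFileSystem.getBase() is assumed here to return a nullscan:// base URI.

import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.LinkedHashMap;
import java.util.Map;

public class FakePathSketch {
  // Hypothetical stand-in for NullScanTaskDispatcher's encode(partSpec):
  // URL-encode the partition spec so it is safe inside a Path string.
  static String encode(Map<String, String> partSpec) {
    return URLEncoder.encode(partSpec.toString(), StandardCharsets.UTF_8);
  }

  public static void main(String[] args) {
    Map<String, String> partSpec = new LinkedHashMap<>();
    partSpec.put("ds", "2024-01-01");
    // "nullscan://null/" mirrors what NullScanFileSystem.getBase() is assumed to return.
    String fakePath = "nullscan://null/" + "my_table" + "/part" + encode(partSpec);
    System.out.println(fakePath); // nullscan://null/my_table/part%7Bds%3D2024-01-01%7D
  }
}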
use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.
the class TestNullScanTaskDispatcher method addPartitionPath.
private void addPartitionPath(MapWork mapWork, String table, Path path) {
  mapWork.addPathToAlias(path, table);
  PartitionDesc partitionDesc = new PartitionDesc();
  partitionDesc.setProperties(new Properties());
  partitionDesc.setPartSpec(new LinkedHashMap<>());
  partitionDesc.setTableDesc(mock(TableDesc.class));
  mapWork.addPathToPartitionInfo(path, partitionDesc);
}
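A hedged usage sketch for the helper above (the paths and assertions are made up; it assumes the helper is in scope in the same test class):

// Register two partition paths for the same alias, then check the MapWork bookkeeping.
MapWork mapWork = new MapWork();
addPartitionPath(mapWork, "tab1", new Path("/warehouse/tab1/part1"));
addPartitionPath(mapWork, "tab1", new Path("/warehouse/tab1/part2"));
assertEquals(2, mapWork.getPathToAliases().size());
assertNotNull(mapWork.getPathToPartitionInfo().get(new Path("/warehouse/tab1/part1")));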
use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.
the class TestIcebergInputFormats method testDeriveLlapSetsCacheAffinityForIcebergInputFormat.
@Test
public void testDeriveLlapSetsCacheAffinityForIcebergInputFormat() {
  MapWork mapWork = new MapWork();
  PartitionDesc partitionDesc = new PartitionDesc();
  partitionDesc.setInputFileFormatClass(HiveIcebergInputFormat.class);
  mapWork.addPathToPartitionInfo(new Path("/tmp"), partitionDesc);
  Configuration job = new Configuration(false);
  HiveConf.setVar(job, HiveConf.ConfVars.LLAP_IO_ENABLED, "true");
  HiveConf.setBoolVar(job, HiveConf.ConfVars.LLAP_IO_NONVECTOR_WRAPPER_ENABLED, true);
  mapWork.setVectorMode(true);
  mapWork.deriveLlap(job, false);
  assertTrue("Cache affinity should be set for HiveIcebergInputFormat when LLAP and vectorization are enabled",
      mapWork.getCacheAffinity());
  mapWork.setVectorMode(false);
  mapWork.deriveLlap(job, false);
  assertFalse("Cache affinity should be disabled for HiveIcebergInputFormat when LLAP is on but vectorization is not",
      mapWork.getCacheAffinity());
}
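As a complementary sketch (assumed behavior, not taken from the test suite): with LLAP IO switched off entirely, deriveLlap should leave cache affinity disabled even in vectorized mode.

// Hypothetical follow-up assertion reusing the mapWork from the test above.
Configuration noLlap = new Configuration(false);
HiveConf.setVar(noLlap, HiveConf.ConfVars.LLAP_IO_ENABLED, "false");
mapWork.setVectorMode(true);
mapWork.deriveLlap(noLlap, false);
assertFalse("No cache affinity expected when LLAP IO is disabled", mapWork.getCacheAffinity());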
use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.
the class TestHCatMultiOutputFormat method getTableData.
/**
 * Fetches table data via a FetchTask.
 *
 * @param table table name
 * @param database database name
 * @return list of rows, with columns comma-separated
 * @throws Exception if any error occurs
 */
private List<String> getTableData(String table, String database) throws Exception {
  QueryState queryState = new QueryState.Builder().build();
  HiveConf conf = queryState.getConf();
  conf.addResource("hive-site.xml");
  ArrayList<String> results = new ArrayList<String>();
  ArrayList<String> temp = new ArrayList<String>();
  Hive hive = Hive.get(conf);
  org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table);
  FetchWork work;
  if (!tbl.getPartCols().isEmpty()) {
    // Partitioned table: build a FetchWork over every partition's location.
    List<Partition> partitions = hive.getPartitions(tbl);
    List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
    List<Path> partLocs = new ArrayList<Path>();
    TableDesc tableDesc = Utilities.getTableDesc(tbl);
    for (Partition part : partitions) {
      partLocs.add(part.getDataLocation());
      partDesc.add(Utilities.getPartitionDescFromTableDesc(tableDesc, part, true));
    }
    work = new FetchWork(partLocs, partDesc, tableDesc);
    work.setLimit(100);
  } else {
    // Unpartitioned table: fetch directly from the table's data location.
    work = new FetchWork(tbl.getDataLocation(), Utilities.getTableDesc(tbl));
  }
  FetchTask task = new FetchTask();
  task.setWork(work);
  conf.set("_hive.hdfs.session.path", "path");
  conf.set("_hive.local.session.path", "path");
  task.initialize(queryState, null, null, new org.apache.hadoop.hive.ql.Context(conf));
  task.fetch(temp);
  for (String str : temp) {
    results.add(str.replace("\t", ","));
  }
  return results;
}
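A hypothetical call site for this helper (the table and database names are placeholders):

// Fetch up to 100 rows and print them; each row's columns are comma-separated.
List<String> rows = getTableData("employee", "default");
for (String row : rows) {
  System.out.println(row);
}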
use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project haivvreo by jghoman.
the class AvroSerDe method determineCorrectProperties.
// Hive passes different properties in at different times. If we're in an MR job,
// we'll get properties for the partition rather than the table, which will give
// us stale values for the schema (if it has evolved). Therefore, in an MR job
// we need to extract the table-level properties.
// Also, in join queries, multiple tables' properties will be included, so we need
// to extract the one appropriate to the table we're serde'ing.
private Properties determineCorrectProperties(Configuration configuration, Properties properties) {
  if ((configuration instanceof JobConf) && HaivvreoUtils.insideMRJob((JobConf) configuration)) {
    LOG.info("In MR job, extracting table-level properties");
    MapWork mapWork = Utilities.getMapWork(configuration);
    LinkedHashMap<String, PartitionDesc> a = mapWork.getAliasToPartnInfo();
    if (a.size() == 1) {
      LOG.info("Only one PartitionDesc found. Returning its Properties");
      PartitionDesc p = a.values().iterator().next();
      TableDesc tableDesc = p.getTableDesc();
      return tableDesc.getProperties();
    } else {
      String tableName = properties.getProperty("name");
      LOG.info("Multiple PartitionDescs. Returning properties for " + tableName);
      for (Map.Entry<String, PartitionDesc> partitionDescs : a.entrySet()) {
        Properties p = partitionDescs.getValue().getTableDesc().getProperties();
        if (p.get("name").equals(tableName)) {
          // We've found the matching table partition
          LOG.info("Matched table name against " + partitionDescs.getKey() + ", returning its properties");
          return p;
        }
      }
      // Didn't find anything in the partitions to match on. WARN, at least.
      LOG.warn("Couldn't find any matching properties for table: " + tableName + ". Returning original properties");
    }
  }
  return properties;
}
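The name-matching loop is the crux of the method; below is a self-contained sketch of just that idea (all class, alias, and table names here are made up for illustration). Note it compares with tableName.equals(...) so an entry whose properties lack a "name" key cannot cause a NullPointerException.

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Properties;

public class TablePropsLookup {
  // Given several table-level Properties (as in a join), pick the one whose
  // "name" property matches the table currently being serialized.
  static Properties selectByName(Map<String, Properties> aliasToTableProps, String tableName) {
    for (Map.Entry<String, Properties> e : aliasToTableProps.entrySet()) {
      if (tableName.equals(e.getValue().getProperty("name"))) {
        return e.getValue();
      }
    }
    return null; // caller should fall back to the properties it was handed
  }

  public static void main(String[] args) {
    Map<String, Properties> props = new LinkedHashMap<>();
    Properties orders = new Properties();
    orders.setProperty("name", "db.orders");
    props.put("orders_alias", orders);
    Properties customers = new Properties();
    customers.setProperty("name", "db.customers");
    props.put("customers_alias", customers);
    System.out.println(selectByName(props, "db.customers").getProperty("name")); // db.customers
  }
}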