Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.
The class TestHiveBinarySearchRecordReader, method init:
private void init() throws IOException {
  conf = new JobConf();
  resetIOContext();
  rcfReader = mock(RCFileRecordReader.class);
  when(rcfReader.next((LongWritable) anyObject(),
      (BytesRefArrayWritable) anyObject())).thenReturn(true);
  // Since the start is 0 and the length is 100, the first call to sync should be
  // with the value 50, so return that for getPos().
  when(rcfReader.getPos()).thenReturn(50L);
  conf.setBoolean("hive.input.format.sorted", true);

  TableDesc tblDesc = Utilities.defaultTd;
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path("/tmp/testfolder"), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Utilities.setMapRedWork(conf, mrwork,
      new Path("/tmp/" + System.getProperty("user.name"), "hive"));

  hiveSplit = new TestHiveInputSplit();
  hbsReader = new TestHiveRecordReader(rcfReader, conf);
  hbsReader.initIOContext(hiveSplit, conf, Class.class, rcfReader);
}
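The setup above stubs RCFileRecordReader with Mockito so the binary-search logic sees a deterministic stream: next() always succeeds and getPos() reports the midpoint of the [0, 100) split. Below is a minimal, self-contained sketch of that stubbing pattern, assuming only Mockito and JUnit 4; SimpleReader and MockedReaderSketch are hypothetical stand-ins for illustration, not Hive classes.

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import org.junit.Test;

public class MockedReaderSketch {

  /** Hypothetical stand-in for the reader's position/next surface. */
  interface SimpleReader {
    boolean next();
    long getPos();
  }

  @Test
  public void stubbedReaderReportsMidpoint() {
    SimpleReader reader = mock(SimpleReader.class);
    // As in init(): every next() succeeds, and getPos() reports the midpoint
    // of a [0, 100) split, which is what the first sync probe should observe.
    when(reader.next()).thenReturn(true);
    when(reader.getPos()).thenReturn(50L);

    assertTrue(reader.next());
    assertEquals(50L, reader.getPos());
  }
}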
Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.
The class MapOperator, method initObjectInspector:
private MapOpCtx initObjectInspector(Configuration hconf, MapOpCtx opCtx,
    StructObjectInspector tableRowOI) throws Exception {
  PartitionDesc pd = opCtx.partDesc;
  TableDesc td = pd.getTableDesc();
  // Use table properties in the case of unpartitioned tables, and the union of
  // table properties and partition properties, with partition properties taking
  // precedence, in the case of partitioned tables.
  Properties overlayedProps =
      SerDeUtils.createOverlayedProperties(td.getProperties(), pd.getProperties());
  Map<String, String> partSpec = pd.getPartSpec();
  opCtx.tableName = String.valueOf(overlayedProps.getProperty("name"));
  opCtx.partName = String.valueOf(partSpec);
  opCtx.deserializer = pd.getDeserializer(hconf);

  StructObjectInspector partRawRowObjectInspector;
  boolean isAcid = AcidUtils.isTablePropertyTransactional(td.getProperties());
  if (Utilities.isSchemaEvolutionEnabled(hconf, isAcid)
      && Utilities.isInputFileFormatSelfDescribing(pd)) {
    partRawRowObjectInspector = tableRowOI;
  } else {
    partRawRowObjectInspector =
        (StructObjectInspector) opCtx.deserializer.getObjectInspector();
  }
  opCtx.partTblObjectInspectorConverter =
      ObjectInspectorConverters.getConverter(partRawRowObjectInspector, tableRowOI);

  // Next, check whether this table has partitions; if so, get the list of
  // partition names and allocate the object inspectors for the partition columns.
  String pcols = overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
  if (pcols != null && pcols.length() > 0) {
    String[] partKeys = pcols.trim().split("/");
    String pcolTypes =
        overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
    String[] partKeyTypes = pcolTypes.trim().split(":");
    if (partKeys.length > partKeyTypes.length) {
      throw new HiveException("Internal error: partKeys length, " + partKeys.length
          + ", greater than partKeyTypes length, " + partKeyTypes.length);
    }
    List<String> partNames = new ArrayList<String>(partKeys.length);
    Object[] partValues = new Object[partKeys.length];
    List<ObjectInspector> partObjectInspectors =
        new ArrayList<ObjectInspector>(partKeys.length);
    for (int i = 0; i < partKeys.length; i++) {
      String key = partKeys[i];
      partNames.add(key);
      ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
          TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]));
      if (partSpec == null) {
        // No partitions exist for this table; initialize the partition value to null.
        partValues[i] = null;
      } else {
        partValues[i] = ObjectInspectorConverters.getConverter(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi)
            .convert(partSpec.get(key));
      }
      partObjectInspectors.add(oi);
    }
    opCtx.rowWithPart = new Object[] { null, partValues };
    opCtx.partObjectInspector =
        ObjectInspectorFactory.getStandardStructObjectInspector(partNames, partObjectInspectors);
  }

  // The operator may not be a TableScan; for map joins it can be a Select, in
  // which case the rowOI need not be amended.
  if (opCtx.op instanceof TableScanOperator) {
    TableScanOperator tsOp = (TableScanOperator) opCtx.op;
    TableScanDesc tsDesc = tsOp.getConf();
    if (tsDesc != null && tsDesc.hasVirtualCols()) {
      opCtx.vcs = tsDesc.getVirtualCols();
      opCtx.vcValues = new Object[opCtx.vcs.size()];
      opCtx.vcsObjectInspector = VirtualColumn.getVCSObjectInspector(opCtx.vcs);
      if (opCtx.isPartitioned()) {
        opCtx.rowWithPartAndVC = Arrays.copyOfRange(opCtx.rowWithPart, 0, 3);
      } else {
        opCtx.rowWithPartAndVC = new Object[2];
      }
    }
  }

  if (!opCtx.hasVC() && !opCtx.isPartitioned()) {
    opCtx.rowObjectInspector = tableRowOI;
    return opCtx;
  }
  List<StructObjectInspector> inspectors = new ArrayList<StructObjectInspector>();
  inspectors.add(tableRowOI);
  if (opCtx.isPartitioned()) {
    inspectors.add(opCtx.partObjectInspector);
  }
  if (opCtx.hasVC()) {
    inspectors.add(opCtx.vcsObjectInspector);
  }
  opCtx.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(inspectors);
  return opCtx;
}
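Two details above are easy to miss: partition properties overlay table properties with partition values taking precedence, and the partition column names and types arrive from the metastore as a '/'-separated name list paired with ':'-separated types. The sketch below illustrates both under stated assumptions: the class name and the overlay() helper are hypothetical, and java.util.Properties defaults merely mimic the semantics of SerDeUtils.createOverlayedProperties rather than its implementation.

import java.util.Properties;

public class InitObjectInspectorSketch {

  // Hypothetical helper: lookups fall back to the table-level values when the
  // partition level has no entry, so partition entries win on conflict.
  static Properties overlay(Properties table, Properties partition) {
    Properties merged = new Properties(table);
    for (String key : partition.stringPropertyNames()) {
      merged.setProperty(key, partition.getProperty(key));
    }
    return merged;
  }

  public static void main(String[] args) {
    Properties table = new Properties();
    table.setProperty("name", "default.t1");
    table.setProperty("serialization.format", "1");
    Properties partition = new Properties();
    partition.setProperty("serialization.format", "2");

    Properties overlayed = overlay(table, partition);
    System.out.println(overlayed.getProperty("name"));                 // default.t1
    System.out.println(overlayed.getProperty("serialization.format")); // 2 (partition wins)

    // Partition columns as encoded in the table properties, e.g. for a table
    // declared PARTITIONED BY (ds string, hr int):
    String pcols = "ds/hr";
    String pcolTypes = "string:int";
    String[] partKeys = pcols.trim().split("/");
    String[] partKeyTypes = pcolTypes.trim().split(":");
    for (int i = 0; i < partKeys.length; i++) {
      System.out.println(partKeys[i] + " : " + partKeyTypes[i]); // ds : string, hr : int
    }
  }
}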
Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.
The class MapOperator, method initEmptyInputChildren:
/*
 * This is the same as the setChildren method below, but for empty tables.
 * It takes care of the following:
 * 1. Creating the right object inspector.
 * 2. Setting up childrenOpToOI with that object inspector, so that
 *    initialization happens correctly.
 */
public void initEmptyInputChildren(List<Operator<?>> children, Configuration hconf)
    throws SerDeException, Exception {
  setChildOperators(children);
  Map<String, Configuration> tableNameToConf = cloneConfsForNestedColPruning(hconf);
  for (Operator<?> child : children) {
    TableScanOperator tsOp = (TableScanOperator) child;
    StructObjectInspector soi = null;
    PartitionDesc partDesc = conf.getAliasToPartnInfo().get(tsOp.getConf().getAlias());
    Configuration newConf = tableNameToConf.get(partDesc.getTableDesc().getTableName());
    Deserializer serde = partDesc.getTableDesc().getDeserializer();
    partDesc.setProperties(partDesc.getProperties());
    MapOpCtx opCtx = new MapOpCtx(tsOp.getConf().getAlias(), child, partDesc);
    StructObjectInspector tableRowOI = (StructObjectInspector) serde.getObjectInspector();
    initObjectInspector(newConf, opCtx, tableRowOI);
    soi = opCtx.rowObjectInspector;
    child.getParentOperators().add(this);
    childrenOpToOI.put(child, soi);
  }
}
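The loop above does three things per child: derive a row inspector via initObjectInspector, register this operator as the child's parent, and memoize the inspector in childrenOpToOI. Below is a minimal sketch of that wiring pattern; Node and Inspector are hypothetical stand-ins for Operator<?> and StructObjectInspector, not Hive types.

import java.util.ArrayList;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;

public class ChildWiringSketch {

  static class Inspector {
    final String shape;
    Inspector(String shape) { this.shape = shape; }
  }

  static class Node {
    final List<Node> parents = new ArrayList<>();
    final String alias;
    Node(String alias) { this.alias = alias; }
  }

  private final Map<Node, Inspector> childrenToInspector = new IdentityHashMap<>();

  void wire(Node self, List<Node> children) {
    for (Node child : children) {
      // Stand-in for initObjectInspector: derive an inspector for this child.
      Inspector soi = new Inspector("row-for-" + child.alias);
      child.parents.add(self);            // register self as the child's parent
      childrenToInspector.put(child, soi); // memoize the inspector per child
    }
  }

  public static void main(String[] args) {
    ChildWiringSketch op = new ChildWiringSketch();
    Node self = new Node("map");
    List<Node> children = new ArrayList<>();
    children.add(new Node("ts1"));
    children.add(new Node("ts2"));
    op.wire(self, children);
    for (Node child : children) {
      System.out.println(child.alias + " -> " + op.childrenToInspector.get(child).shape
          + ", parents=" + child.parents.size());
    }
  }
}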
Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.
The class MapOperator, method getConvertedOI:
/**
 * Return the mapping from table descriptor to the expected table OI.
 * Traverses all the partitions of a table and gets the OI for the table.
 * Note that a conversion is required if any partition OI differs from the
 * table OI. For example, if the query references table T (partitions P1, P2),
 * and P1's schema is the same as T's while P2's schema differs from T's,
 * conversion might be needed for both P1 and P2, since a SettableOI might be
 * needed for T.
 */
private Map<TableDesc, StructObjectInspector> getConvertedOI(
    Map<String, Configuration> tableToConf) throws HiveException {
  Map<TableDesc, StructObjectInspector> tableDescOI =
      new HashMap<TableDesc, StructObjectInspector>();
  Set<TableDesc> identityConverterTableDesc = new HashSet<TableDesc>();
  try {
    Map<ObjectInspector, Boolean> oiSettableProperties =
        new HashMap<ObjectInspector, Boolean>();
    for (Path onefile : conf.getPathToAliases().keySet()) {
      PartitionDesc pd = conf.getPathToPartitionInfo().get(onefile);
      TableDesc tableDesc = pd.getTableDesc();
      Configuration hconf = tableToConf.get(tableDesc.getTableName());
      Deserializer partDeserializer = pd.getDeserializer(hconf);
      StructObjectInspector partRawRowObjectInspector;
      boolean isAcid = AcidUtils.isTablePropertyTransactional(tableDesc.getProperties());
      if (Utilities.isSchemaEvolutionEnabled(hconf, isAcid)
          && Utilities.isInputFileFormatSelfDescribing(pd)) {
        Deserializer tblDeserializer = tableDesc.getDeserializer(hconf);
        partRawRowObjectInspector =
            (StructObjectInspector) tblDeserializer.getObjectInspector();
      } else {
        partRawRowObjectInspector =
            (StructObjectInspector) partDeserializer.getObjectInspector();
      }
      StructObjectInspector tblRawRowObjectInspector = tableDescOI.get(tableDesc);
      if ((tblRawRowObjectInspector == null)
          || (identityConverterTableDesc.contains(tableDesc))) {
        Deserializer tblDeserializer = tableDesc.getDeserializer(hconf);
        tblRawRowObjectInspector =
            (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(
                partRawRowObjectInspector, tblDeserializer.getObjectInspector(),
                oiSettableProperties);
        if (identityConverterTableDesc.contains(tableDesc)) {
          if (!partRawRowObjectInspector.equals(tblRawRowObjectInspector)) {
            identityConverterTableDesc.remove(tableDesc);
          }
        } else if (partRawRowObjectInspector.equals(tblRawRowObjectInspector)) {
          identityConverterTableDesc.add(tableDesc);
        }
        tableDescOI.put(tableDesc, tblRawRowObjectInspector);
      }
    }
  } catch (Exception e) {
    throw new HiveException(e);
  }
  return tableDescOI;
}
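The method above caches one converted OI per TableDesc and uses the identityConverterTableDesc set to recompute only for tables whose partitions have so far matched the table shape exactly: the first diverging partition demotes the table out of the identity set. Below is a minimal sketch of that caching discipline; all names are hypothetical and plain Strings stand in for the object inspectors.

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class ConvertedCacheSketch {

  private final Map<String, String> tableOI = new HashMap<>();
  private final Set<String> identityTables = new HashSet<>();

  /** Stand-in for ObjectInspectorConverters.getConvertedOI: convert toward the table shape. */
  private String convert(String partOI, String declaredTblOI) {
    return declaredTblOI;
  }

  void observe(String table, String partOI, String declaredTblOI) {
    String cached = tableOI.get(table);
    // Recompute only on first sight, or while the table is still "identity".
    if (cached == null || identityTables.contains(table)) {
      String converted = convert(partOI, declaredTblOI);
      if (identityTables.contains(table)) {
        // A previously-identity table saw a diverging partition: demote it.
        if (!partOI.equals(converted)) {
          identityTables.remove(table);
        }
      } else if (partOI.equals(converted)) {
        identityTables.add(table);
      }
      tableOI.put(table, converted);
    }
  }

  public static void main(String[] args) {
    ConvertedCacheSketch cache = new ConvertedCacheSketch();
    cache.observe("T", "oi-T", "oi-T");  // partition matches the table: identity
    cache.observe("T", "oi-P2", "oi-T"); // diverging partition: identity revoked
    System.out.println(cache.tableOI.get("T")
        + " identity=" + cache.identityTables.contains("T")); // oi-T identity=false
  }
}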
Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.
The class ProjectionPusher, method pushProjectionsAndFilters:
public JobConf pushProjectionsAndFilters(JobConf jobConf, Path path) throws IOException {
  // TODO: refactor this in HIVE-6366
  updateMrWork(jobConf);
  final JobConf cloneJobConf = new JobConf(jobConf);
  final PartitionDesc part = pathToPartitionInfo.get(path);
  if ((part != null) && (part.getTableDesc() != null)) {
    Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);
  }
  pushProjectionsAndFilters(cloneJobConf, path.toString(), path.toUri().getPath());
  return cloneJobConf;
}
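The clone matters here: the table properties and pushed projections for one path must not leak into the JobConf shared across paths. Below is a minimal sketch of that copy-then-mutate pattern, assuming only the public JobConf copy constructor; the example.columns key is a hypothetical property used purely for illustration.

import org.apache.hadoop.mapred.JobConf;

public class CloneConfSketch {

  public static void main(String[] args) {
    JobConf shared = new JobConf();
    shared.set("example.columns", "*");

    // Copy-construct, then mutate only the clone, as the method above does.
    JobConf clone = new JobConf(shared);
    clone.set("example.columns", "a,b"); // e.g. a projection pushed for one path

    System.out.println(shared.get("example.columns")); // * (shared conf untouched)
    System.out.println(clone.get("example.columns"));  // a,b
  }
}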