Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.
The class Utilities, method createDummyFileForEmptyPartition.
@SuppressWarnings("rawtypes")
private static Path createDummyFileForEmptyPartition(Path path, JobConf job, MapWork work, Path hiveScratchDir) throws Exception {
  String strPath = path.toString();
  // The input file does not exist, replace it with an empty file
  PartitionDesc partDesc = work.getPathToPartitionInfo().get(path);
  if (partDesc.getTableDesc().isNonNative()) {
    // if this isn't a hive table we can't create an empty file for it.
    return path;
  }
  Properties props = SerDeUtils.createOverlayedProperties(partDesc.getTableDesc().getProperties(), partDesc.getProperties());
  HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, partDesc);
  boolean oneRow = partDesc.getInputFileFormatClass() == OneNullRowInputFormat.class;
  Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job, props, oneRow);
  if (LOG.isInfoEnabled()) {
    LOG.info("Changed input file " + strPath + " to empty file " + newPath + " (" + oneRow + ")");
  }
  // update the work
  work.addPathToAlias(newPath, work.getPathToAliases().get(path));
  work.removePathToAlias(path);
  work.removePathToPartitionInfo(path);
  work.addPathToPartitionInfo(newPath, partDesc);
  return newPath;
}
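For context, a minimal caller sketch of how such a helper might be driven; the real logic lives in Utilities.getInputPaths, which also handles empty-but-existing directories, so the loop below and the names job, work, hiveScratchDir and inputPaths are illustrative assumptions rather than the actual Hive code.

for (Path file : new ArrayList<>(work.getPathToAliases().keySet())) {
  // Iterate over a copy of the key set: createDummyFileForEmptyPartition mutates the path mappings in `work`.
  FileSystem fs = file.getFileSystem(job);
  if (!fs.exists(file)) {
    // Swap the missing input path for a generated empty file.
    file = createDummyFileForEmptyPartition(file, job, work, hiveScratchDir);
  }
  inputPaths.add(file);
}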
Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.
The class DynamicPartitionPruner, method applyFilterToPartitions.
@SuppressWarnings("rawtypes")
private void applyFilterToPartitions(Converter converter, ExprNodeEvaluator eval, String columnName, Set<Object> values) throws HiveException {
  Object[] row = new Object[1];
  Iterator<Path> it = work.getPathToPartitionInfo().keySet().iterator();
  while (it.hasNext()) {
    Path p = it.next();
    PartitionDesc desc = work.getPathToPartitionInfo().get(p);
    Map<String, String> spec = desc.getPartSpec();
    if (spec == null) {
      throw new IllegalStateException("No partition spec found in dynamic pruning");
    }
    String partValueString = spec.get(columnName);
    if (partValueString == null) {
      throw new IllegalStateException("Could not find partition value for column: " + columnName);
    }
    Object partValue = converter.convert(partValueString);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Converted partition value: " + partValue + " original (" + partValueString + ")");
    }
    row[0] = partValue;
    partValue = eval.evaluate(row);
    if (LOG.isDebugEnabled()) {
      LOG.debug("part key expr applied: " + partValue);
    }
    if (!values.contains(partValue)) {
      LOG.info("Pruning path: " + p);
      it.remove();
      // work.removePathToPartitionInfo(p);
      work.removePathToAlias(p);
    }
  }
}
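A hedged sketch of how the converter and evaluator arguments might be prepared before this method is called. In the real DynamicPartitionPruner this information comes from the deserialized pruning source, so partKeyExpr (an ExprNodeDesc), columnName and valuesFromEvents are assumptions here, and the partition key is assumed to be string-typed.

ObjectInspector partKeyOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
Converter converter = ObjectInspectorConverters.getConverter(
    PrimitiveObjectInspectorFactory.javaStringObjectInspector, partKeyOI);
// The evaluator sees a one-column row holding the converted partition value.
StructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
    Collections.singletonList(columnName), Collections.singletonList(partKeyOI));
ExprNodeEvaluator eval = ExprNodeEvaluatorFactory.get(partKeyExpr);
eval.initialize(rowOI);
applyFilterToPartitions(converter, eval, columnName, valuesFromEvents);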
Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.
The class TestUtilities, method runTestGetInputSummary.
private ContentSummary runTestGetInputSummary(JobConf jobConf, Properties properties, int numOfPartitions, int bytesPerFile, Class<? extends InputFormat> inputFormatClass) throws IOException {
  // creates scratch directories needed by the Context object
  SessionState.start(new HiveConf());
  MapWork mapWork = new MapWork();
  Context context = new Context(jobConf);
  LinkedHashMap<Path, PartitionDesc> pathToPartitionInfo = new LinkedHashMap<>();
  LinkedHashMap<Path, ArrayList<String>> pathToAliasTable = new LinkedHashMap<>();
  TableScanOperator scanOp = new TableScanOperator();
  PartitionDesc partitionDesc = new PartitionDesc(new TableDesc(inputFormatClass, null, properties), null);
  String testTableName = "testTable";
  Path testTablePath = new Path(testTableName);
  Path[] testPartitionsPaths = new Path[numOfPartitions];
  for (int i = 0; i < numOfPartitions; i++) {
    // use the loop index so each partition gets its own alias
    String testPartitionName = "p=" + i;
    testPartitionsPaths[i] = new Path(testTablePath, "p=" + i);
    pathToPartitionInfo.put(testPartitionsPaths[i], partitionDesc);
    pathToAliasTable.put(testPartitionsPaths[i], Lists.newArrayList(testPartitionName));
    mapWork.getAliasToWork().put(testPartitionName, scanOp);
  }
  mapWork.setPathToAliases(pathToAliasTable);
  mapWork.setPathToPartitionInfo(pathToPartitionInfo);
  FileSystem fs = FileSystem.getLocal(jobConf);
  try {
    fs.mkdirs(testTablePath);
    byte[] data = new byte[bytesPerFile];
    for (int i = 0; i < numOfPartitions; i++) {
      fs.mkdirs(testPartitionsPaths[i]);
      FSDataOutputStream out = fs.create(new Path(testPartitionsPaths[i], "test1.txt"));
      out.write(data);
      out.close();
    }
    return Utilities.getInputSummary(context, mapWork, null);
  } finally {
    if (fs.exists(testTablePath)) {
      fs.delete(testTablePath, true);
    }
  }
}
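A hedged usage sketch of a test driving this helper; the choice of TextInputFormat and the expected total are illustrative.

@Test
public void testGetInputSummaryPlainFiles() throws IOException {
  final int numPartitions = 5;
  final int bytesPerFile = 300;
  ContentSummary summary = runTestGetInputSummary(
      new JobConf(), new Properties(), numPartitions, bytesPerFile, TextInputFormat.class);
  // Each partition directory holds one test1.txt of bytesPerFile bytes.
  assertEquals(numPartitions * bytesPerFile, summary.getLength());
}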
Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.
The class TestUtilities, method testGetInputPathsWithEmptyTables.
/**
* Check that calling {@link Utilities#getInputPaths(JobConf, MapWork, Path, Context, boolean)}
* can process two different empty tables without throwing any exceptions.
*/
@Test
public void testGetInputPathsWithEmptyTables() throws Exception {
  String alias1Name = "alias1";
  String alias2Name = "alias2";
  MapWork mapWork1 = new MapWork();
  MapWork mapWork2 = new MapWork();
  JobConf jobConf = new JobConf();
  Path nonExistentPath1 = new Path(UUID.randomUUID().toString());
  Path nonExistentPath2 = new Path(UUID.randomUUID().toString());
  PartitionDesc mockPartitionDesc = mock(PartitionDesc.class);
  TableDesc mockTableDesc = mock(TableDesc.class);
  when(mockTableDesc.isNonNative()).thenReturn(false);
  when(mockTableDesc.getProperties()).thenReturn(new Properties());
  when(mockPartitionDesc.getProperties()).thenReturn(new Properties());
  when(mockPartitionDesc.getTableDesc()).thenReturn(mockTableDesc);
  doReturn(HiveSequenceFileOutputFormat.class).when(mockPartitionDesc).getOutputFileFormatClass();
  mapWork1.setPathToAliases(new LinkedHashMap<>(ImmutableMap.of(nonExistentPath1, Lists.newArrayList(alias1Name))));
  mapWork1.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(ImmutableMap.of(alias1Name, (Operator<?>) mock(Operator.class))));
  mapWork1.setPathToPartitionInfo(new LinkedHashMap<>(ImmutableMap.of(nonExistentPath1, mockPartitionDesc)));
  mapWork2.setPathToAliases(new LinkedHashMap<>(ImmutableMap.of(nonExistentPath2, Lists.newArrayList(alias2Name))));
  mapWork2.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(ImmutableMap.of(alias2Name, (Operator<?>) mock(Operator.class))));
  mapWork2.setPathToPartitionInfo(new LinkedHashMap<>(ImmutableMap.of(nonExistentPath2, mockPartitionDesc)));
  List<Path> inputPaths = new ArrayList<>();
  try {
    Path scratchDir = new Path(HiveConf.getVar(jobConf, HiveConf.ConfVars.LOCALSCRATCHDIR));
    inputPaths.addAll(Utilities.getInputPaths(jobConf, mapWork1, scratchDir, mock(Context.class), false));
    inputPaths.addAll(Utilities.getInputPaths(jobConf, mapWork2, scratchDir, mock(Context.class), false));
    assertEquals(2, inputPaths.size());
  } finally {
    File file;
    for (Path path : inputPaths) {
      file = new File(path.toString());
      if (file.exists()) {
        file.delete();
      }
    }
  }
}
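A small hedged extension one might add inside the try block above: the returned paths should be the generated dummy entries under the scratch directory, not the original non-existent paths.

for (Path path : inputPaths) {
  // Neither of the original, non-existent paths should be returned directly;
  // each should have been replaced by a generated empty file for its empty table.
  assertNotEquals(nonExistentPath1, path);
  assertNotEquals(nonExistentPath2, path);
}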
Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.
The class VectorMapOperator, method setupPartitionContextVars.
/*
 * Setup the context for reading from the next partition file.
 */
private void setupPartitionContextVars(String nominalPath) throws HiveException {
  currentVectorPartContext = fileToPartitionContextMap.get(nominalPath);
  if (currentVectorPartContext == null) {
    return;
  }
  PartitionDesc partDesc = currentVectorPartContext.getPartDesc();
  VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc();
  currentReadType = vectorPartDesc.getVectorMapOperatorReadType();
  /*
   * Setup for 3 different kinds of vectorized reading supported:
   *
   * 1) Read the Vectorized Input File Format which returns VectorizedRowBatch as the row.
   *
   * 2) Read using VectorDeserializeRow to deserialize each row into the VectorizedRowBatch.
   *
   * 3) Read using the regular partition deserializer to get the row object and assign it
   *    into the VectorizedRowBatch with VectorAssignRow.
   */
  if (currentReadType == VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) {
    /*
     * The Vectorized Input File Format reader is responsible for setting the partition column
     * values, resetting and filling in the batch, etc.
     */
    /*
     * Clear all the reading variables.
     */
    currentDataColumnCount = 0;
    currentDeserializeRead = null;
    currentVectorDeserializeRow = null;
    currentPartDeserializer = null;
    currentPartRawRowObjectInspector = null;
    currentVectorAssign = null;
  } else {
    /*
     * We will get "regular" single rows from the Input File Format reader that we will need
     * to {vector|row} deserialize.
     */
    Preconditions.checkState(currentReadType == VectorMapOperatorReadType.VECTOR_DESERIALIZE || currentReadType == VectorMapOperatorReadType.ROW_DESERIALIZE);
    /*
     * Clear out any rows in the batch from previous partition since we are going to change
     * the repeating partition column values.
     */
    if (!flushDeserializerBatch()) {
      // Operator tree is now done.
      return;
    }
    /*
     * For this particular file, how many columns will we actually read?
     */
    currentDataColumnCount = currentVectorPartContext.getReaderDataColumnCount();
    if (currentDataColumnCount < dataColumnCount) {
      /*
       * Default any additional data columns to NULL once for the file (if they are present).
       */
      for (int i = currentDataColumnCount; i < dataColumnCount; i++) {
        ColumnVector colVector = deserializerBatch.cols[i];
        if (colVector != null) {
          colVector.isNull[0] = true;
          colVector.noNulls = false;
          colVector.isRepeating = true;
        }
      }
    }
    if (batchContext.getPartitionColumnCount() > 0) {
      /*
       * The partition columns are set once for the partition and are marked repeating.
       */
      VectorizedRowBatchCtx.getPartitionValues(batchContext, partDesc, partitionValues);
      batchContext.addPartitionColsToBatch(deserializerBatch, partitionValues);
    }
    if (hasRowIdentifier) {
      // No ACID in code path -- set ROW__ID to NULL.
      setRowIdentiferToNull(deserializerBatch);
    }
    /*
     * Set or clear the rest of the reading variables based on {vector|row} deserialization.
     */
    switch (currentReadType) {
    case VECTOR_DESERIALIZE:
      {
        VectorDeserializePartitionContext vectorDeserPartContext = (VectorDeserializePartitionContext) currentVectorPartContext;
        // Set ours.
        currentDeserializeRead = vectorDeserPartContext.getDeserializeRead();
        currentVectorDeserializeRow = vectorDeserPartContext.getVectorDeserializeRow();
        // Clear the other ones.
        currentPartDeserializer = null;
        currentPartRawRowObjectInspector = null;
        currentVectorAssign = null;
      }
      break;
    case ROW_DESERIALIZE:
      {
        RowDeserializePartitionContext rowDeserPartContext = (RowDeserializePartitionContext) currentVectorPartContext;
        // Clear the other ones.
        currentDeserializeRead = null;
        currentVectorDeserializeRow = null;
        // Set ours.
        currentPartDeserializer = rowDeserPartContext.getPartDeserializer();
        currentPartRawRowObjectInspector = rowDeserPartContext.getPartRawRowObjectInspector();
        currentVectorAssign = rowDeserPartContext.getVectorAssign();
      }
      break;
    default:
      throw new RuntimeException("Unexpected VectorMapOperator read type " + currentReadType.name());
    }
  }
}
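For orientation, a hedged sketch (not the actual VectorMapOperator.process code) of how the per-partition fields prepared above might drive the handling of a single incoming row; bytes, length and value stand in for whatever the underlying reader produced.

switch (currentReadType) {
case VECTORIZED_INPUT_FILE_FORMAT:
  // The reader already delivers whole VectorizedRowBatch objects; nothing to deserialize here.
  break;
case VECTOR_DESERIALIZE:
  // Point the DeserializeRead at the serialized row and write it into the next batch slot.
  currentDeserializeRead.set(bytes, 0, length);
  currentVectorDeserializeRow.deserialize(deserializerBatch, deserializerBatch.size++);
  break;
case ROW_DESERIALIZE:
  // Deserialize into a standard row object with currentPartDeserializer; currentVectorAssign
  // would then copy its first currentDataColumnCount fields into row deserializerBatch.size
  // (the exact VectorAssignRow call is omitted in this sketch).
  Object standardRow = currentPartDeserializer.deserialize(value);
  deserializerBatch.size++;
  break;
default:
  throw new RuntimeException("Unexpected read type " + currentReadType.name());
}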