Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache: class TestCombineHiveInputFormat, method testAvoidSplitCombination.
@Test
public void testAvoidSplitCombination() throws Exception {
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf);
  TableDesc tblDesc = Utilities.defaultTd;
  tblDesc.setInputFileFormatClass(TestSkipCombineInputFormat.class);
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path("/tmp/testfolder1"), partDesc);
  pt.put(new Path("/tmp/testfolder2"), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Path mapWorkPath = new Path("/tmp/" + System.getProperty("user.name"), "hive");
  Utilities.setMapRedWork(conf, mrwork, mapWorkPath);
  try {
    Path[] paths = new Path[2];
    paths[0] = new Path("/tmp/testfolder1");
    paths[1] = new Path("/tmp/testfolder2");
    CombineHiveInputFormat combineInputFormat =
        ReflectionUtils.newInstance(CombineHiveInputFormat.class, conf);
    combineInputFormat.pathToPartitionInfo = Utilities.getMapWork(conf).getPathToPartitionInfo();
    Set results = combineInputFormat.getNonCombinablePathIndices(job, paths, 2);
    assertEquals("Should have both path indices in the results set", 2, results.size());
  } finally {
    // Clean up the MapredWork path.
    FileSystem.get(conf).delete(mapWorkPath, true);
  }
}
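The test registers TestSkipCombineInputFormat as the input file format so that both paths are reported as non-combinable. Below is a minimal sketch of what such a format could look like, assuming it opts out through CombineHiveInputFormat's AvoidSplitCombination hook; the class name and the always-true behavior are illustrative assumptions, not necessarily the exact test class.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
import org.apache.hadoop.mapred.TextInputFormat;

// Illustrative sketch: an input format that asks CombineHiveInputFormat to skip
// combining its splits. Every path is reported as non-combinable, which is why the
// test above expects both path indices in the result set.
public class SkipCombineTextInputFormat extends TextInputFormat
    implements CombineHiveInputFormat.AvoidSplitCombination {
  @Override
  public boolean shouldSkipCombine(Path path, Configuration conf) throws IOException {
    return true;
  }
}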
Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache: class TestSkippingTextInputFormat, method setUp.
@Before
public void setUp() throws IOException {
  conf = new Configuration();
  job = new JobConf(conf);
  TableDesc tblDesc = Utilities.defaultTd;
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path("/tmp/testfolder"), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Utilities.setMapRedWork(job, mrwork, new Path("/tmp/" + System.getProperty("user.name"), "hive"));
  fileSystem = FileSystem.getLocal(conf);
  testDir = new Path(System.getProperty("test.tmp.dir",
      System.getProperty("user.dir", new File(".").getAbsolutePath())) + "/TestSkippingTextInputFormat");
  reporter = Reporter.NULL;
  fileSystem.delete(testDir, true);
  dataDir = new Path(testDir, "datadir");
  fileSystem.mkdirs(dataDir);
}
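Once setUp has serialized the MapredWork, the plan can be read back from the same configuration. The following is a short hedged sketch of the lookup an input format (or the test itself) could perform; it assumes the job field and the path registered in the setUp above, and the helper name is illustrative.

// Illustrative sketch, assuming the job and path registered in setUp above.
// Utilities.getMapWork deserializes the plan written by Utilities.setMapRedWork,
// so the PartitionDesc registered for /tmp/testfolder can be recovered from it.
private PartitionDesc lookupRegisteredPartition() {
  MapWork mapWork = Utilities.getMapWork(job);
  return mapWork.getPathToPartitionInfo().get(new Path("/tmp/testfolder"));
}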
Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache: class TestSymlinkTextInputFormat, method setUp.
@Before
public void setUp() throws IOException {
  conf = new Configuration();
  job = new JobConf(conf);
  TableDesc tblDesc = Utilities.defaultTd;
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path("/tmp/testfolder"), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Utilities.setMapRedWork(job, mrwork, new Path("/tmp/" + System.getProperty("user.name"), "hive"));
  fileSystem = FileSystem.getLocal(conf);
  testDir = new Path(System.getProperty("test.tmp.dir",
      System.getProperty("user.dir", new File(".").getAbsolutePath())) + "/TestSymlinkTextInputFormat");
  reporter = Reporter.NULL;
  fileSystem.delete(testDir, true);
  dataDir1 = new Path(testDir, "datadir1");
  dataDir2 = new Path(testDir, "datadir2");
  symlinkDir = new Path(testDir, "symlinkdir");
}
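The directories created here follow the symlink-table layout: data files live under the data directories, and the symlink directory holds plain text files whose lines are paths to those data files. Below is a hedged sketch of how a test body might populate them; the helper name, file names and contents are illustrative, not taken from the original test.

// Illustrative helper, assuming the fileSystem, dataDir1, dataDir2 and symlinkDir
// fields initialized in setUp above.
private void writeSymlinkedData() throws IOException {
  Path dataFile1 = new Path(dataDir1, "datafile1.txt");
  Path dataFile2 = new Path(dataDir2, "datafile2.txt");
  try (FSDataOutputStream out = fileSystem.create(dataFile1)) {
    out.writeBytes("line1\nline2\n");
  }
  try (FSDataOutputStream out = fileSystem.create(dataFile2)) {
    out.writeBytes("line3\nline4\n");
  }
  // Each line of the symlink file points at one data file.
  fileSystem.mkdirs(symlinkDir);
  try (FSDataOutputStream out = fileSystem.create(new Path(symlinkDir, "symlink1.txt"))) {
    out.writeBytes(dataFile1 + "\n" + dataFile2 + "\n");
  }
}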
Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache: class TestHiveBinarySearchRecordReader, method init.
private void init() throws IOException {
  conf = new JobConf();
  resetIOContext();
  rcfReader = mock(RCFileRecordReader.class);
  when(rcfReader.next((LongWritable) anyObject(), (BytesRefArrayWritable) anyObject())).thenReturn(true);
  // Since the start is 0 and the length is 100, the first call to sync should be with the
  // value 50, so return that for getPos().
  when(rcfReader.getPos()).thenReturn(50L);
  conf.setBoolean("hive.input.format.sorted", true);
  TableDesc tblDesc = Utilities.defaultTd;
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path("/tmp/testfolder"), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Utilities.setMapRedWork(conf, mrwork, new Path("/tmp/" + System.getProperty("user.name"), "hive"));
  hiveSplit = new TestHiveInputSplit();
  hbsReader = new TestHiveRecordReader(rcfReader, conf);
  hbsReader.initIOContext(hiveSplit, conf, Class.class, rcfReader);
}
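The stubbed getPos() value follows from the binary-search midpoint arithmetic mentioned in the comment: for a split starting at offset 0 with length 100, the first probe lands in the middle. A small illustrative helper, not part of the test, makes that arithmetic explicit.

// Illustrative only: the first binary-search probe over a split [start, start + length)
// targets the middle offset, which is why the mock returns 50 for a 0..100 split.
static long firstProbeOffset(long start, long length) {
  return start + length / 2;  // firstProbeOffset(0L, 100L) == 50L
}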
Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache: class LlapInputFormat, method createFakeVrbCtx.
static VectorizedRowBatchCtx createFakeVrbCtx(MapWork mapWork) throws HiveException {
  // This is based on Vectorizer code, minus the validation.
  // Add all non-virtual columns from the TableScan operator.
  RowSchema rowSchema = findTsOp(mapWork).getSchema();
  final List<String> colNames = new ArrayList<String>(rowSchema.getSignature().size());
  final List<TypeInfo> colTypes = new ArrayList<TypeInfo>(rowSchema.getSignature().size());
  ArrayList<VirtualColumn> virtualColumnList = new ArrayList<>(2);
  for (ColumnInfo c : rowSchema.getSignature()) {
    String columnName = c.getInternalName();
    if (ALLOWED_VIRTUAL_COLUMNS.containsKey(columnName)) {
      virtualColumnList.add(ALLOWED_VIRTUAL_COLUMNS.get(columnName));
    } else if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(columnName)) {
      continue;
    }
    colNames.add(columnName);
    colTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(c.getTypeName()));
  }
  // Determine the partition columns using the first partition descriptor.
  // Note: like the vectorizer, this assumes partition columns come after data columns.
  int partitionColumnCount = 0;
  Iterator<Path> paths = mapWork.getPathToAliases().keySet().iterator();
  if (paths.hasNext()) {
    PartitionDesc partDesc = mapWork.getPathToPartitionInfo().get(paths.next());
    if (partDesc != null) {
      LinkedHashMap<String, String> partSpec = partDesc.getPartSpec();
      if (partSpec != null && !partSpec.isEmpty()) {
        partitionColumnCount = partSpec.size();
      }
    }
  }
  final VirtualColumn[] virtualColumns = virtualColumnList.toArray(new VirtualColumn[0]);
  return new VectorizedRowBatchCtx(colNames.toArray(new String[colNames.size()]),
      colTypes.toArray(new TypeInfo[colTypes.size()]), null, null, partitionColumnCount,
      virtualColumns.length, virtualColumns, new String[0], null);
}
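The partition-column count comes directly from the size of the first PartitionDesc's partition spec. A hedged sketch of that relationship follows, using an illustrative two-level partition spec; the column names and values are assumptions.

// Illustrative: a PartitionDesc built with a two-entry partition spec would make
// createFakeVrbCtx report partitionColumnCount == 2. The spec keys and values are made up.
LinkedHashMap<String, String> partSpec = new LinkedHashMap<>();
partSpec.put("ds", "2024-01-01");
partSpec.put("hr", "00");
PartitionDesc partDesc = new PartitionDesc(Utilities.defaultTd, partSpec);
int partitionColumnCount = partDesc.getPartSpec().size();  // 2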