Example use of org.apache.hadoop.hive.ql.io.AcidUtils.FileInfo in the Apache Hive project.
From the class OrcInputFormat, method determineSplitStrategies.
/**
 * Computes the split strategies for the base files of {@code dir}.
 *
 * <p>The base files are divided into two groups according to
 * {@link FileInfo#isOriginal()}: files with the original (non-acid) schema and
 * files with the acid schema. For each non-empty group a strategy is requested
 * from {@code determineSplitStrategy}; a {@code null} result is skipped. The
 * original-schema strategy, if any, precedes the acid-schema one in the
 * returned list.
 *
 * <p>When {@code baseFiles} is empty an assertion fires (if assertions are
 * enabled); otherwise a single strategy is requested with an empty file list
 * and the result is returned immediately.
 *
 * @param combinedCtx passed through to {@code determineSplitStrategy}
 * @param context supplies the configuration ({@code context.conf}) read for the
 *     LLAP file-id settings; also passed through
 * @param fs file system; checked for being the default
 *     {@code DistributedFileSystem} when that check is enabled by configuration
 * @param dir directory being processed; passed through and used in the
 *     assertion message
 * @param baseFiles base files to partition by schema kind
 * @param parsedDeltas passed through to {@code determineSplitStrategy}
 * @param readerTypes passed through to {@code determineSplitStrategy}
 * @param ugi passed through to {@code determineSplitStrategy}
 * @param allowSyntheticFileIds passed through to {@code determineSplitStrategy}
 * @return the non-null strategies produced, possibly empty
 * @throws IOException declared for the delegated calls
 */
@VisibleForTesting
static List<SplitStrategy<?>> determineSplitStrategies(CombinedCtx combinedCtx, Context context,
    FileSystem fs, Path dir, List<FileInfo> baseFiles, List<ParsedDelta> parsedDeltas,
    List<OrcProto.Type> readerTypes, UserGroupInformation ugi, boolean allowSyntheticFileIds)
    throws IOException {
  final List<SplitStrategy<?>> strategies = new ArrayList<>();

  final boolean checkDefaultFs =
      HiveConf.getBoolVar(context.conf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID);
  final boolean forceSynthetic =
      !HiveConf.getBoolVar(context.conf, ConfVars.LLAP_IO_USE_FILEID_PATH);

  final boolean isDefaultFs;
  if (forceSynthetic) {
    // Synthetic ids are forced: treat the file system as non-default.
    isDefaultFs = false;
  } else if (!checkDefaultFs) {
    // The default-FS check is disabled by configuration.
    isDefaultFs = true;
  } else {
    isDefaultFs = fs instanceof DistributedFileSystem
        && HdfsUtils.isDefaultFs((DistributedFileSystem) fs);
  }

  if (baseFiles.isEmpty()) {
    // Trips only when assertions are enabled; otherwise fall back to a
    // strategy computed over an empty file list.
    assert false : "acid 2.0 no base?!: " + dir;
    final SplitStrategy<?> noBaseStrategy = determineSplitStrategy(combinedCtx, context, fs, dir,
        Collections.emptyList(), false, parsedDeltas, readerTypes, ugi, allowSyntheticFileIds,
        isDefaultFs);
    if (noBaseStrategy != null) {
      strategies.add(noBaseStrategy);
    }
    return strategies;
  }

  // Partition the base files by schema kind: acid vs. original (non-acid).
  final List<HdfsFileStatusWithId> acidSchemaFiles = new ArrayList<>();
  final List<HdfsFileStatusWithId> originalSchemaFiles = new ArrayList<>();
  for (FileInfo baseFile : baseFiles) {
    final List<HdfsFileStatusWithId> target =
        baseFile.isOriginal() ? originalSchemaFiles : acidSchemaFiles;
    target.add(baseFile.getHdfsFileStatusWithId());
  }

  // Original-schema files first, if there are any.
  if (!originalSchemaFiles.isEmpty()) {
    final SplitStrategy<?> originalStrategy = determineSplitStrategy(combinedCtx, context, fs,
        dir, originalSchemaFiles, true, parsedDeltas, readerTypes, ugi, allowSyntheticFileIds,
        isDefaultFs);
    if (originalStrategy != null) {
      strategies.add(originalStrategy);
    }
  }

  // Then the acid-schema files, if there are any.
  if (!acidSchemaFiles.isEmpty()) {
    final SplitStrategy<?> acidStrategy = determineSplitStrategy(combinedCtx, context, fs, dir,
        acidSchemaFiles, false, parsedDeltas, readerTypes, ugi, allowSyntheticFileIds,
        isDefaultFs);
    if (acidStrategy != null) {
      strategies.add(acidStrategy);
    }
  }

  return strategies;
}
Aggregations