Use of org.apache.hadoop.hive.ql.io.AcidUtils.AcidBaseFileInfo in project hive by apache.
From the class OrcInputFormat, method determineSplitStrategies.
/**
 * Builds the list of split strategies for {@code dir} from its base files.
 *
 * <p>With no base files, a single {@code determineSplitStrategy} call is made
 * with an empty file list and {@code false} as the boolean flag. Otherwise the
 * base files are partitioned by {@link AcidBaseFileInfo#isOriginal()} and one
 * call is made per non-empty partition: original files first (flag
 * {@code true}), then the remaining files (flag {@code false}). Any call that
 * yields {@code null} contributes nothing to the result.
 *
 * @param baseFiles base files to partition; must not be {@code null}
 * @return the non-null strategies produced, in the order described above;
 *         possibly empty, never {@code null}
 */
@VisibleForTesting
static List<SplitStrategy<?>> determineSplitStrategies(CombinedCtx combinedCtx, Context context, FileSystem fs, Path dir, AcidUtils.Directory dirInfo, List<AcidBaseFileInfo> baseFiles, List<ParsedDelta> parsedDeltas, List<OrcProto.Type> readerTypes, UserGroupInformation ugi, boolean allowSyntheticFileIds) {
  final List<SplitStrategy<?>> strategies = new ArrayList<SplitStrategy<?>>();
  final List<HdfsFileStatusWithId> acidFiles = new ArrayList<HdfsFileStatusWithId>();

  // No base files at all: produce at most one strategy from the empty list and stop.
  if (baseFiles.isEmpty()) {
    final SplitStrategy<?> soleStrategy = determineSplitStrategy(combinedCtx, context, fs, dir, dirInfo, acidFiles, false, parsedDeltas, readerTypes, ugi, allowSyntheticFileIds);
    if (soleStrategy != null) {
      strategies.add(soleStrategy);
    }
    return strategies;
  }

  // Partition the base files: "original" (non-acid schema) versus everything else.
  final List<HdfsFileStatusWithId> originalFiles = new ArrayList<HdfsFileStatusWithId>();
  for (AcidBaseFileInfo info : baseFiles) {
    final List<HdfsFileStatusWithId> bucket = info.isOriginal() ? originalFiles : acidFiles;
    bucket.add(info.getHdfsFileStatusWithId());
  }

  // Original files are handled first so their strategy precedes the acid one in the result.
  if (!originalFiles.isEmpty()) {
    final SplitStrategy<?> originalStrategy = determineSplitStrategy(combinedCtx, context, fs, dir, dirInfo, originalFiles, true, parsedDeltas, readerTypes, ugi, allowSyntheticFileIds);
    if (originalStrategy != null) {
      strategies.add(originalStrategy);
    }
  }

  if (!acidFiles.isEmpty()) {
    final SplitStrategy<?> acidStrategy = determineSplitStrategy(combinedCtx, context, fs, dir, dirInfo, acidFiles, false, parsedDeltas, readerTypes, ugi, allowSyntheticFileIds);
    if (acidStrategy != null) {
      strategies.add(acidStrategy);
    }
  }
  return strategies;
}
Use of org.apache.hadoop.hive.ql.io.AcidUtils.AcidBaseFileInfo in project hive by apache.
From the class OrcInputFormat, method determineSplitStrategies.
/**
 * Builds the list of split strategies for {@code dir} from its base files.
 *
 * <p>An {@code isDefaultFs} flag is computed first and forwarded to every
 * {@code determineSplitStrategy} call: it is {@code true} when the
 * {@code LLAP_CACHE_DEFAULT_FS_FILE_ID} setting is off, or when {@code fs} is
 * a {@link DistributedFileSystem} for which {@code HdfsUtils.isDefaultFs}
 * returns {@code true}.
 *
 * <p>An empty {@code baseFiles} list is treated as unexpected: it trips
 * {@code assert false} when assertions are enabled; otherwise a single call is
 * made with an empty file list. For a non-empty list, the files are partitioned
 * by {@link AcidBaseFileInfo#isOriginal()} and one call is made per non-empty
 * partition, original files first (flag {@code true}), then acid-schema files
 * (flag {@code false}). Calls yielding {@code null} contribute nothing.
 *
 * @param baseFiles base files to partition; must not be {@code null}
 * @return the non-null strategies produced, in the order described above;
 *         possibly empty, never {@code null}
 * @throws IOException propagated from the helpers invoked here
 */
@VisibleForTesting
static List<SplitStrategy<?>> determineSplitStrategies(CombinedCtx combinedCtx, Context context, FileSystem fs, Path dir, List<AcidBaseFileInfo> baseFiles, List<ParsedDelta> parsedDeltas, List<OrcProto.Type> readerTypes, UserGroupInformation ugi, boolean allowSyntheticFileIds) throws IOException {
  final List<SplitStrategy<?>> strategies = new ArrayList<SplitStrategy<?>>();

  // When the config check is off, every file system is treated as the default one.
  final boolean checkDefaultFs = HiveConf.getBoolVar(context.conf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID);
  final boolean isDefaultFs;
  if (!checkDefaultFs) {
    isDefaultFs = true;
  } else {
    isDefaultFs = (fs instanceof DistributedFileSystem) && HdfsUtils.isDefaultFs((DistributedFileSystem) fs);
  }

  if (baseFiles.isEmpty()) {
    // Not expected to happen; with assertions disabled we still fall back to a single strategy.
    assert false : "acid 2.0 no base?!: " + dir;
    final SplitStrategy<?> soleStrategy = determineSplitStrategy(combinedCtx, context, fs, dir, Collections.emptyList(), false, parsedDeltas, readerTypes, ugi, allowSyntheticFileIds, isDefaultFs);
    if (soleStrategy != null) {
      strategies.add(soleStrategy);
    }
    return strategies;
  }

  // Partition the base files: acid schema versus "original" (non-acid schema).
  final List<HdfsFileStatusWithId> acidFiles = new ArrayList<>();
  final List<HdfsFileStatusWithId> originalFiles = new ArrayList<>();
  for (AcidBaseFileInfo info : baseFiles) {
    if (!info.isOriginal()) {
      assert info.isAcidSchema();
      acidFiles.add(info.getHdfsFileStatusWithId());
    } else {
      originalFiles.add(info.getHdfsFileStatusWithId());
    }
  }

  // Original files are handled first so their strategy precedes the acid one in the result.
  if (!originalFiles.isEmpty()) {
    final SplitStrategy<?> originalStrategy = determineSplitStrategy(combinedCtx, context, fs, dir, originalFiles, true, parsedDeltas, readerTypes, ugi, allowSyntheticFileIds, isDefaultFs);
    if (originalStrategy != null) {
      strategies.add(originalStrategy);
    }
  }

  if (!acidFiles.isEmpty()) {
    final SplitStrategy<?> acidStrategy = determineSplitStrategy(combinedCtx, context, fs, dir, acidFiles, false, parsedDeltas, readerTypes, ugi, allowSyntheticFileIds, isDefaultFs);
    if (acidStrategy != null) {
      strategies.add(acidStrategy);
    }
  }
  return strategies;
}
Aggregations