Usage of `org.apache.iceberg.mr.mapreduce.IcebergSplit` in the Apache Hive project.
Class `HiveIcebergInputFormat`, method `getSplits`:
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  // Translate the serialized Hive filter expression (if present) into an Iceberg
  // Expression so Iceberg can prune files during split planning.
  String serializedFilter = job.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  if (serializedFilter != null) {
    ExprNodeGenericFuncDesc filterDesc =
        SerializationUtilities.deserializeObject(serializedFilter, ExprNodeGenericFuncDesc.class);
    SearchArgument searchArgument = ConvertAstToSearchArg.create(job, filterDesc);
    try {
      Expression icebergFilter = HiveIcebergFilterFactory.generateFilterExpression(searchArgument);
      job.set(InputFormatConfig.FILTER_EXPRESSION, SerializationUtil.serializeToBase64(icebergFilter));
    } catch (UnsupportedOperationException e) {
      // Not every Hive predicate has an Iceberg equivalent; Hive re-applies the filter on read.
      LOG.warn("Unable to create Iceberg filter, continuing without filter (will be applied by Hive later): ", e);
    }
  }

  // Forward column projection and time-travel settings to the Iceberg input format.
  job.set(InputFormatConfig.SELECTED_COLUMNS, job.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, ""));
  job.set(InputFormatConfig.AS_OF_TIMESTAMP, job.get(TableScanDesc.AS_OF_TIMESTAMP, "-1"));
  job.set(InputFormatConfig.SNAPSHOT_ID, job.get(TableScanDesc.AS_OF_VERSION, "-1"));

  String tableLocation = job.get(InputFormatConfig.TABLE_LOCATION);
  // Wrap each planned IcebergSplit so it also carries the table location.
  InputSplit[] planned = super.getSplits(job, numSplits);
  InputSplit[] wrapped = new InputSplit[planned.length];
  for (int i = 0; i < planned.length; i++) {
    wrapped[i] = new HiveIcebergSplit((IcebergSplit) planned[i], tableLocation);
  }
  return wrapped;
}
Usage of `org.apache.iceberg.mr.mapreduce.IcebergSplit` in the Apache Hive project.
Class `HiveIcebergInputFormat`, method `getRecordReader`:
@Override
public RecordReader<Void, Container<Record>> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
  // Forward the projected column list to the Iceberg reader.
  job.set(InputFormatConfig.SELECTED_COLUMNS, job.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, ""));

  boolean vectorizedRead =
      HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) && Utilities.getIsVectorized(job);
  if (!vectorizedRead) {
    // Row-by-row path: delegate to the generic Iceberg record reader.
    return super.getRecordReader(split, job, reporter);
  }

  Preconditions.checkArgument(MetastoreUtil.hive3PresentOnClasspath(), "Vectorization only supported for Hive 3+");
  job.setEnum(InputFormatConfig.IN_MEMORY_DATA_MODEL, InputFormatConfig.InMemoryDataModel.HIVE);
  // Residual filtering is handled by Hive's vectorized operators, so skip it in Iceberg.
  job.setBoolean(InputFormatConfig.SKIP_RESIDUAL_FILTERING, true);
  IcebergSplit innerSplit = ((IcebergSplitContainer) split).icebergSplit();
  // bogus cast for favouring code reuse over syntax
  return (RecordReader) HIVE_VECTORIZED_RECORDREADER_CTOR.newInstance(new IcebergInputFormat<>(), innerSplit, job, reporter);
}
Usage of `org.apache.iceberg.mr.mapreduce.IcebergSplit` in the Apache Hive project.
Class `HiveIcebergSplit`, method `readFields`:
@Override
public void readFields(DataInput in) throws IOException {
  // Wire format (written by the matching write() method — TODO confirm against writer):
  // an int length prefix, the serialized table location bytes, then the inner split.
  int locationLength = in.readInt();
  byte[] locationBytes = new byte[locationLength];
  in.readFully(locationBytes);
  tableLocation = SerializationUtil.deserializeFromBytes(locationBytes);

  // The wrapped IcebergSplit deserializes itself from the remaining stream.
  IcebergSplit deserialized = new IcebergSplit();
  deserialized.readFields(in);
  innerSplit = deserialized;
}
Aggregations