use of org.apache.hadoop.hive.ql.io.HiveKey in project hive by apache.
the class HiveReduceFunctionResultList method processNextRecord.
@Override
protected void processNextRecord(Tuple2<HiveKey, V> inputRecord) throws IOException {
  HiveKey key = inputRecord._1();
  V value = inputRecord._2();
  if (value instanceof Iterable) {
    @SuppressWarnings("unchecked")
    Iterable<BytesWritable> values = (Iterable<BytesWritable>) value;
    reduceRecordHandler.<BytesWritable>processRow(key, values.iterator());
  } else {
    reduceRecordHandler.processRow(key, value);
  }
}
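The method only decides whether the incoming value is a single record or a pre-grouped Iterable and forwards it to the record handler accordingly. Below is a minimal, self-contained sketch of that same instanceof dispatch; it assumes hive-exec and scala-library (for Tuple2) are on the classpath, and the DispatchSketch class and countValues helper are purely illustrative, not part of Hive.

import java.util.Collections;

import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.io.BytesWritable;

import scala.Tuple2;

public class DispatchSketch {

  // Mirrors the instanceof check above: grouped values arrive as an Iterable
  // (e.g. after a shuffle that groups by key), single values arrive as-is.
  static int countValues(Tuple2<HiveKey, Object> record) {
    Object value = record._2();
    if (value instanceof Iterable) {
      int n = 0;
      for (Object ignored : (Iterable<?>) value) {
        n++;
      }
      return n;
    }
    return 1;
  }

  public static void main(String[] args) {
    HiveKey key = new HiveKey("k".getBytes(), 0);
    BytesWritable value = new BytesWritable("v".getBytes());
    // A single value and a one-element group each count as one record here.
    System.out.println(countValues(new Tuple2<HiveKey, Object>(key, value)));
    System.out.println(countValues(new Tuple2<HiveKey, Object>(key, Collections.singletonList(value))));
  }
}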
use of org.apache.hadoop.hive.ql.io.HiveKey in project hive by apache.
the class LocalHiveSparkClient method execute.
@Override
public SparkJobRef execute(DriverContext driverContext, SparkWork sparkWork) throws Exception {
  Context ctx = driverContext.getCtx();
  HiveConf hiveConf = (HiveConf) ctx.getConf();
  refreshLocalResources(sparkWork, hiveConf);
  JobConf jobConf = new JobConf(hiveConf);
  // Create temporary scratch dir
  Path emptyScratchDir = ctx.getMRTmpPath();
  FileSystem fs = emptyScratchDir.getFileSystem(jobConf);
  fs.mkdirs(emptyScratchDir);
  // Update credential provider location.
  // The password to the credential provider is already set in the sparkConf
  // in HiveSparkClientFactory.
  HiveConfUtil.updateJobCredentialProviders(jobConf);
  SparkCounters sparkCounters = new SparkCounters(sc);
  Map<String, List<String>> prefixes = sparkWork.getRequiredCounterPrefix();
  if (prefixes != null) {
    for (String group : prefixes.keySet()) {
      for (String counterName : prefixes.get(group)) {
        sparkCounters.createCounter(group, counterName);
      }
    }
  }
  SparkReporter sparkReporter = new SparkReporter(sparkCounters);
  // Generate Spark plan
  SparkPlanGenerator gen = new SparkPlanGenerator(sc, ctx, jobConf, emptyScratchDir, sparkReporter);
  SparkPlan plan = gen.generate(sparkWork);
  if (driverContext.isShutdown()) {
    throw new HiveException("Operation is cancelled.");
  }
  // Execute generated plan.
  JavaPairRDD<HiveKey, BytesWritable> finalRDD = plan.generateGraph();
  // We use Spark's async RDD action to submit the job, as it's currently the only way to get the jobId.
  JavaFutureAction<Void> future = finalRDD.foreachAsync(HiveVoidFunction.getInstance());
  // As we always use the foreach action to submit the RDD graph, it triggers exactly one job.
  int jobId = future.jobIds().get(0);
  LocalSparkJobStatus sparkJobStatus = new LocalSparkJobStatus(
      sc, jobId, jobMetricsListener, sparkCounters, plan.getCachedRDDIds(), future);
  return new LocalSparkJobRef(Integer.toString(jobId), hiveConf, sparkJobStatus, sc);
}
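The jobId retrieval at the end relies on foreachAsync triggering exactly one Spark job. Below is a minimal sketch of that pattern against a plain JavaRDD; it assumes Spark is on the classpath with a local master, and is not tied to Hive's plan generation.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaFutureAction;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class AsyncJobIdSketch {
  public static void main(String[] args) throws Exception {
    SparkConf conf = new SparkConf().setAppName("async-job-id").setMaster("local[2]");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3));
      // foreachAsync submits the work and returns immediately; since the
      // foreach action triggers exactly one job, jobIds() holds a single id.
      JavaFutureAction<Void> future = rdd.foreachAsync(x -> { });
      int jobId = future.jobIds().get(0);
      System.out.println("Submitted Spark job " + jobId);
      // Block until the job finishes, as a caller tracking status eventually would.
      future.get();
    }
  }
}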
use of org.apache.hadoop.hive.ql.io.HiveKey in project hive by apache.
the class KeyValueContainer method next.
public ObjectPair<HiveKey, BytesWritable> next() {
  Preconditions.checkState(hasNext());
  if (!readBufferUsed) {
    try {
      if (input == null && output != null) {
        // Close output stream if open
        output.close();
        output = null;
        FileInputStream fis = null;
        try {
          fis = new FileInputStream(tmpFile);
          input = new Input(fis);
        } finally {
          if (input == null && fis != null) {
            fis.close();
          }
        }
      }
      if (input != null) {
        // Load next batch from disk
        if (rowsOnDisk >= IN_MEMORY_NUM_ROWS) {
          rowsInReadBuffer = IN_MEMORY_NUM_ROWS;
        } else {
          rowsInReadBuffer = rowsOnDisk;
        }
        for (int i = 0; i < rowsInReadBuffer; i++) {
          ObjectPair<HiveKey, BytesWritable> pair = readBuffer[i];
          pair.setFirst(readHiveKey(input));
          pair.setSecond(readValue(input));
        }
        if (input.eof()) {
          input.close();
          input = null;
        }
        readBufferUsed = true;
        readCursor = 0;
        rowsOnDisk -= rowsInReadBuffer;
      }
    } catch (Exception e) {
      // Clean up the cache
      clear();
      throw new RuntimeException("Failed to load key/value pairs from disk", e);
    }
  }
  ObjectPair<HiveKey, BytesWritable> row = readBuffer[readCursor];
  if (++readCursor >= rowsInReadBuffer) {
    readBufferUsed = false;
    rowsInReadBuffer = 0;
    readCursor = 0;
  }
  return row;
}
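next() refills a fixed-size read buffer from the spill file in batches of at most IN_MEMORY_NUM_ROWS rows. The sketch below reproduces just that batched refill loop with plain JDK streams and integer rows; the class name and batch size are illustrative assumptions, not Hive code.

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;

public class BatchedSpillSketch {
  static final int IN_MEMORY_NUM_ROWS = 4;   // assumed small batch size for illustration

  public static void main(String[] args) throws IOException {
    File tmpFile = Files.createTempFile("spill", ".bin").toFile();
    tmpFile.deleteOnExit();

    // Spill 10 rows to disk.
    int rowsOnDisk = 10;
    try (DataOutputStream out = new DataOutputStream(new FileOutputStream(tmpFile))) {
      for (int i = 0; i < rowsOnDisk; i++) {
        out.writeInt(i);
      }
    }

    // Read them back in batches of at most IN_MEMORY_NUM_ROWS, the same
    // refill pattern next() uses for its readBuffer.
    try (DataInputStream in = new DataInputStream(new FileInputStream(tmpFile))) {
      int[] readBuffer = new int[IN_MEMORY_NUM_ROWS];
      while (rowsOnDisk > 0) {
        int rowsInReadBuffer = Math.min(rowsOnDisk, IN_MEMORY_NUM_ROWS);
        for (int i = 0; i < rowsInReadBuffer; i++) {
          readBuffer[i] = in.readInt();
        }
        rowsOnDisk -= rowsInReadBuffer;
        System.out.println("loaded batch of " + rowsInReadBuffer + " rows");
      }
    }
  }
}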
use of org.apache.hadoop.hive.ql.io.HiveKey in project hive by apache.
the class KeyValueContainer method readHiveKey.
private HiveKey readHiveKey(Input input) {
  HiveKey hiveKey = new HiveKey(input.readBytes(input.readInt()), input.readInt());
  hiveKey.setDistKeyLength(input.readInt());
  return hiveKey;
}
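readHiveKey expects a length-prefixed byte array followed by the hash code and the distribution key length. The round-trip sketch below pairs it with a writeHiveKey that mirrors that layout; the write side is an assumption made for illustration (KeyValueContainer has its own writer), and only Kryo and hive-exec are assumed on the classpath.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;

import org.apache.hadoop.hive.ql.io.HiveKey;

public class HiveKeyKryoSketch {

  // Assumed mirror image of readHiveKey: length-prefixed key bytes,
  // then the hash code, then the distribution key length.
  static void writeHiveKey(Output output, HiveKey key) {
    output.writeInt(key.getLength());
    output.writeBytes(key.getBytes(), 0, key.getLength());
    output.writeInt(key.hashCode());
    output.writeInt(key.getDistKeyLength());
  }

  static HiveKey readHiveKey(Input input) {
    HiveKey hiveKey = new HiveKey(input.readBytes(input.readInt()), input.readInt());
    hiveKey.setDistKeyLength(input.readInt());
    return hiveKey;
  }

  public static void main(String[] args) {
    HiveKey original = new HiveKey("row-key".getBytes(), 42);
    original.setDistKeyLength(3);

    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    Output output = new Output(bos);
    writeHiveKey(output, original);
    output.close();

    Input input = new Input(new ByteArrayInputStream(bos.toByteArray()));
    HiveKey copy = readHiveKey(input);
    input.close();

    System.out.println(copy.getLength() + " bytes, distKeyLength=" + copy.getDistKeyLength());
  }
}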
use of org.apache.hadoop.hive.ql.io.HiveKey in project hive by apache.
the class VectorReduceSinkCommonOperator method initializeOp.
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  if (LOG.isDebugEnabled()) {
    // Determine the name of our map or reduce task for debug tracing.
    BaseWork work = Utilities.getMapWork(hconf);
    if (work == null) {
      work = Utilities.getReduceWork(hconf);
    }
    taskName = work.getName();
  }
  String context = hconf.get(Operator.CONTEXT_NAME_KEY, "");
  if (context != null && !context.isEmpty()) {
    context = "_" + context.replace(" ", "_");
  }
  statsMap.put(Counter.RECORDS_OUT_INTERMEDIATE + context, recordCounter);
  reduceSkipTag = conf.getSkipTag();
  reduceTagByte = (byte) conf.getTag();
  if (isLogInfoEnabled) {
    LOG.info("Using tag = " + (int) reduceTagByte);
  }
  TableDesc keyTableDesc = conf.getKeySerializeInfo();
  boolean[] columnSortOrder =
      getColumnSortOrder(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length);
  byte[] columnNullMarker =
      getColumnNullMarker(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length, columnSortOrder);
  byte[] columnNotNullMarker =
      getColumnNotNullMarker(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length, columnSortOrder);
  keyBinarySortableSerializeWrite =
      new BinarySortableSerializeWrite(columnSortOrder, columnNullMarker, columnNotNullMarker);
  // Create all nulls key.
  try {
    Output nullKeyOutput = new Output();
    keyBinarySortableSerializeWrite.set(nullKeyOutput);
    for (int i = 0; i < reduceSinkKeyColumnMap.length; i++) {
      keyBinarySortableSerializeWrite.writeNull();
    }
    int nullBytesLength = nullKeyOutput.getLength();
    nullBytes = new byte[nullBytesLength];
    System.arraycopy(nullKeyOutput.getData(), 0, nullBytes, 0, nullBytesLength);
    nullKeyHashCode = HashCodeUtil.calculateBytesHashCode(nullBytes, 0, nullBytesLength);
  } catch (Exception e) {
    throw new HiveException(e);
  }
  valueLazyBinarySerializeWrite = new LazyBinarySerializeWrite(reduceSinkValueColumnMap.length);
  valueVectorSerializeRow =
      new VectorSerializeRow<LazyBinarySerializeWrite>(valueLazyBinarySerializeWrite);
  valueVectorSerializeRow.init(reduceSinkValueTypeInfos, reduceSinkValueColumnMap);
  valueOutput = new Output();
  valueVectorSerializeRow.setOutput(valueOutput);
  keyWritable = new HiveKey();
  valueBytesWritable = new BytesWritable();
  batchCounter = 0;
}
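The middle of initializeOp serializes an all-nulls key once and caches its bytes and hash code so fully-null keys can be emitted without reserializing. The sketch below isolates that step; the column count, sort order, and null/not-null marker values are illustrative assumptions, and hive-exec plus hive-serde are assumed on the classpath.

import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
import org.apache.hive.common.util.HashCodeUtil;

public class NullKeySketch {
  public static void main(String[] args) throws Exception {
    int keyColumnCount = 2;                                      // assumed key width
    boolean[] columnSortOrder = new boolean[] { true, true };    // assumed ascending order
    byte[] columnNullMarker = new byte[] { 0, 0 };               // assumed marker bytes
    byte[] columnNotNullMarker = new byte[] { 1, 1 };

    BinarySortableSerializeWrite serializeWrite =
        new BinarySortableSerializeWrite(columnSortOrder, columnNullMarker, columnNotNullMarker);

    // Serialize an all-nulls key exactly once, as initializeOp does.
    Output nullKeyOutput = new Output();
    serializeWrite.set(nullKeyOutput);
    for (int i = 0; i < keyColumnCount; i++) {
      serializeWrite.writeNull();
    }
    byte[] nullBytes = new byte[nullKeyOutput.getLength()];
    System.arraycopy(nullKeyOutput.getData(), 0, nullBytes, 0, nullBytes.length);
    int nullKeyHashCode = HashCodeUtil.calculateBytesHashCode(nullBytes, 0, nullBytes.length);

    // Rows whose key is entirely null can reuse these bytes and this hash code.
    HiveKey nullKey = new HiveKey(nullBytes, nullKeyHashCode);
    System.out.println("null key is " + nullKey.getLength() + " bytes, hash " + nullKeyHashCode);
  }
}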