Example 6 with HiveKey

Use of org.apache.hadoop.hive.ql.io.HiveKey in project hive by apache.

The class HiveReduceFunctionResultList, method processNextRecord:

@Override
protected void processNextRecord(Tuple2<HiveKey, V> inputRecord) throws IOException {
    HiveKey key = inputRecord._1();
    V value = inputRecord._2();
    if (value instanceof Iterable) {
        // After a shuffle that groups by key, the values arrive as a single
        // Iterable; hand the whole iterator to the record handler at once.
        @SuppressWarnings("unchecked")
        Iterable<BytesWritable> values = (Iterable<BytesWritable>) value;
        reduceRecordHandler.<BytesWritable>processRow(key, values.iterator());
    } else {
        // Otherwise forward the single key/value pair.
        reduceRecordHandler.processRow(key, value);
    }
}
Also used : HiveKey(org.apache.hadoop.hive.ql.io.HiveKey) BytesWritable(org.apache.hadoop.io.BytesWritable)
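
To make the input shape concrete, here is a minimal hedged sketch of the kind of record processNextRecord receives after a group-by shuffle. The literal keys, values, and the toyRecord name are invented for illustration; it assumes scala.Tuple2, java.util.Arrays, and java.nio.charset.StandardCharsets are imported.

// Illustrative only: a shuffled record pairing one HiveKey with its
// already-grouped values, matching the Iterable branch above.
HiveKey key = new HiveKey("k1".getBytes(StandardCharsets.UTF_8), "k1".hashCode());
Iterable<BytesWritable> grouped = Arrays.asList(
        new BytesWritable("v1".getBytes(StandardCharsets.UTF_8)),
        new BytesWritable("v2".getBytes(StandardCharsets.UTF_8)));
Tuple2<HiveKey, Iterable<BytesWritable>> toyRecord = new Tuple2<>(key, grouped);
// Feeding toyRecord to processNextRecord would hand both values to the
// reduce record handler in a single processRow call.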

Example 7 with HiveKey

Use of org.apache.hadoop.hive.ql.io.HiveKey in project hive by apache.

The class LocalHiveSparkClient, method execute:

@Override
public SparkJobRef execute(DriverContext driverContext, SparkWork sparkWork) throws Exception {
    Context ctx = driverContext.getCtx();
    HiveConf hiveConf = (HiveConf) ctx.getConf();
    refreshLocalResources(sparkWork, hiveConf);
    JobConf jobConf = new JobConf(hiveConf);
    // Create a temporary scratch dir for this query.
    Path emptyScratchDir = ctx.getMRTmpPath();
    FileSystem fs = emptyScratchDir.getFileSystem(jobConf);
    fs.mkdirs(emptyScratchDir);
    // Update the credential provider location; the credential provider's
    // password is already set in the sparkConf by HiveSparkClientFactory.
    HiveConfUtil.updateJobCredentialProviders(jobConf);
    SparkCounters sparkCounters = new SparkCounters(sc);
    Map<String, List<String>> prefixes = sparkWork.getRequiredCounterPrefix();
    if (prefixes != null) {
        for (String group : prefixes.keySet()) {
            for (String counterName : prefixes.get(group)) {
                sparkCounters.createCounter(group, counterName);
            }
        }
    }
    SparkReporter sparkReporter = new SparkReporter(sparkCounters);
    // Generate Spark plan
    SparkPlanGenerator gen = new SparkPlanGenerator(sc, ctx, jobConf, emptyScratchDir, sparkReporter);
    SparkPlan plan = gen.generate(sparkWork);
    if (driverContext.isShutdown()) {
        throw new HiveException("Operation is cancelled.");
    }
    // Execute generated plan.
    JavaPairRDD<HiveKey, BytesWritable> finalRDD = plan.generateGraph();
    // We use Spark RDD async action to submit job as it's the only way to get jobId now.
    JavaFutureAction<Void> future = finalRDD.foreachAsync(HiveVoidFunction.getInstance());
    // As we always use foreach action to submit RDD graph, it would only trigger one job.
    int jobId = future.jobIds().get(0);
    LocalSparkJobStatus sparkJobStatus = new LocalSparkJobStatus(sc, jobId, jobMetricsListener, sparkCounters, plan.getCachedRDDIds(), future);
    return new LocalSparkJobRef(Integer.toString(jobId), hiveConf, sparkJobStatus, sc);
}
Also used : JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Context(org.apache.hadoop.hive.ql.Context) DriverContext(org.apache.hadoop.hive.ql.DriverContext) Path(org.apache.hadoop.fs.Path) SparkCounters(org.apache.hive.spark.counter.SparkCounters) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) BytesWritable(org.apache.hadoop.io.BytesWritable) LocalSparkJobStatus(org.apache.hadoop.hive.ql.exec.spark.status.impl.LocalSparkJobStatus) HiveKey(org.apache.hadoop.hive.ql.io.HiveKey) FileSystem(org.apache.hadoop.fs.FileSystem) HiveConf(org.apache.hadoop.hive.conf.HiveConf) ArrayList(java.util.ArrayList) List(java.util.List) LocalSparkJobRef(org.apache.hadoop.hive.ql.exec.spark.status.impl.LocalSparkJobRef) JobConf(org.apache.hadoop.mapred.JobConf)
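
The prefix loop above pre-registers every required counter with SparkCounters before the job is submitted, so that executors have matching counters to increment at run time. A minimal sketch of that step, with an invented prefix map standing in for sparkWork.getRequiredCounterPrefix() (the group and counter names are illustrative; sc is the JavaSparkContext):

// Hypothetical counter prefixes; the real map comes from the SparkWork plan.
Map<String, List<String>> prefixes = new HashMap<>();
prefixes.put("HIVE", Arrays.asList("RECORDS_IN", "RECORDS_OUT_INTERMEDIATE"));
SparkCounters sparkCounters = new SparkCounters(sc);
for (Map.Entry<String, List<String>> entry : prefixes.entrySet()) {
    for (String counterName : entry.getValue()) {
        sparkCounters.createCounter(entry.getKey(), counterName);
    }
}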

Example 8 with HiveKey

Use of org.apache.hadoop.hive.ql.io.HiveKey in project hive by apache.

The class KeyValueContainer, method next:

public ObjectPair<HiveKey, BytesWritable> next() {
    Preconditions.checkState(hasNext());
    if (!readBufferUsed) {
        try {
            if (input == null && output != null) {
                // Close output stream if open
                output.close();
                output = null;
                FileInputStream fis = null;
                try {
                    fis = new FileInputStream(tmpFile);
                    input = new Input(fis);
                } finally {
                    if (input == null && fis != null) {
                        fis.close();
                    }
                }
            }
            if (input != null) {
                // Load next batch from disk
                if (rowsOnDisk >= IN_MEMORY_NUM_ROWS) {
                    rowsInReadBuffer = IN_MEMORY_NUM_ROWS;
                } else {
                    rowsInReadBuffer = rowsOnDisk;
                }
                for (int i = 0; i < rowsInReadBuffer; i++) {
                    ObjectPair<HiveKey, BytesWritable> pair = readBuffer[i];
                    pair.setFirst(readHiveKey(input));
                    pair.setSecond(readValue(input));
                }
                if (input.eof()) {
                    input.close();
                    input = null;
                }
                readBufferUsed = true;
                readCursor = 0;
                rowsOnDisk -= rowsInReadBuffer;
            }
        } catch (Exception e) {
            // Clean up the cache
            clear();
            throw new RuntimeException("Failed to load key/value pairs from disk", e);
        }
    }
    ObjectPair<HiveKey, BytesWritable> row = readBuffer[readCursor];
    if (++readCursor >= rowsInReadBuffer) {
        readBufferUsed = false;
        rowsInReadBuffer = 0;
        readCursor = 0;
    }
    return row;
}
Also used : Input(com.esotericsoftware.kryo.io.Input) HiveKey(org.apache.hadoop.hive.ql.io.HiveKey) BytesWritable(org.apache.hadoop.io.BytesWritable) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)
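
A hedged caller-side sketch of the paging contract: hasNext()/next() hide the spill file completely, with next() refilling the in-memory read buffer in batches of up to IN_MEMORY_NUM_ROWS. The container variable and the consume() helper are hypothetical.

// Drain a KeyValueContainer the way a downstream consumer would.
while (container.hasNext()) {
    ObjectPair<HiveKey, BytesWritable> pair = container.next();
    consume(pair.getFirst(), pair.getSecond()); // consume() is an assumed sink
}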

Example 9 with HiveKey

Use of org.apache.hadoop.hive.ql.io.HiveKey in project hive by apache.

The class KeyValueContainer, method readHiveKey:

private HiveKey readHiveKey(Input input) {
    // On-disk layout: key length, key bytes, hash code, distribution-key length.
    HiveKey hiveKey = new HiveKey(input.readBytes(input.readInt()), input.readInt());
    hiveKey.setDistKeyLength(input.readInt());
    return hiveKey;
}
Also used : HiveKey(org.apache.hadoop.hive.ql.io.HiveKey)
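
readHiveKey fixes the on-disk layout: key length, key bytes, hash code, then the distribution-key length. For clarity, here is the mirror-image writer as a sketch; it is inferred from the read order rather than copied from the Hive source, and Output here is Kryo's com.esotericsoftware.kryo.io.Output.

// Sketch of the symmetric write: length, bytes, hash code, dist key length.
private void writeHiveKey(Output output, HiveKey hiveKey) {
    output.writeInt(hiveKey.getLength());
    output.writeBytes(hiveKey.getBytes(), 0, hiveKey.getLength());
    output.writeInt(hiveKey.hashCode());
    output.writeInt(hiveKey.getDistKeyLength());
}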

Example 10 with HiveKey

Use of org.apache.hadoop.hive.ql.io.HiveKey in project hive by apache.

The class VectorReduceSinkCommonOperator, method initializeOp:

@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    if (LOG.isDebugEnabled()) {
        // Determine the name of our map or reduce task for debug tracing.
        BaseWork work = Utilities.getMapWork(hconf);
        if (work == null) {
            work = Utilities.getReduceWork(hconf);
        }
        taskName = work.getName();
    }
    String context = hconf.get(Operator.CONTEXT_NAME_KEY, "");
    if (context != null && !context.isEmpty()) {
        context = "_" + context.replace(" ", "_");
    }
    statsMap.put(Counter.RECORDS_OUT_INTERMEDIATE + context, recordCounter);
    reduceSkipTag = conf.getSkipTag();
    reduceTagByte = (byte) conf.getTag();
    if (isLogInfoEnabled) {
        LOG.info("Using tag = " + (int) reduceTagByte);
    }
    TableDesc keyTableDesc = conf.getKeySerializeInfo();
    boolean[] columnSortOrder = getColumnSortOrder(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length);
    byte[] columnNullMarker = getColumnNullMarker(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length, columnSortOrder);
    byte[] columnNotNullMarker = getColumnNotNullMarker(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length, columnSortOrder);
    keyBinarySortableSerializeWrite = new BinarySortableSerializeWrite(columnSortOrder, columnNullMarker, columnNotNullMarker);
    // Create all nulls key.
    try {
        Output nullKeyOutput = new Output();
        keyBinarySortableSerializeWrite.set(nullKeyOutput);
        for (int i = 0; i < reduceSinkKeyColumnMap.length; i++) {
            keyBinarySortableSerializeWrite.writeNull();
        }
        int nullBytesLength = nullKeyOutput.getLength();
        nullBytes = new byte[nullBytesLength];
        System.arraycopy(nullKeyOutput.getData(), 0, nullBytes, 0, nullBytesLength);
        nullKeyHashCode = HashCodeUtil.calculateBytesHashCode(nullBytes, 0, nullBytesLength);
    } catch (Exception e) {
        throw new HiveException(e);
    }
    valueLazyBinarySerializeWrite = new LazyBinarySerializeWrite(reduceSinkValueColumnMap.length);
    valueVectorSerializeRow = new VectorSerializeRow<LazyBinarySerializeWrite>(valueLazyBinarySerializeWrite);
    valueVectorSerializeRow.init(reduceSinkValueTypeInfos, reduceSinkValueColumnMap);
    valueOutput = new Output();
    valueVectorSerializeRow.setOutput(valueOutput);
    keyWritable = new HiveKey();
    valueBytesWritable = new BytesWritable();
    batchCounter = 0;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) BytesWritable(org.apache.hadoop.io.BytesWritable) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HiveKey(org.apache.hadoop.hive.ql.io.HiveKey) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork)
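
The point of the "all nulls key" block above is to serialize the null key once and cache both its bytes and its hash code. A hedged sketch of how a later per-row path could reuse them when every key column is null (the field names follow the snippet; the surrounding process() method is not shown in the source above):

// Sketch: stamp the precomputed all-nulls serialization onto the cached
// output key instead of re-serializing nulls for every row.
keyWritable.set(nullBytes, 0, nullBytes.length);
keyWritable.setDistKeyLength(nullBytes.length);
keyWritable.setHashCode(nullKeyHashCode);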

Aggregations

HiveKey (org.apache.hadoop.hive.ql.io.HiveKey): 21 usages
BytesWritable (org.apache.hadoop.io.BytesWritable): 12 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 6 usages
IOException (java.io.IOException): 5 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 3 usages
Input (com.esotericsoftware.kryo.io.Input): 2 usages
FileInputStream (java.io.FileInputStream): 2 usages
Path (org.apache.hadoop.fs.Path): 2 usages
GenericUDFHash (org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash): 2 usages
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 2 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 2 usages
IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector): 2 usages
PrimitiveObjectInspectorFactory.javaBooleanObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector): 2 usages
PrimitiveObjectInspectorFactory.javaLongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector): 2 usages
PrimitiveObjectInspectorFactory.javaStringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector): 2 usages
JobConf (org.apache.hadoop.mapred.JobConf): 2 usages
FileNotFoundException (java.io.FileNotFoundException): 1 usage
ArrayList (java.util.ArrayList): 1 usage
Iterator (java.util.Iterator): 1 usage
List (java.util.List): 1 usage