
Example 1 with HiveKey

Use of org.apache.hadoop.hive.ql.io.HiveKey in project hive by apache.

From the class TestHiveKVResultCache, method scanResultList:

private static long scanResultList(long rows, int threshold, int separate, List<Tuple2<HiveKey, BytesWritable>> output, String prefix1, String prefix2) {
    final long iteratorCount = threshold == 0 ? 1 : rows * (100 - separate) / 100 / threshold;
    MyHiveFunctionResultList resultList = new MyHiveFunctionResultList(new Iterator() {

        // Input record iterator, not used
        private int i = 0;

        @Override
        public boolean hasNext() {
            return i++ < iteratorCount;
        }

        @Override
        public Object next() {
            return Integer.valueOf(i);
        }

        @Override
        public void remove() {
        }
    });
    resultList.init(rows, threshold, separate, prefix1, prefix2);
    long startTime = System.currentTimeMillis();
    while (resultList.hasNext()) {
        Object item = resultList.next();
        if (output != null) {
            output.add((Tuple2<HiveKey, BytesWritable>) item);
        }
    }
    long endTime = System.currentTimeMillis();
    return endTime - startTime;
}
Also used: HiveKey (org.apache.hadoop.hive.ql.io.HiveKey), Iterator (java.util.Iterator), BytesWritable (org.apache.hadoop.io.BytesWritable)
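A caller might invoke this helper as below to time a scan. The row count, cache threshold, separate percentage, and prefixes are illustrative assumptions, not values taken from the original test:

// Hypothetical invocation: time a scan of one million rows with a 512-row
// cache threshold, the separate percentage set to 0, and the output discarded.
long elapsedMs = scanResultList(1000000, 512, 0, null, "a", "b");
System.out.println("Scan took " + elapsedMs + " ms");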

Example 2 with HiveKey

Use of org.apache.hadoop.hive.ql.io.HiveKey in project hive by apache.

From the class TestHiveKVResultCache, method testSpillingHelper:

/** Helper method that inserts numRecords records, retrieves them from the cache, and verifies each one. */
private void testSpillingHelper(HiveKVResultCache cache, int numRecords) {
    for (int i = 0; i < numRecords; i++) {
        String key = "key_" + i;
        String value = "value_" + i;
        cache.add(new HiveKey(key.getBytes(), key.hashCode()), new BytesWritable(value.getBytes()));
    }
    int recordsSeen = 0;
    while (cache.hasNext()) {
        String key = "key_" + recordsSeen;
        String value = "value_" + recordsSeen;
        Tuple2<HiveKey, BytesWritable> row = cache.next();
        assertTrue("Unexpected key at position: " + recordsSeen, new String(row._1().getBytes()).equals(key));
        assertTrue("Unexpected value at position: " + recordsSeen, new String(row._2().getBytes()).equals(value));
        recordsSeen++;
    }
    assertTrue("Retrieved record count doesn't match inserted record count", numRecords == recordsSeen);
    cache.clear();
}
Also used: HiveKey (org.apache.hadoop.hive.ql.io.HiveKey), BytesWritable (org.apache.hadoop.io.BytesWritable)
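A test might drive this helper with different record counts to cover both the in-memory and spilled paths. The counts below are illustrative assumptions rather than the suite's actual values:

@Test
public void testSpillingIllustrative() throws Exception {
    HiveKVResultCache cache = new HiveKVResultCache();
    // Enough records to exceed the in-memory buffer and force a spill to disk.
    testSpillingHelper(cache, 10000);
    // A small second pass checks the cache is reusable after clear().
    testSpillingHelper(cache, 10);
}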

Example 3 with HiveKey

Use of org.apache.hadoop.hive.ql.io.HiveKey in project hive by apache.

From the class TestHiveKVResultCache, method testSimple:

@Test
public void testSimple() throws Exception {
    // Create KV result cache object, add one (k,v) pair and retrieve them.
    HiveKVResultCache cache = new HiveKVResultCache();
    HiveKey key = new HiveKey("key".getBytes(), "key".hashCode());
    BytesWritable value = new BytesWritable("value".getBytes());
    cache.add(key, value);
    assertTrue("KV result cache should have at least one element", cache.hasNext());
    Tuple2<HiveKey, BytesWritable> row = cache.next();
    assertTrue("Incorrect key", row._1().equals(key));
    assertTrue("Incorrect value", row._2().equals(value));
    assertTrue("Cache shouldn't have more records", !cache.hasNext());
}
Also used: HiveKey (org.apache.hadoop.hive.ql.io.HiveKey), BytesWritable (org.apache.hadoop.io.BytesWritable), Test (org.junit.Test)

Example 4 with HiveKey

Use of org.apache.hadoop.hive.ql.io.HiveKey in project hive by apache.

From the class FileSinkOperator, method initializeOp:

@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    try {
        this.hconf = hconf;
        filesCreated = false;
        isNativeTable = !conf.getTableInfo().isNonNative();
        isTemporary = conf.isTemporary();
        multiFileSpray = conf.isMultiFileSpray();
        totalFiles = conf.getTotalFiles();
        numFiles = conf.getNumFiles();
        dpCtx = conf.getDynPartCtx();
        lbCtx = conf.getLbCtx();
        fsp = prevFsp = null;
        valToPaths = new HashMap<String, FSPaths>();
        taskId = Utilities.getTaskId(hconf);
        initializeSpecPath();
        fs = specPath.getFileSystem(hconf);
        try {
            createHiveOutputFormat(hconf);
        } catch (HiveException ex) {
            logOutputFormatError(hconf, ex);
            throw ex;
        }
        isCompressed = conf.getCompressed();
        parent = Utilities.toTempPath(conf.getDirName());
        statsFromRecordWriter = new boolean[numFiles];
        serializer = (Serializer) conf.getTableInfo().getDeserializerClass().newInstance();
        serializer.initialize(unsetNestedColumnPaths(hconf), conf.getTableInfo().getProperties());
        outputClass = serializer.getSerializedClass();
        if (isLogInfoEnabled) {
            LOG.info("Using serializer : " + serializer + " and formatter : " + hiveOutputFormat + (isCompressed ? " with compression" : ""));
        }
        // Timeout is chosen to make sure that even if one iteration takes more than
        // half of the script.timeout but less than script.timeout, we will still
        // be able to report progress.
        timeOut = hconf.getInt("mapred.healthChecker.script.timeout", 600000) / 2;
        if (hconf instanceof JobConf) {
            jc = (JobConf) hconf;
        } else {
            // test code path
            jc = new JobConf(hconf);
        }
        if (multiFileSpray) {
            partitionEval = new ExprNodeEvaluator[conf.getPartitionCols().size()];
            int i = 0;
            for (ExprNodeDesc e : conf.getPartitionCols()) {
                partitionEval[i++] = ExprNodeEvaluatorFactory.get(e);
            }
            partitionObjectInspectors = initEvaluators(partitionEval, outputObjInspector);
            prtner = (HivePartitioner<HiveKey, Object>) ReflectionUtils.newInstance(jc.getPartitionerClass(), null);
        }
        if (dpCtx != null) {
            dpSetup();
        }
        if (lbCtx != null) {
            lbSetup();
        }
        if (!bDynParts) {
            fsp = new FSPaths(specPath);
            // createBucketFiles(fsp);
            if (!this.isSkewedStoredAsSubDirectories) {
                // special entry for non-DP case
                valToPaths.put("", fsp);
            }
        }
        final StoragePolicyValue tmpStorage = StoragePolicyValue.lookup(HiveConf.getVar(hconf, HIVE_TEMPORARY_TABLE_STORAGE));
        if (isTemporary && fsp != null && tmpStorage != StoragePolicyValue.DEFAULT) {
            final Path outputPath = fsp.taskOutputTempPath;
            StoragePolicyShim shim = ShimLoader.getHadoopShims().getStoragePolicyShim(fs);
            if (shim != null) {
                // directory creation is otherwise within the writers
                fs.mkdirs(outputPath);
                shim.setStoragePolicy(outputPath, tmpStorage);
            }
        }
        if (conf.getWriteType() == AcidUtils.Operation.UPDATE || conf.getWriteType() == AcidUtils.Operation.DELETE) {
            // ROW__ID is always in the first field
            recIdField = ((StructObjectInspector) outputObjInspector).getAllStructFieldRefs().get(0);
            recIdInspector = (StructObjectInspector) recIdField.getFieldObjectInspector();
            // bucket is the second field in the record id
            bucketField = recIdInspector.getAllStructFieldRefs().get(1);
            bucketInspector = (IntObjectInspector) bucketField.getFieldObjectInspector();
        }
        numRows = 0;
        cntr = 1;
        logEveryNRows = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVE_LOG_N_RECORDS);
        statsMap.put(getCounterName(Counter.RECORDS_OUT), row_count);
    } catch (HiveException e) {
        throw e;
    } catch (Exception e) {
        e.printStackTrace();
        throw new HiveException(e);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), StoragePolicyValue (org.apache.hadoop.hive.shims.HadoopShims.StoragePolicyValue), HiveFatalException (org.apache.hadoop.hive.ql.metadata.HiveFatalException), FileNotFoundException (java.io.FileNotFoundException), IOException (java.io.IOException), SerDeException (org.apache.hadoop.hive.serde2.SerDeException), HiveKey (org.apache.hadoop.hive.ql.io.HiveKey), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), JobConf (org.apache.hadoop.mapred.JobConf), StoragePolicyShim (org.apache.hadoop.hive.shims.HadoopShims.StoragePolicyShim), SubStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SubStructObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
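When multiFileSpray is enabled, the HivePartitioner instantiated above is later consulted to route each row's HiveKey to one of the totalFiles bucket files. A minimal sketch of that lookup, assuming the HivePartitioner.getBucket(key, value, numBuckets) contract; the real routing lives in FileSinkOperator's row-processing path, not in initializeOp:

// Illustrative only: ask the partitioner which bucket file should receive a key.
HiveKey key = new HiveKey("someKey".getBytes(), "someKey".hashCode());
int bucket = prtner.getBucket(key, null, totalFiles);
// 'bucket' indexes into the per-bucket writers created for this FSPaths entry.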

Example 5 with HiveKey

Use of org.apache.hadoop.hive.ql.io.HiveKey in project hive by apache.

From the class HiveKVResultCache, method readHiveKey:

private HiveKey readHiveKey(Input input) {
    HiveKey hiveKey = new HiveKey(input.readBytes(input.readInt()), input.readInt());
    hiveKey.setDistKeyLength(input.readInt());
    return hiveKey;
}
Also used: HiveKey (org.apache.hadoop.hive.ql.io.HiveKey)
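readHiveKey expects four fields in order: key length, key bytes, hash code, and distributed-key length. For context, a complementary writer would emit the same fields in the same order. The sketch below is an assumption that mirrors the reader; it is not the class's actual private writer (assumed import: com.esotericsoftware.kryo.io.Output):

// Hypothetical counterpart to readHiveKey, writing length, bytes, hash code,
// and distKeyLength so the reader above can reconstruct the key.
private void writeHiveKey(Output output, HiveKey hiveKey) {
    int size = hiveKey.getLength();
    output.writeInt(size);
    output.write(hiveKey.getBytes(), 0, size);
    output.writeInt(hiveKey.hashCode());
    output.writeInt(hiveKey.getDistKeyLength());
}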

Aggregations

HiveKey (org.apache.hadoop.hive.ql.io.HiveKey): 21 usages
BytesWritable (org.apache.hadoop.io.BytesWritable): 12 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 6 usages
IOException (java.io.IOException): 5 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 3 usages
Input (com.esotericsoftware.kryo.io.Input): 2 usages
FileInputStream (java.io.FileInputStream): 2 usages
Path (org.apache.hadoop.fs.Path): 2 usages
GenericUDFHash (org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash): 2 usages
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 2 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 2 usages
IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector): 2 usages
PrimitiveObjectInspectorFactory.javaBooleanObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector): 2 usages
PrimitiveObjectInspectorFactory.javaLongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector): 2 usages
PrimitiveObjectInspectorFactory.javaStringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector): 2 usages
JobConf (org.apache.hadoop.mapred.JobConf): 2 usages
FileNotFoundException (java.io.FileNotFoundException): 1 usage
ArrayList (java.util.ArrayList): 1 usage
Iterator (java.util.Iterator): 1 usage
List (java.util.List): 1 usage