
Example 46 with AbstractSerDe

Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

From the class PTFTranslator, method setupShape:

private ShapeDetails setupShape(StructObjectInspector OI, List<String> columnNames, RowResolver rr) throws SemanticException {
    Map<String, String> serdePropsMap = new LinkedHashMap<String, String>();
    AbstractSerDe serde = null;
    ShapeDetails shp = new ShapeDetails();
    try {
        serde = PTFTranslator.createLazyBinarySerDe(hCfg, OI, serdePropsMap);
        StructObjectInspector outOI = PTFPartition.setupPartitionOutputOI(serde, OI);
        shp.setOI(outOI);
    } catch (SerDeException se) {
        throw new SemanticException(se);
    }
    shp.setRr(rr);
    shp.setSerde(serde);
    shp.setSerdeClassName(serde.getClass().getName());
    shp.setSerdeProps(serdePropsMap);
    shp.setColumnNames(columnNames);
    TypeCheckCtx tCtx = new TypeCheckCtx(rr);
    tCtx.setUnparseTranslator(unparseT);
    shp.setTypeCheckCtx(tCtx);
    return shp;
}
Also used: ShapeDetails (org.apache.hadoop.hive.ql.plan.ptf.ShapeDetails), AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe), SerDeException (org.apache.hadoop.hive.serde2.SerDeException), LinkedHashMap (java.util.LinkedHashMap), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
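
ShapeDetails records the serde class name and the props map, not just the live serde object, because the PTF plan is serialized and the serde must be re-created at execution time. A minimal sketch of that reconstruction, assuming only the two stored fields; the helper name reconstructSerDe is hypothetical, not Hive API:

import java.util.Map;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;

// Hypothetical mirror of what a plan-deserialization helper would do with
// the serdeClassName and serdeProps stored in ShapeDetails.
static AbstractSerDe reconstructSerDe(String serdeClassName, Map<String, String> serdeProps,
        Configuration conf) throws SerDeException {
    try {
        AbstractSerDe serde = (AbstractSerDe) Class.forName(serdeClassName).newInstance();
        Properties props = new Properties();
        // Carries the columns / columns.types entries captured at translation time.
        props.putAll(serdeProps);
        SerDeUtils.initializeSerDe(serde, conf, props, null);
        return serde;
    } catch (ReflectiveOperationException e) {
        throw new SerDeException(e);
    }
}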

Example 47 with AbstractSerDe

Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

From the class PTFTranslator, method createLazyBinarySerDe:

/*
   * OI & Serde helper methods
   */
protected static AbstractSerDe createLazyBinarySerDe(Configuration cfg, StructObjectInspector oi, Map<String, String> serdePropsMap) throws SerDeException {
    serdePropsMap = serdePropsMap == null ? new LinkedHashMap<String, String>() : serdePropsMap;
    PTFDeserializer.addOIPropertiestoSerDePropsMap(oi, serdePropsMap);
    AbstractSerDe serDe = new LazyBinarySerDe();
    Properties p = new Properties();
    p.setProperty(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMNS, serdePropsMap.get(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMNS));
    p.setProperty(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES, serdePropsMap.get(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES));
    SerDeUtils.initializeSerDe(serDe, cfg, p, null);
    return serDe;
}
Also used: LazyBinarySerDe (org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe), Properties (java.util.Properties), AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe), LinkedHashMap (java.util.LinkedHashMap)
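
A hedged round-trip sketch of what a caller can do with the serde this helper returns. createLazyBinarySerDe is protected, so assume same-package access; the two-column schema and the row values are invented for illustration:

import java.util.Arrays;
import java.util.LinkedHashMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Writable;

static void roundTrip() throws SerDeException {
    // Inspector describing the rows we will hand to serialize().
    StructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("id", "name"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector));
    AbstractSerDe serde = PTFTranslator.createLazyBinarySerDe(
        new Configuration(), rowOI, new LinkedHashMap<String, String>());
    // Row -> LazyBinary bytes -> row again.
    Writable blob = serde.serialize(Arrays.<Object>asList(7, "ptf"), rowOI);
    Object back = serde.deserialize(blob);
    // Deserialized rows must be read through the serde's own inspector,
    // not the inspector used for serialization.
    StructObjectInspector outOI = (StructObjectInspector) serde.getObjectInspector();
}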

Example 48 with AbstractSerDe

Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

From the class HashTableSinkOperator, method initializeOp:

@Override
@SuppressWarnings("unchecked")
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    boolean isSilent = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVESESSIONSILENT);
    console = new LogHelper(LOG, isSilent);
    memoryExhaustionHandler = new MapJoinMemoryExhaustionHandler(console, conf.getHashtableMemoryUsage());
    emptyRowContainer.addRow(emptyObjectArray);
    // for small tables only; so get the big table position first
    posBigTableAlias = conf.getPosBigTable();
    order = conf.getTagOrder();
    // initialize some variables that used to be initialized in CommonJoinOperator
    this.hconf = hconf;
    filterMaps = conf.getFilterMap();
    int tagLen = conf.getTagLength();
    // process join keys
    joinKeys = new List[tagLen];
    JoinUtil.populateJoinKeyValue(joinKeys, conf.getKeys(), posBigTableAlias, hconf);
    joinKeysObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinKeys, inputObjInspectors, posBigTableAlias, tagLen);
    // process join values
    joinValues = new List[tagLen];
    JoinUtil.populateJoinKeyValue(joinValues, conf.getExprs(), posBigTableAlias, hconf);
    joinValuesObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinValues, inputObjInspectors, posBigTableAlias, tagLen);
    // process join filters
    joinFilters = new List[tagLen];
    JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(), posBigTableAlias, hconf);
    joinFilterObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinFilters, inputObjInspectors, posBigTableAlias, tagLen);
    if (!conf.isNoOuterJoin()) {
        for (Byte alias : order) {
            if (alias == posBigTableAlias || joinValues[alias] == null) {
                continue;
            }
            List<ObjectInspector> rcOIs = joinValuesObjectInspectors[alias];
            if (filterMaps != null && filterMaps[alias] != null) {
                // for each alias, add object inspector for filter tag as the last element
                rcOIs = new ArrayList<ObjectInspector>(rcOIs);
                rcOIs.add(PrimitiveObjectInspectorFactory.writableShortObjectInspector);
            }
        }
    }
    mapJoinTables = new MapJoinPersistableTableContainer[tagLen];
    mapJoinTableSerdes = new MapJoinTableContainerSerDe[tagLen];
    hashTableScale = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVEHASHTABLESCALE);
    if (hashTableScale <= 0) {
        hashTableScale = 1;
    }
    try {
        TableDesc keyTableDesc = conf.getKeyTblDesc();
        AbstractSerDe keySerde = (AbstractSerDe) ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(keySerde, null, keyTableDesc.getProperties(), null);
        MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerde, false);
        for (Byte pos : order) {
            if (pos == posBigTableAlias) {
                continue;
            }
            mapJoinTables[pos] = new HashMapWrapper(hconf, -1);
            TableDesc valueTableDesc = conf.getValueTblFilteredDescs().get(pos);
            AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null);
            SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null);
            mapJoinTableSerdes[pos] = new MapJoinTableContainerSerDe(keyContext, new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos)));
        }
    } catch (SerDeException e) {
        throw new HiveException(e);
    }
}
Also used: ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), LogHelper (org.apache.hadoop.hive.ql.session.SessionState.LogHelper), AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe), MapJoinTableContainerSerDe (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe), HashMapWrapper (org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper), MapJoinMemoryExhaustionHandler (org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionHandler), MapJoinObjectSerDeContext (org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
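
The key and value setup above repeats one pattern: take the serde class from a TableDesc, instantiate it reflectively, and initialize it from the descriptor's Properties. A minimal sketch of that pattern in isolation; the helper name serdeFor is hypothetical, and the null Configuration matches the calls in initializeOp():

import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.util.ReflectionUtils;

// Hypothetical helper mirroring the key/value serde setup above.
static AbstractSerDe serdeFor(TableDesc desc) throws SerDeException {
    // Instantiate the configured serde class via Hadoop's ReflectionUtils.
    AbstractSerDe serde =
        (AbstractSerDe) ReflectionUtils.newInstance(desc.getDeserializerClass(), null);
    // Feed it the table's properties (columns, columns.types, ...).
    SerDeUtils.initializeSerDe(serde, null, desc.getProperties(), null);
    return serde;
}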

Example 49 with AbstractSerDe

Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

From the class DynamicPartitionFileRecordWriterContainer, method getLocalFileWriter:

@Override
protected LocalFileWriter getLocalFileWriter(HCatRecord value) throws IOException, HCatException {
    OutputJobInfo localJobInfo = null;
    // Calculate which writer to use from the remaining values - this needs to
    // be done before we delete cols.
    List<String> dynamicPartValues = new ArrayList<String>();
    for (Integer colToAppend : dynamicPartCols) {
        Object partitionValue = value.get(colToAppend);
        dynamicPartValues.add(partitionValue == null ? HIVE_DEFAULT_PARTITION_VALUE : partitionValue.toString());
    }
    String dynKey = dynamicPartValues.toString();
    if (!baseDynamicWriters.containsKey(dynKey)) {
        if ((maxDynamicPartitions != -1) && (baseDynamicWriters.size() > maxDynamicPartitions)) {
            throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS, "Number of dynamic partitions being created " + "exceeds configured max allowable partitions[" + maxDynamicPartitions + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname + "] if needed.");
        }
        org.apache.hadoop.mapred.TaskAttemptContext currTaskContext = HCatMapRedUtil.createTaskAttemptContext(context);
        configureDynamicStorageHandler(currTaskContext, dynamicPartValues);
        localJobInfo = HCatBaseOutputFormat.getJobInfo(currTaskContext.getConfiguration());
        // Setup serDe.
        AbstractSerDe currSerDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), currTaskContext.getJobConf());
        try {
            InternalUtil.initializeOutputSerDe(currSerDe, currTaskContext.getConfiguration(), localJobInfo);
        } catch (SerDeException e) {
            throw new IOException("Failed to initialize SerDe", e);
        }
        // create base OutputFormat
        org.apache.hadoop.mapred.OutputFormat baseOF = ReflectionUtils.newInstance(storageHandler.getOutputFormatClass(), currTaskContext.getJobConf());
        // We are skipping calling checkOutputSpecs() for each partition, as it
        // can throw a FileAlreadyExistsException when more than one mapper is
        // writing to a partition (see HCATALOG-490), and to avoid contacting
        // the namenode for each new FileOutputFormat instance. In general this
        // should be OK for most FileOutputFormat implementations, but it may
        // become an issue when the method is used to perform other setup tasks.
        // Get Output Committer
        org.apache.hadoop.mapred.OutputCommitter baseOutputCommitter = currTaskContext.getJobConf().getOutputCommitter();
        // Create currJobContext the latest so it gets all the config changes
        org.apache.hadoop.mapred.JobContext currJobContext = HCatMapRedUtil.createJobContext(currTaskContext);
        // Set up job.
        baseOutputCommitter.setupJob(currJobContext);
        // Recreate to refresh jobConf of currTask context.
        currTaskContext = HCatMapRedUtil.createTaskAttemptContext(currJobContext.getJobConf(), currTaskContext.getTaskAttemptID(), currTaskContext.getProgressible());
        // Set temp location.
        currTaskContext.getConfiguration().set("mapred.work.output.dir", new FileOutputCommitter(new Path(localJobInfo.getLocation()), currTaskContext).getWorkPath().toString());
        // Set up task.
        baseOutputCommitter.setupTask(currTaskContext);
        Path parentDir = new Path(currTaskContext.getConfiguration().get("mapred.work.output.dir"));
        Path childPath = new Path(parentDir, FileOutputFormat.getUniqueFile(currTaskContext, currTaskContext.getConfiguration().get("mapreduce.output.basename", "part"), ""));
        RecordWriter baseRecordWriter = baseOF.getRecordWriter(parentDir.getFileSystem(currTaskContext.getConfiguration()), currTaskContext.getJobConf(), childPath.toString(), InternalUtil.createReporter(currTaskContext));
        baseDynamicWriters.put(dynKey, baseRecordWriter);
        baseDynamicSerDe.put(dynKey, currSerDe);
        baseDynamicCommitters.put(dynKey, baseOutputCommitter);
        dynamicContexts.put(dynKey, currTaskContext);
        dynamicObjectInspectors.put(dynKey, InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()));
        dynamicOutputJobInfo.put(dynKey, HCatOutputFormat.getJobInfo(dynamicContexts.get(dynKey).getConfiguration()));
    }
    return new LocalFileWriter(baseDynamicWriters.get(dynKey), dynamicObjectInspectors.get(dynKey), baseDynamicSerDe.get(dynKey), dynamicOutputJobInfo.get(dynKey));
}
Also used: Path (org.apache.hadoop.fs.Path), FileOutputCommitter (org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter), ArrayList (java.util.ArrayList), HCatException (org.apache.hive.hcatalog.common.HCatException), IOException (java.io.IOException), AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe), RecordWriter (org.apache.hadoop.mapred.RecordWriter), SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
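
Stripped of the Hadoop plumbing, getLocalFileWriter is a get-or-create cache keyed by the stringified dynamic-partition values, with a cap on how many writers may exist. A compact sketch of just that skeleton; PartitionWriter and createWriter are placeholders standing in for the writer/serde/committer bundle, not HCatalog API:

import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Placeholder for the per-partition RecordWriter/serde/committer bundle.
class PartitionWriter { }

class WriterCache {
    private final Map<String, PartitionWriter> writers = new HashMap<String, PartitionWriter>();
    private final int maxDynamicPartitions; // -1 means unlimited, as above

    WriterCache(int maxDynamicPartitions) {
        this.maxDynamicPartitions = maxDynamicPartitions;
    }

    PartitionWriter forPartition(List<String> dynamicPartValues) {
        // Same key derivation as the original: the list's string form.
        String dynKey = dynamicPartValues.toString();
        PartitionWriter w = writers.get(dynKey);
        if (w == null) {
            // Enforce the cap before creating yet another writer.
            if (maxDynamicPartitions != -1 && writers.size() > maxDynamicPartitions) {
                throw new IllegalStateException(
                    "too many dynamic partitions: " + writers.size());
            }
            w = createWriter(dynamicPartValues); // committer/serde setup would go here
            writers.put(dynKey, w);
        }
        return w;
    }

    private PartitionWriter createWriter(List<String> values) {
        return new PartitionWriter();
    }
}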

Example 50 with AbstractSerDe

Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

From the class ReduceRecordSource, method init:

void init(JobConf jconf, Operator<?> reducer, boolean vectorized, TableDesc keyTableDesc, TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte tag, VectorizedRowBatchCtx batchContext, long vectorizedVertexNum) throws Exception {
    this.vectorizedVertexNum = vectorizedVertexNum;
    ObjectInspector keyObjectInspector;
    this.reducer = reducer;
    this.vectorized = vectorized;
    this.keyTableDesc = keyTableDesc;
    if (reader instanceof KeyValueReader) {
        this.reader = new KeyValuesFromKeyValue((KeyValueReader) reader);
    } else {
        this.reader = new KeyValuesFromKeyValues((KeyValuesReader) reader);
    }
    this.handleGroupKey = handleGroupKey;
    this.tag = tag;
    try {
        inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
        keyObjectInspector = inputKeyDeserializer.getObjectInspector();
        if (vectorized) {
            keyStructInspector = (StructObjectInspector) keyObjectInspector;
            firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
        }
        // We should initialize the SerDe with the TypeInfo when available.
        this.valueTableDesc = valueTableDesc;
        inputValueDeserializer = (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(inputValueDeserializer, null, valueTableDesc.getProperties(), null);
        valueObjectInspector = inputValueDeserializer.getObjectInspector();
        ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
        if (vectorized) {
            /* vectorization only works with struct object inspectors */
            valueStructInspectors = (StructObjectInspector) valueObjectInspector;
            final int totalColumns = firstValueColumnOffset + valueStructInspectors.getAllStructFieldRefs().size();
            valueStringWriters = new ArrayList<VectorExpressionWriter>(totalColumns);
            valueStringWriters.addAll(Arrays.asList(VectorExpressionWriterFactory.genVectorStructExpressionWritables(keyStructInspector)));
            valueStringWriters.addAll(Arrays.asList(VectorExpressionWriterFactory.genVectorStructExpressionWritables(valueStructInspectors)));
            rowObjectInspector = Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspectors);
            batch = batchContext.createVectorizedRowBatch();
            // Setup vectorized deserialization for the key and value.
            BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeyDeserializer;
            keyBinarySortableDeserializeToRow =
                new VectorDeserializeRow<BinarySortableDeserializeRead>(
                    new BinarySortableDeserializeRead(
                        VectorizedBatchUtil.typeInfosFromStructObjectInspector(keyStructInspector),
                        /* useExternalBuffer */ true,
                        binarySortableSerDe.getSortOrders()));
            keyBinarySortableDeserializeToRow.init(0);
            final int valuesSize = valueStructInspectors.getAllStructFieldRefs().size();
            if (valuesSize > 0) {
                valueLazyBinaryDeserializeToRow =
                    new VectorDeserializeRow<LazyBinaryDeserializeRead>(
                        new LazyBinaryDeserializeRead(
                            VectorizedBatchUtil.typeInfosFromStructObjectInspector(valueStructInspectors),
                            /* useExternalBuffer */ true));
                valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);
                // Create data buffers for value bytes column vectors.
                for (int i = firstValueColumnOffset; i < batch.numCols; i++) {
                    ColumnVector colVector = batch.cols[i];
                    if (colVector instanceof BytesColumnVector) {
                        BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
                        bytesColumnVector.initBuffer();
                    }
                }
            }
        } else {
            ois.add(keyObjectInspector);
            ois.add(valueObjectInspector);
            rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
        }
    } catch (Throwable e) {
        abort = true;
        if (e instanceof OutOfMemoryError) {
            // Don't create a new object if we are already out of memory
            throw (OutOfMemoryError) e;
        } else {
            throw new RuntimeException("Reduce operator initialization failed", e);
        }
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
}
Also used: ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), BinarySortableSerDe (org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe), KeyValueReader (org.apache.tez.runtime.library.api.KeyValueReader), ArrayList (java.util.ArrayList), BinarySortableDeserializeRead (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead), VectorExpressionWriter (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter), BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector), KeyValuesReader (org.apache.tez.runtime.library.api.KeyValuesReader), LazyBinaryDeserializeRead (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)
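
Everything init() wires up exists so that each record from the shuffle can be deserialized and then read field by field through an ObjectInspector. A hedged sketch of that read path for one value record; the helper name dumpValue is hypothetical, and the Writable is assumed to come from the reader:

import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.Writable;

// Hypothetical helper: dump one deserialized reduce value field by field.
static void dumpValue(Deserializer valueDeserializer, Writable valueWritable)
        throws SerDeException {
    Object value = valueDeserializer.deserialize(valueWritable);
    StructObjectInspector soi =
        (StructObjectInspector) valueDeserializer.getObjectInspector();
    for (StructField f : soi.getAllStructFieldRefs()) {
        Object fieldData = soi.getStructFieldData(value, f);
        // Convert the (possibly lazy) field into a plain Java object.
        Object javaVal = ObjectInspectorUtils.copyToStandardJavaObject(
            fieldData, f.getFieldObjectInspector());
        System.out.println(f.getFieldName() + " = " + javaVal);
    }
}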

Aggregations

AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe): 40 uses
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 22 uses
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 16 uses
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 15 uses
Properties (java.util.Properties): 12 uses
ArrayList (java.util.ArrayList): 10 uses
BytesWritable (org.apache.hadoop.io.BytesWritable): 9 uses
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 8 uses
Writable (org.apache.hadoop.io.Writable): 8 uses
IOException (java.io.IOException): 7 uses
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 7 uses
InputSplit (org.apache.hadoop.mapred.InputSplit): 7 uses
Test (org.junit.Test): 7 uses
AbstractPrimitiveLazyObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector): 6 uses
LazyBinaryMapObjectInspector (org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector): 6 uses
JavaBinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBinaryObjectInspector): 6 uses
WritableBinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector): 6 uses
Path (org.apache.hadoop.fs.Path): 5 uses
RecordWriter (org.apache.hadoop.mapred.RecordWriter): 5 uses
ConcurrentModificationException (java.util.ConcurrentModificationException): 4 uses