Example 21 with SerDeException

use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.

the class HBaseUtils method desierliazeDbNameTableNameFromPartitionKey.

private static List<String> desierliazeDbNameTableNameFromPartitionKey(byte[] key, Configuration conf) {
    StringBuffer names = new StringBuffer();
    names.append("dbName,tableName,");
    StringBuffer types = new StringBuffer();
    types.append("string,string,");
    BinarySortableSerDe serDe = new BinarySortableSerDe();
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, names.toString());
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types.toString());
    try {
        serDe.initialize(conf, props);
        List deserializedkeys = ((List) serDe.deserialize(new BytesWritable(key))).subList(0, 2);
        List<String> keys = new ArrayList<>();
        for (int i = 0; i < deserializedkeys.size(); i++) {
            Object deserializedKey = deserializedkeys.get(i);
            if (deserializedKey == null) {
                throw new RuntimeException("Can't have a null dbname or tablename");
            } else {
                TypeInfo inputType = TypeInfoUtils.getTypeInfoFromTypeString("string");
                ObjectInspector inputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(inputType);
                Converter converter = ObjectInspectorConverters.getConverter(inputOI, PrimitiveObjectInspectorFactory.javaStringObjectInspector);
                keys.add((String) converter.convert(deserializedKey));
            }
        }
        return keys;
    } catch (SerDeException e) {
        throw new RuntimeException("Error when deserialize key", e);
    }
}
Also used: ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), BinarySortableSerDe (org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe), ArrayList (java.util.ArrayList), BytesWritable (org.apache.hadoop.io.BytesWritable), ByteString (com.google.protobuf.ByteString), Properties (java.util.Properties), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), Converter (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter), List (java.util.List), SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
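
For context, the serialize side of this round trip looks roughly as follows. This is a minimal sketch rather than code from the Hive tree: it assumes the Hive 2.x serde2 API used above, and the "default"/"web_logs" values are made up.

StructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("dbName", "tableName"),
        Arrays.<ObjectInspector>asList(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                PrimitiveObjectInspectorFactory.javaStringObjectInspector));
BinarySortableSerDe serDe = new BinarySortableSerDe();
Properties props = new Properties();
props.setProperty(serdeConstants.LIST_COLUMNS, "dbName,tableName");
props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string");
// initialize() and serialize() both throw SerDeException
serDe.initialize(new Configuration(), props);
// serialize a (dbName, tableName) pair into the binary-sortable key format
BytesWritable key = (BytesWritable) serDe.serialize(
        Arrays.asList("default", "web_logs"), rowOI);
// feeding key.copyBytes() back through the method above should yield
// ["default", "web_logs"]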

Example 22 with SerDeException

use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.

the class PartitionKeyComparator method compareTo.

@Override
public int compareTo(byte[] value, int offset, int length) {
    byte[] bytes = Arrays.copyOfRange(value, offset, offset + length);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Get key " + new String(bytes));
    }
    BinarySortableSerDe serDe = new BinarySortableSerDe();
    List deserializedkeys = null;
    try {
        serDe.initialize(new Configuration(), serdeProps);
        deserializedkeys = ((List) serDe.deserialize(new BytesWritable(bytes))).subList(2, 2 + names.split(",").length);
    } catch (SerDeException e) {
        // don't bother with failed deserialization, continue with next key
        return 1;
    }
    for (int i = 0; i < ranges.size(); i++) {
        Range range = ranges.get(i);
        NativeRange nativeRange = nativeRanges.get(i);
        Comparable partVal = (Comparable) deserializedkeys.get(nativeRange.pos);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Try to match range " + partVal + ", start " + nativeRange.start + ", end " + nativeRange.end);
        }
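        // a range matches when partVal lies inside [start, end] (honoring inclusivity);
        // a match hits the "continue" below, anything else falls through to "return 1"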
        if (range.start == null || range.start.inclusive && partVal.compareTo(nativeRange.start) >= 0 || !range.start.inclusive && partVal.compareTo(nativeRange.start) > 0) {
            if (range.end == null || range.end.inclusive && partVal.compareTo(nativeRange.end) <= 0 || !range.end.inclusive && partVal.compareTo(nativeRange.end) < 0) {
                continue;
            }
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Fail to match range " + range.keyName + "-" + partVal + "[" + nativeRange.start + "," + nativeRange.end + "]");
        }
        return 1;
    }
    for (int i = 0; i < ops.size(); i++) {
        Operator op = ops.get(i);
        NativeOperator nativeOp = nativeOps.get(i);
        switch(op.type) {
            case LIKE:
                if (!deserializedkeys.get(nativeOp.pos).toString().matches(op.val)) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Fail to match operator " + op.keyName + "(" + deserializedkeys.get(nativeOp.pos) + ") LIKE " + nativeOp.val);
                    }
                    return 1;
                }
                break;
            case NOTEQUALS:
                if (nativeOp.val.equals(deserializedkeys.get(nativeOp.pos))) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Fail to match operator " + op.keyName + "(" + deserializedkeys.get(nativeOp.pos) + ")!=" + nativeOp.val);
                    }
                    return 1;
                }
                break;
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("All conditions satisfied:" + deserializedkeys);
    }
    return 0;
}
Also used: ByteArrayComparable (org.apache.hadoop.hbase.filter.ByteArrayComparable), BinarySortableSerDe (org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe), Configuration (org.apache.hadoop.conf.Configuration), BytesWritable (org.apache.hadoop.io.BytesWritable), ArrayList (java.util.ArrayList), List (java.util.List), SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
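
The 0/1 return convention matters because ByteArrayComparable instances are consumed by HBase filters: a return value of 0 marks a matching row key. Below is a hedged sketch of attaching such a comparator to a scan; partitionKeyComparator is an assumed, already-constructed instance, and the HBase 1.x filter API is assumed.

Scan scan = new Scan();
// RowFilter with EQUAL keeps exactly the rows for which compareTo(...) returns 0
scan.setFilter(new RowFilter(CompareFilter.CompareOp.EQUAL, partitionKeyComparator));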

Example 23 with SerDeException

use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.

the class Utilities method createEmptyBuckets.

/**
   * Check the existence of buckets according to the bucket specification and create empty
   * buckets if needed.
   *
   * @param hconf the current Hadoop configuration
   * @param paths a list of empty buckets to create
   * @param conf the definition of the FileSink
   * @param reporter the MapReduce reporter object
   * @throws HiveException
   * @throws IOException
   */
private static void createEmptyBuckets(Configuration hconf, List<Path> paths, FileSinkDesc conf, Reporter reporter) throws HiveException, IOException {
    JobConf jc;
    if (hconf instanceof JobConf) {
        jc = new JobConf(hconf);
    } else {
        // test code path
        jc = new JobConf(hconf);
    }
    HiveOutputFormat<?, ?> hiveOutputFormat = null;
    Class<? extends Writable> outputClass = null;
    boolean isCompressed = conf.getCompressed();
    TableDesc tableInfo = conf.getTableInfo();
    try {
        Serializer serializer = (Serializer) tableInfo.getDeserializerClass().newInstance();
        serializer.initialize(null, tableInfo.getProperties());
        outputClass = serializer.getSerializedClass();
        hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(hconf, conf.getTableInfo());
    } catch (SerDeException e) {
        throw new HiveException(e);
    } catch (InstantiationException e) {
        throw new HiveException(e);
    } catch (IllegalAccessException e) {
        throw new HiveException(e);
    }
    for (Path path : paths) {
        RecordWriter writer = HiveFileFormatUtils.getRecordWriter(jc, hiveOutputFormat, outputClass, isCompressed, tableInfo.getProperties(), path, reporter);
        writer.close(false);
        LOG.info("created empty bucket for enforcing bucketing at " + path);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), RecordWriter (org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), JobConf (org.apache.hadoop.mapred.JobConf), SerDeException (org.apache.hadoop.hive.serde2.SerDeException), Serializer (org.apache.hadoop.hive.serde2.Serializer)
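
Since all three catch blocks wrap the cause in a HiveException, the try block could be written more compactly with Java 7 multi-catch; a behavior-preserving sketch of that block alone:

try {
    Serializer serializer = (Serializer) tableInfo.getDeserializerClass().newInstance();
    serializer.initialize(null, tableInfo.getProperties());
    outputClass = serializer.getSerializedClass();
    hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(hconf, conf.getTableInfo());
} catch (SerDeException | InstantiationException | IllegalAccessException e) {
    throw new HiveException(e);
}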

Example 24 with SerDeException

use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.

the class SkewJoinHandler method initiliaze.

public void initiliaze(Configuration hconf) {
    this.hconf = hconf;
    JoinDesc desc = joinOp.getConf();
    skewKeyDefinition = desc.getSkewKeyDefinition();
    skewKeysTableObjectInspector = new HashMap<Byte, StructObjectInspector>(numAliases);
    tblDesc = desc.getSkewKeysValuesTables();
    tblSerializers = new HashMap<Byte, AbstractSerDe>(numAliases);
    bigKeysExistingMap = new HashMap<Byte, Boolean>(numAliases);
    taskId = Utilities.getTaskId(hconf);
    int[][] filterMap = desc.getFilterMap();
    for (int i = 0; i < numAliases; i++) {
        Byte alias = conf.getTagOrder()[i];
        List<ObjectInspector> skewTableKeyInspectors = new ArrayList<ObjectInspector>();
        StructObjectInspector soi = (StructObjectInspector) joinOp.inputObjInspectors[alias];
        StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY.toString());
        List<? extends StructField> keyFields = ((StructObjectInspector) sf.getFieldObjectInspector()).getAllStructFieldRefs();
        int keyFieldSize = keyFields.size();
        for (int k = 0; k < keyFieldSize; k++) {
            skewTableKeyInspectors.add(keyFields.get(k).getFieldObjectInspector());
        }
        TableDesc joinKeyDesc = desc.getKeyTableDesc();
        List<String> keyColNames = Utilities.getColumnNames(joinKeyDesc.getProperties());
        StructObjectInspector structTblKeyInpector = ObjectInspectorFactory.getStandardStructObjectInspector(keyColNames, skewTableKeyInspectors);
        try {
            AbstractSerDe serializer = (AbstractSerDe) ReflectionUtils.newInstance(tblDesc.get(alias).getDeserializerClass(), null);
            SerDeUtils.initializeSerDe(serializer, null, tblDesc.get(alias).getProperties(), null);
            tblSerializers.put((byte) i, serializer);
        } catch (SerDeException e) {
            LOG.error("Skewjoin will be disabled due to " + e.getMessage(), e);
            joinOp.handleSkewJoin = false;
            break;
        }
        boolean hasFilter = filterMap != null && filterMap[i] != null;
        TableDesc valTblDesc = JoinUtil.getSpillTableDesc(alias, joinOp.spillTableDesc, conf, !hasFilter);
        List<String> valColNames = new ArrayList<String>();
        if (valTblDesc != null) {
            valColNames = Utilities.getColumnNames(valTblDesc.getProperties());
        }
        StructObjectInspector structTblValInpector = ObjectInspectorFactory.getStandardStructObjectInspector(valColNames, joinOp.joinValuesStandardObjectInspectors[i]);
        StructObjectInspector structTblInpector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays.asList(structTblValInpector, structTblKeyInpector));
        skewKeysTableObjectInspector.put((byte) i, structTblInpector);
    }
    // reset rowcontainer's serde, objectinspector, and tableDesc.
    for (int i = 0; i < numAliases; i++) {
        Byte alias = conf.getTagOrder()[i];
        RowContainer<ArrayList<Object>> rc = (RowContainer) joinOp.storage[i];
        if (rc != null) {
            rc.setSerDe(tblSerializers.get((byte) i), skewKeysTableObjectInspector.get((byte) i));
            rc.setTableDesc(tblDesc.get(alias));
        }
    }
}
Also used: RowContainer (org.apache.hadoop.hive.ql.exec.persistence.RowContainer), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), ArrayList (java.util.ArrayList), AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), JoinDesc (org.apache.hadoop.hive.ql.plan.JoinDesc), SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
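
The reflection-plus-initialize pattern inside the try block is the generic way to stand up any SerDe from a TableDesc. A minimal, self-contained sketch of the same pattern, assuming LazySimpleSerDe as a stand-in table SerDe and made-up column names (initializeSerDe throws SerDeException):

AbstractSerDe serDe = ReflectionUtils.newInstance(LazySimpleSerDe.class, null);
Properties props = new Properties();
props.setProperty(serdeConstants.LIST_COLUMNS, "joinkey,joinvalue");
props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string");
// null Configuration and null partition properties, matching the call above
SerDeUtils.initializeSerDe(serDe, null, props, null);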

Example 25 with SerDeException

use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.

the class TestParquetSerDe method deserializeAndSerializeLazySimple.

private void deserializeAndSerializeLazySimple(final ParquetHiveSerDe serDe, final ArrayWritable t) throws SerDeException {
    // Get the row structure
    final StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    // Deserialize
    final Object row = serDe.deserialize(t);
    assertEquals("deserialization gives the wrong object class", row.getClass(), ArrayWritable.class);
    assertEquals("size correct after deserialization", serDe.getSerDeStats().getRawDataSize(), t.get().length);
    assertEquals("deserialization gives the wrong object", t, row);
    // Serialize
    final ParquetHiveRecord serializedArr = (ParquetHiveRecord) serDe.serialize(row, oi);
    assertEquals("size correct after serialization", serDe.getSerDeStats().getRawDataSize(), ((ArrayWritable) serializedArr.getObject()).get().length);
    assertTrue("serialized object should be equal to starting object", arrayWritableEquals(t, (ArrayWritable) serializedArr.getObject()));
}
Also used: ArrayWritable (org.apache.hadoop.io.ArrayWritable), ParquetHiveRecord (org.apache.hadoop.hive.serde2.io.ParquetHiveRecord), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
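
A hypothetical harness for this round-trip check; the column names, types, and row values are made up, and the initialization call is assumed rather than taken from the test class:

ParquetHiveSerDe serDe = new ParquetHiveSerDe();
Properties props = new Properties();
props.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");
SerDeUtils.initializeSerDe(serDe, new Configuration(), props, null);
// a two-field row matching the declared schema
ArrayWritable row = new ArrayWritable(Writable.class,
        new Writable[] { new IntWritable(1), new Text("alice") });
deserializeAndSerializeLazySimple(serDe, row);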

Aggregations

SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 124
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 108
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 100
ArrayList (java.util.ArrayList): 98
Properties (java.util.Properties): 59
Test (org.junit.Test): 59
Configuration (org.apache.hadoop.conf.Configuration): 52
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 52
Text (org.apache.hadoop.io.Text): 50
IOException (java.io.IOException): 37
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 33
Schema (org.apache.avro.Schema): 31
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 31
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 28
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 28
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 24
Put (org.apache.hadoop.hbase.client.Put): 22
LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters): 22
IntWritable (org.apache.hadoop.io.IntWritable): 22
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 21