Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.
Class HBaseUtils, method desierliazeDbNameTableNameFromPartitionKey (the misspelled name is taken verbatim from the Hive source).
private static List<String> desierliazeDbNameTableNameFromPartitionKey(byte[] key, Configuration conf) {
  StringBuffer names = new StringBuffer();
  names.append("dbName,tableName,");
  StringBuffer types = new StringBuffer();
  types.append("string,string,");
  BinarySortableSerDe serDe = new BinarySortableSerDe();
  Properties props = new Properties();
  props.setProperty(serdeConstants.LIST_COLUMNS, names.toString());
  props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types.toString());
  try {
    serDe.initialize(conf, props);
    List deserializedkeys = ((List) serDe.deserialize(new BytesWritable(key))).subList(0, 2);
    List<String> keys = new ArrayList<>();
    for (int i = 0; i < deserializedkeys.size(); i++) {
      Object deserializedKey = deserializedkeys.get(i);
      if (deserializedKey == null) {
        throw new RuntimeException("Can't have a null dbname or tablename");
      } else {
        TypeInfo inputType = TypeInfoUtils.getTypeInfoFromTypeString("string");
        ObjectInspector inputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(inputType);
        Converter converter = ObjectInspectorConverters.getConverter(inputOI, PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        keys.add((String) converter.convert(deserializedKey));
      }
    }
    return keys;
  } catch (SerDeException e) {
    throw new RuntimeException("Error when deserialize key", e);
  }
}
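For context, the BinarySortableSerDe round trip this method relies on can be sketched in isolation. Below is a minimal, self-contained sketch (not Hive source): the class name and the sample values "default" and "web_logs" are invented for illustration, and the column properties mirror the ones built above. It also shows why the Converter step exists: BinarySortableSerDe hands back writable objects (Text for strings) rather than java.lang.String.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.BytesWritable;

public class BinarySortableKeyRoundTrip {
  public static void main(String[] args) throws SerDeException {
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, "dbName,tableName");
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string");
    BinarySortableSerDe serDe = new BinarySortableSerDe();
    serDe.initialize(new Configuration(), props);

    // Serialize a (dbName, tableName) pair into a binary-sortable key.
    List<ObjectInspector> fieldOIs = new ArrayList<>();
    fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    ObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("dbName", "tableName"), fieldOIs);
    BytesWritable key = (BytesWritable) serDe.serialize(
        Arrays.asList("default", "web_logs"), rowOI);

    // Deserialize the key back into its two fields; strings come back as
    // org.apache.hadoop.io.Text, hence the Converter in the method above.
    List<?> fields = (List<?>) serDe.deserialize(key);
    System.out.println(fields);
  }
}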
Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.
Class PartitionKeyComparator, method compareTo.
@Override
public int compareTo(byte[] value, int offset, int length) {
  byte[] bytes = Arrays.copyOfRange(value, offset, offset + length);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Get key " + new String(bytes));
  }
  BinarySortableSerDe serDe = new BinarySortableSerDe();
  List deserializedkeys = null;
  try {
    serDe.initialize(new Configuration(), serdeProps);
    deserializedkeys = ((List) serDe.deserialize(new BytesWritable(bytes))).subList(2, 2 + names.split(",").length);
  } catch (SerDeException e) {
    // don't bother with failed deserialization, continue with next key
    return 1;
  }
  for (int i = 0; i < ranges.size(); i++) {
    Range range = ranges.get(i);
    NativeRange nativeRange = nativeRanges.get(i);
    Comparable partVal = (Comparable) deserializedkeys.get(nativeRange.pos);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Try to match range " + partVal + ", start " + nativeRange.start + ", end " + nativeRange.end);
    }
    if (range.start == null || range.start.inclusive && partVal.compareTo(nativeRange.start) >= 0 || !range.start.inclusive && partVal.compareTo(nativeRange.start) > 0) {
      if (range.end == null || range.end.inclusive && partVal.compareTo(nativeRange.end) <= 0 || !range.end.inclusive && partVal.compareTo(nativeRange.end) < 0) {
        continue;
      }
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Fail to match range " + range.keyName + "-" + partVal + "[" + nativeRange.start + "," + nativeRange.end + "]");
    }
    return 1;
  }
  for (int i = 0; i < ops.size(); i++) {
    Operator op = ops.get(i);
    NativeOperator nativeOp = nativeOps.get(i);
    switch (op.type) {
      case LIKE:
        if (!deserializedkeys.get(nativeOp.pos).toString().matches(op.val)) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Fail to match operator " + op.keyName + "(" + deserializedkeys.get(nativeOp.pos) + ") LIKE " + nativeOp.val);
          }
          return 1;
        }
        break;
      case NOTEQUALS:
        if (nativeOp.val.equals(deserializedkeys.get(nativeOp.pos))) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Fail to match operator " + op.keyName + "(" + deserializedkeys.get(nativeOp.pos) + ")!=" + nativeOp.val);
          }
          return 1;
        }
        break;
    }
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("All conditions satisfied:" + deserializedkeys);
  }
  return 0;
}
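The nested conditionals above encode an interval test: a key matches a Range when it is at or after the start (strictly after when the start is exclusive) and at or before the end (strictly before when the end is exclusive), with a null bound meaning unbounded on that side. Anything else falls through to the "Fail to match" branch and returns 1, consistent with the HBase comparator convention where only a return of 0 lets an equality filter treat the row as matching. A minimal sketch of that predicate extracted into plain Java (the helper name inRange and the date strings are illustrative, not Hive API):

public class RangeCheck {
  // null bounds mean "unbounded on that side", matching the null checks above.
  static <T extends Comparable<T>> boolean inRange(
      T val, T start, boolean startInclusive, T end, boolean endInclusive) {
    boolean afterStart = start == null
        || (startInclusive ? val.compareTo(start) >= 0 : val.compareTo(start) > 0);
    boolean beforeEnd = end == null
        || (endInclusive ? val.compareTo(end) <= 0 : val.compareTo(end) < 0);
    return afterStart && beforeEnd;
  }

  public static void main(String[] args) {
    System.out.println(inRange("2017-03-01", "2017-01-01", true, "2017-06-30", false)); // true
    System.out.println(inRange("2017-06-30", "2017-01-01", true, "2017-06-30", false)); // false: end is exclusive
  }
}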
Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.
Class Utilities, method createEmptyBuckets.
/**
 * Check the existence of buckets according to the bucket specification. Create empty buckets if
 * needed.
 *
 * @param hconf The current configuration, used to construct the JobConf
 * @param paths A list of empty buckets to create
 * @param conf The definition of the FileSink
 * @param reporter The mapreduce reporter object
 * @throws HiveException
 * @throws IOException
 */
private static void createEmptyBuckets(Configuration hconf, List<Path> paths, FileSinkDesc conf, Reporter reporter) throws HiveException, IOException {
  JobConf jc;
  if (hconf instanceof JobConf) {
    jc = new JobConf(hconf);
  } else {
    // test code path
    jc = new JobConf(hconf);
  }
  HiveOutputFormat<?, ?> hiveOutputFormat = null;
  Class<? extends Writable> outputClass = null;
  boolean isCompressed = conf.getCompressed();
  TableDesc tableInfo = conf.getTableInfo();
  try {
    Serializer serializer = (Serializer) tableInfo.getDeserializerClass().newInstance();
    serializer.initialize(null, tableInfo.getProperties());
    outputClass = serializer.getSerializedClass();
    hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(hconf, conf.getTableInfo());
  } catch (SerDeException e) {
    throw new HiveException(e);
  } catch (InstantiationException e) {
    throw new HiveException(e);
  } catch (IllegalAccessException e) {
    throw new HiveException(e);
  }
  for (Path path : paths) {
    RecordWriter writer = HiveFileFormatUtils.getRecordWriter(jc, hiveOutputFormat, outputClass, isCompressed, tableInfo.getProperties(), path, reporter);
    writer.close(false);
    LOG.info("created empty bucket for enforcing bucketing at " + path);
  }
}
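The loop's trick of opening a RecordWriter and closing it immediately is what produces a zero-row but still well-formed file per bucket, since the output format writes any headers and footers on close. A minimal sketch of that pattern as a standalone helper, reusing the same HiveFileFormatUtils.getRecordWriter call shown above (the class and method names here are invented for illustration):

import java.io.IOException;
import java.util.Properties;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;

public final class EmptyBucketSketch {
  // Open a writer for 'path' and close it without writing any rows; the
  // output format still emits a valid (empty) file.
  static void writeEmptyFile(JobConf jc, HiveOutputFormat<?, ?> format,
      Class<? extends Writable> outputClass, boolean isCompressed,
      Properties tableProps, Path path, Reporter reporter)
      throws IOException, HiveException {
    RecordWriter writer = HiveFileFormatUtils.getRecordWriter(
        jc, format, outputClass, isCompressed, tableProps, path, reporter);
    writer.close(false);
  }
}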
Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.
Class SkewJoinHandler, method initiliaze (the misspelled name is taken verbatim from the Hive source).
public void initiliaze(Configuration hconf) {
  this.hconf = hconf;
  JoinDesc desc = joinOp.getConf();
  skewKeyDefinition = desc.getSkewKeyDefinition();
  skewKeysTableObjectInspector = new HashMap<Byte, StructObjectInspector>(numAliases);
  tblDesc = desc.getSkewKeysValuesTables();
  tblSerializers = new HashMap<Byte, AbstractSerDe>(numAliases);
  bigKeysExistingMap = new HashMap<Byte, Boolean>(numAliases);
  taskId = Utilities.getTaskId(hconf);
  int[][] filterMap = desc.getFilterMap();
  for (int i = 0; i < numAliases; i++) {
    Byte alias = conf.getTagOrder()[i];
    List<ObjectInspector> skewTableKeyInspectors = new ArrayList<ObjectInspector>();
    StructObjectInspector soi = (StructObjectInspector) joinOp.inputObjInspectors[alias];
    StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY.toString());
    List<? extends StructField> keyFields = ((StructObjectInspector) sf.getFieldObjectInspector()).getAllStructFieldRefs();
    int keyFieldSize = keyFields.size();
    for (int k = 0; k < keyFieldSize; k++) {
      skewTableKeyInspectors.add(keyFields.get(k).getFieldObjectInspector());
    }
    TableDesc joinKeyDesc = desc.getKeyTableDesc();
    List<String> keyColNames = Utilities.getColumnNames(joinKeyDesc.getProperties());
    StructObjectInspector structTblKeyInpector = ObjectInspectorFactory.getStandardStructObjectInspector(keyColNames, skewTableKeyInspectors);
    try {
      AbstractSerDe serializer = (AbstractSerDe) ReflectionUtils.newInstance(tblDesc.get(alias).getDeserializerClass(), null);
      SerDeUtils.initializeSerDe(serializer, null, tblDesc.get(alias).getProperties(), null);
      tblSerializers.put((byte) i, serializer);
    } catch (SerDeException e) {
      LOG.error("Skewjoin will be disabled due to " + e.getMessage(), e);
      joinOp.handleSkewJoin = false;
      break;
    }
    boolean hasFilter = filterMap != null && filterMap[i] != null;
    TableDesc valTblDesc = JoinUtil.getSpillTableDesc(alias, joinOp.spillTableDesc, conf, !hasFilter);
    List<String> valColNames = new ArrayList<String>();
    if (valTblDesc != null) {
      valColNames = Utilities.getColumnNames(valTblDesc.getProperties());
    }
    StructObjectInspector structTblValInpector = ObjectInspectorFactory.getStandardStructObjectInspector(valColNames, joinOp.joinValuesStandardObjectInspectors[i]);
    StructObjectInspector structTblInpector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays.asList(structTblValInpector, structTblKeyInpector));
    skewKeysTableObjectInspector.put((byte) i, structTblInpector);
  }
  // reset rowcontainer's serde, objectinspector, and tableDesc.
  for (int i = 0; i < numAliases; i++) {
    Byte alias = conf.getTagOrder()[i];
    RowContainer<ArrayList<Object>> rc = (RowContainer) joinOp.storage[i];
    if (rc != null) {
      rc.setSerDe(tblSerializers.get((byte) i), skewKeysTableObjectInspector.get((byte) i));
      rc.setTableDesc(tblDesc.get(alias));
    }
  }
}
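The bootstrap pattern in the try block (reflectively instantiate the deserializer class named by a TableDesc, then initialize it through SerDeUtils) can be sketched standalone. In the sketch below, LazySimpleSerDe and the two column names stand in for whatever tblDesc.get(alias) would name, and a fresh Configuration replaces the null the handler passes:

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.util.ReflectionUtils;

public class SerDeBootstrap {
  public static void main(String[] args) throws SerDeException {
    // Column schema that would normally come from tblDesc.getProperties().
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, "joinkey,joinvalue");
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string");

    // Reflective instantiation, as in the try block above; the class would
    // normally be tblDesc.get(alias).getDeserializerClass().
    AbstractSerDe serde = ReflectionUtils.newInstance(LazySimpleSerDe.class, null);
    SerDeUtils.initializeSerDe(serde, new Configuration(), props, null);
    System.out.println("initialized: " + serde.getClass().getSimpleName());
  }
}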
Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.
Class TestParquetSerDe, method deserializeAndSerializeLazySimple.
private void deserializeAndSerializeLazySimple(final ParquetHiveSerDe serDe, final ArrayWritable t) throws SerDeException {
  // Get the row structure
  final StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
  // Deserialize
  final Object row = serDe.deserialize(t);
  assertEquals("deserialization gives the wrong object class", row.getClass(), ArrayWritable.class);
  assertEquals("size correct after deserialization", serDe.getSerDeStats().getRawDataSize(), t.get().length);
  assertEquals("deserialization gives the wrong object", t, row);
  // Serialize
  final ParquetHiveRecord serializedArr = (ParquetHiveRecord) serDe.serialize(row, oi);
  assertEquals("size correct after serialization", serDe.getSerDeStats().getRawDataSize(), ((ArrayWritable) serializedArr.getObject()).get().length);
  assertTrue("serialized object should be equal to starting object", arrayWritableEquals(t, (ArrayWritable) serializedArr.getObject()));
}
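The same round-trip pattern generalizes to other serdes: deserialize a Writable, re-serialize the result through the serde's own ObjectInspector, and compare. A minimal sketch of such a helper, assuming JUnit 4; the predicate parameter exists because, as with ParquetHiveRecord here, the serialized type may not implement a useful equals (arrayWritableEquals fills that role in the test above):

import java.util.function.BiPredicate;

import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.Writable;

import static org.junit.Assert.assertTrue;

public class RoundTripAssert {
  // Deserialize, re-serialize through the serde's own inspector, and compare
  // with a caller-supplied predicate (e.g. wrapping arrayWritableEquals).
  static void assertRoundTrip(AbstractSerDe serDe, Writable input,
      BiPredicate<Writable, Writable> sameAs) throws SerDeException {
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    Object row = serDe.deserialize(input);
    Writable reserialized = serDe.serialize(row, oi);
    assertTrue("round trip should preserve the record", sameAs.test(input, reserialized));
  }
}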