use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.
the class TestDataWritableWriter method getParquetWritable.
/**
 * Builds a {@link ParquetHiveRecord} for the given record by running it through a
 * freshly initialized {@link ParquetHiveSerDe}.
 *
 * @param columnNames value stored under the "columns" table property
 * @param columnTypes value stored under the "columns.types" table property
 * @param record      the writable handed to {@code ParquetHiveSerDe.deserialize}
 * @return a record wrapping the deserialized object and the inspector returned by
 *         {@code getObjectInspector(columnNames, columnTypes)}
 * @throws SerDeException declared by this method; raised by the SerDe calls it makes
 */
private ParquetHiveRecord getParquetWritable(String columnNames, String columnTypes, ArrayWritable record) throws SerDeException {
    // Describe the table schema the SerDe should assume.
    Properties tableProps = new Properties();
    tableProps.setProperty("columns", columnNames);
    tableProps.setProperty("columns.types", columnTypes);

    ParquetHiveSerDe parquetSerDe = new ParquetHiveSerDe();
    SerDeUtils.initializeSerDe(parquetSerDe, new Configuration(), tableProps, null);

    // Deserialize first, then build the inspector, matching the original evaluation order.
    Object deserializedRow = parquetSerDe.deserialize(record);
    return new ParquetHiveRecord(deserializedRow, getObjectInspector(columnNames, columnTypes));
}
use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.
the class RegexSerDe method deserialize.
/**
 * Parses one line of text with the configured input pattern and fills the reusable
 * {@code row} list with one converted value per column.
 *
 * <p>A line that does not match the pattern as a whole produces {@code null} (not a row
 * of nulls); only the first such line is logged. A column whose text cannot be converted
 * (any {@link RuntimeException} raised while reading or converting the group) is set to
 * {@code null}; only the first such failure is logged.
 *
 * @param blob the input line; it is cast to {@code Text} without a type check
 * @return the {@code row} field populated for this line, or {@code null} when the line
 *         does not match the pattern
 * @throws SerDeException if the pattern's group count differs from the number of columns,
 *         or a column has a primitive category this method does not handle
 */
@Override
public Object deserialize(Writable blob) throws SerDeException {
    Text line = (Text) blob;
    Matcher matcher = inputPattern.matcher(line.toString());

    // The group count is a property of the pattern, so this is checked before matching.
    if (matcher.groupCount() != numColumns) {
        throw new SerDeException("Number of matching groups doesn't match the number of columns");
    }

    // A non-matching line is skipped: count it, warn once, and hand back null.
    if (!matcher.matches()) {
        unmatchedRowsCount++;
        if (!alreadyLoggedNoMatch) {
            LOG.warn("" + unmatchedRowsCount + " unmatched rows are found: " + line);
            alreadyLoggedNoMatch = true;
        }
        return null;
    }

    // The line matched: convert each captured group to its column's Java type.
    for (int col = 0; col < numColumns; col++) {
        try {
            String text = matcher.group(col + 1);
            TypeInfo columnType = columnTypes.get(col);
            PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) columnType;
            switch (primitiveType.getPrimitiveCategory()) {
            case STRING:
                row.set(col, text);
                break;
            case BYTE:
                row.set(col, Byte.valueOf(text));
                break;
            case SHORT:
                row.set(col, Short.valueOf(text));
                break;
            case INT:
                row.set(col, Integer.valueOf(text));
                break;
            case LONG:
                row.set(col, Long.valueOf(text));
                break;
            case FLOAT:
                row.set(col, Float.valueOf(text));
                break;
            case DOUBLE:
                row.set(col, Double.valueOf(text));
                break;
            case BOOLEAN:
                row.set(col, Boolean.valueOf(text));
                break;
            case TIMESTAMP:
                row.set(col, Timestamp.valueOf(text));
                break;
            case DATE:
                row.set(col, Date.valueOf(text));
                break;
            case DECIMAL:
                row.set(col, HiveDecimal.create(text));
                break;
            case CHAR:
                row.set(col, new HiveChar(text, ((CharTypeInfo) columnType).getLength()));
                break;
            case VARCHAR:
                row.set(col, new HiveVarchar(text, ((VarcharTypeInfo) columnType).getLength()));
                break;
            default:
                // SerDeException is thrown out of the method; the catch below only
                // handles RuntimeException.
                throw new SerDeException("Unsupported type " + columnType);
            }
        } catch (RuntimeException e) {
            // Conversion failed for this column only: null it out and keep going.
            partialMatchedRowsCount++;
            if (!alreadyLoggedPartialMatch) {
                LOG.warn("" + partialMatchedRowsCount + " partially unmatched rows are found, " + " cannot find group " + col + ": " + line);
                alreadyLoggedPartialMatch = true;
            }
            row.set(col, null);
        }
    }
    return row;
}
use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.
the class RegexSerDe method initialize.
/**
 * Configures this SerDe from the table properties: compiles the input regex, parses the
 * column names and types, builds the row object inspector, and allocates the reusable
 * row and output buffers.
 *
 * <p>The regex is compiled with {@link Pattern#DOTALL}, plus
 * {@link Pattern#CASE_INSENSITIVE} when the property named by
 * {@code INPUT_REGEX_CASE_SENSITIVE} equals "true" (ignoring case). Only primitive column
 * types are accepted.
 *
 * @param conf job configuration; not read by this method
 * @param tbl  table properties holding the regex, column names, column types and,
 *             optionally, the column name delimiter and column comments
 * @throws SerDeException if the input regex or the column list property is missing, if
 *         the number of column names differs from the number of column types, or if a
 *         column has a non-primitive type
 */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    // Read the configuration parameters from the table definition.
    inputRegex = tbl.getProperty(INPUT_REGEX);
    String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    boolean inputRegexIgnoreCase = "true".equalsIgnoreCase(tbl.getProperty(INPUT_REGEX_CASE_SENSITIVE));

    // output format string is not supported anymore, warn user of deprecation
    if (null != tbl.getProperty("output.format.string")) {
        LOG.warn("output.format.string has been deprecated");
    }

    // Parse the configuration parameters.
    if (inputRegex == null) {
        inputPattern = null;
        throw new SerDeException("This table does not have serde property \"input.regex\"!");
    }
    // Pattern flags form a bit mask, so combine them with bitwise OR rather than '+'.
    int regexFlags = Pattern.DOTALL | (inputRegexIgnoreCase ? Pattern.CASE_INSENSITIVE : 0);
    inputPattern = Pattern.compile(inputRegex, regexFlags);

    // Fail with a SerDeException instead of a NullPointerException from split().
    if (columnNameProperty == null) {
        throw new SerDeException("This table does not have property \"" + serdeConstants.LIST_COLUMNS + "\"!");
    }
    final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
    // NOTE(review): the delimiter is handed to String.split and is therefore treated as a regex.
    List<String> columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);

    // Checked explicitly: a plain 'assert' is skipped unless the JVM runs with -ea, and a
    // mismatch would otherwise surface later as an IndexOutOfBoundsException or a
    // silently truncated schema.
    if (columnNames.size() != columnTypes.size()) {
        throw new SerDeException(getClass().getName() + ": number of column names (" + columnNames.size() + ") does not match number of column types (" + columnTypes.size() + ")");
    }
    numColumns = columnNames.size();

    /* Constructing the row ObjectInspector:
     * The row consists of some set of primitive columns, each column will
     * be a java object of primitive type.
     */
    List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
    for (int c = 0; c < numColumns; c++) {
        TypeInfo typeInfo = columnTypes.get(c);
        if (typeInfo instanceof PrimitiveTypeInfo) {
            PrimitiveTypeInfo pti = (PrimitiveTypeInfo) typeInfo;
            AbstractPrimitiveJavaObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pti);
            columnOIs.add(oi);
        } else {
            throw new SerDeException(getClass().getName() + " doesn't allow column [" + c + "] named " + columnNames.get(c) + " with type " + columnTypes.get(c));
        }
    }

    // StandardStruct uses ArrayList to store the row.
    // Column comments are optional: without the property, build the inspector without
    // comments rather than failing inside Splitter.split(null).
    String columnComments = tbl.getProperty("columns.comments");
    if (columnComments == null) {
        rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
    } else {
        rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs, Lists.newArrayList(Splitter.on('\0').split(columnComments)));
    }

    // Constructing the row object, etc, which will be reused for all rows.
    row = new ArrayList<Object>(numColumns);
    for (int c = 0; c < numColumns; c++) {
        row.add(null);
    }
    outputFields = new Object[numColumns];
    outputRowText = new Text();
}
use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.
the class LazyBinaryColumnarSerDe method initialize.
/**
 * Initializes this SerDe from the table properties: records the column names and types,
 * builds the columnar struct inspector, and creates the cached lazy struct restricted to
 * the columns that need to be read.
 *
 * <p>Every column is read when {@code conf} is {@code null} or
 * {@code ColumnProjectionUtils.isReadAllColumns(conf)} is true; otherwise the ids come
 * from {@code ColumnProjectionUtils.getReadColumnIDs(conf)}.
 *
 * @param conf job configuration, may be {@code null}
 * @param tbl  table properties passed to {@code LazySerDeParameters}
 * @throws SerDeException declared by this method; raised by the calls it makes
 */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    LazySerDeParameters params = new LazySerDeParameters(conf, tbl, getClass().getName());
    columnNames = params.getColumnNames();
    columnTypes = params.getColumnTypes();
    cachedObjectInspector = LazyBinaryFactory.createColumnarStructInspector(columnNames, columnTypes);

    final int columnCount = columnTypes.size();
    final boolean readEverything = conf == null || ColumnProjectionUtils.isReadAllColumns(conf);

    List<Integer> columnsToRead;
    if (!readEverything) {
        // A projection is configured: read only the requested column ids.
        columnsToRead = ColumnProjectionUtils.getReadColumnIDs(conf);
    } else {
        // No projection: enumerate every column id in order.
        columnsToRead = new ArrayList<Integer>();
        for (int id = 0; id < columnCount; id++) {
            columnsToRead.add(id);
        }
    }

    cachedLazyStruct = new LazyBinaryColumnarStruct(cachedObjectInspector, columnsToRead);
    super.initialize(columnCount);
}
use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.
the class DynamicSerDe method dynamicSerDeStructBaseToObjectInspector.
/**
 * Recursively translates a DynamicSerDe type descriptor into the matching
 * {@link ObjectInspector}.
 *
 * <p>Lists and maps recurse into their element / key / value types, primitives are
 * resolved through their Java class, and anything else is treated as a struct whose
 * fields are converted one by one.
 *
 * @param bt the type descriptor to convert
 * @return a standard list, map, primitive-Java or struct object inspector for {@code bt}
 * @throws SerDeException declared by this method; propagated from the recursive calls
 */
public static ObjectInspector dynamicSerDeStructBaseToObjectInspector(DynamicSerDeTypeBase bt) throws SerDeException {
    if (bt.isList()) {
        DynamicSerDeTypeList listType = (DynamicSerDeTypeList) bt;
        ObjectInspector elementInspector = dynamicSerDeStructBaseToObjectInspector(listType.getElementType());
        return ObjectInspectorFactory.getStandardListObjectInspector(elementInspector);
    }

    if (bt.isMap()) {
        DynamicSerDeTypeMap mapType = (DynamicSerDeTypeMap) bt;
        // Key inspector is built before the value inspector, as in the original call.
        ObjectInspector keyInspector = dynamicSerDeStructBaseToObjectInspector(mapType.getKeyType());
        ObjectInspector valueInspector = dynamicSerDeStructBaseToObjectInspector(mapType.getValueType());
        return ObjectInspectorFactory.getStandardMapObjectInspector(keyInspector, valueInspector);
    }

    if (bt.isPrimitive()) {
        PrimitiveTypeEntry typeEntry = PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveJavaClass(bt.getRealType());
        return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(typeEntry.primitiveCategory);
    }

    // Not a list, map or primitive: must be a struct.
    DynamicSerDeStructBase structType = (DynamicSerDeStructBase) bt;
    DynamicSerDeField[] structFields = structType.getFieldList().getChildren();
    ArrayList<String> names = new ArrayList<String>(structFields.length);
    ArrayList<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(structFields.length);
    for (int idx = 0; idx < structFields.length; idx++) {
        DynamicSerDeField current = structFields[idx];
        names.add(current.name);
        inspectors.add(dynamicSerDeStructBaseToObjectInspector(current.getFieldType().getMyType()));
    }
    return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
}
Aggregations