Example 26 with SerDeException

Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.

From the class TestDataWritableWriter, method getParquetWritable:

private ParquetHiveRecord getParquetWritable(String columnNames, String columnTypes, ArrayWritable record) throws SerDeException {
    // Describe the schema through the standard table properties.
    Properties recordProperties = new Properties();
    recordProperties.setProperty("columns", columnNames);
    recordProperties.setProperty("columns.types", columnTypes);
    // Initialize a Parquet SerDe against that schema, then wrap the
    // deserialized record together with a matching ObjectInspector.
    ParquetHiveSerDe serDe = new ParquetHiveSerDe();
    SerDeUtils.initializeSerDe(serDe, new Configuration(), recordProperties, null);
    return new ParquetHiveRecord(serDe.deserialize(record), getObjectInspector(columnNames, columnTypes));
}
Also used : ParquetHiveSerDe(org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe) Configuration(org.apache.hadoop.conf.Configuration) ParquetHiveRecord(org.apache.hadoop.hive.serde2.io.ParquetHiveRecord) Properties(java.util.Properties)
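As a usage illustration, a test in the same class could feed this helper a hand-built ArrayWritable. This is a minimal sketch, assuming a single int column and imports of ArrayWritable, Writable, and IntWritable from org.apache.hadoop.io plus org.junit.Test and a static import of Assert.assertNotNull; the test name and assertion are hypothetical, not taken from TestDataWritableWriter:

@Test
public void testGetParquetWritableWithSingleIntColumn() throws SerDeException {
    // One column named "id" of type int; a one-element ArrayWritable stands
    // in for the raw row passed to the Parquet SerDe.
    ArrayWritable record = new ArrayWritable(Writable.class, new Writable[] { new IntWritable(1) });
    ParquetHiveRecord writable = getParquetWritable("id", "int", record);
    assertNotNull(writable);
}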

Example 27 with SerDeException

Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.

From the class RegexSerDe, method deserialize:

@Override
public Object deserialize(Writable blob) throws SerDeException {
    Text rowText = (Text) blob;
    Matcher m = inputPattern.matcher(rowText.toString());
    // The pattern must define exactly one capturing group per column.
    if (m.groupCount() != numColumns) {
        throw new SerDeException("Number of matching groups doesn't match the number of columns");
    }
    // If the line does not match, count it and return null so the row is skipped.
    if (!m.matches()) {
        unmatchedRowsCount++;
        if (!alreadyLoggedNoMatch) {
            // Log only the first unmatched row to avoid flooding the log.
            LOG.warn(unmatchedRowsCount + " unmatched rows are found: " + rowText);
            alreadyLoggedNoMatch = true;
        }
        return null;
    }
    // Otherwise, return the row.
    for (int c = 0; c < numColumns; c++) {
        try {
            String t = m.group(c + 1);
            TypeInfo typeInfo = columnTypes.get(c);
            // Convert the column to the correct type when needed and set in row obj
            PrimitiveTypeInfo pti = (PrimitiveTypeInfo) typeInfo;
            switch (pti.getPrimitiveCategory()) {
                case STRING:
                    row.set(c, t);
                    break;
                case BYTE:
                    row.set(c, Byte.valueOf(t));
                    break;
                case SHORT:
                    row.set(c, Short.valueOf(t));
                    break;
                case INT:
                    row.set(c, Integer.valueOf(t));
                    break;
                case LONG:
                    row.set(c, Long.valueOf(t));
                    break;
                case FLOAT:
                    row.set(c, Float.valueOf(t));
                    break;
                case DOUBLE:
                    row.set(c, Double.valueOf(t));
                    break;
                case BOOLEAN:
                    row.set(c, Boolean.valueOf(t));
                    break;
                case TIMESTAMP:
                    row.set(c, Timestamp.valueOf(t));
                    break;
                case DATE:
                    row.set(c, Date.valueOf(t));
                    break;
                case DECIMAL:
                    row.set(c, HiveDecimal.create(t));
                    break;
                case CHAR:
                    row.set(c, new HiveChar(t, ((CharTypeInfo) typeInfo).getLength()));
                    break;
                case VARCHAR:
                    row.set(c, new HiveVarchar(t, ((VarcharTypeInfo) typeInfo).getLength()));
                    break;
                default:
                    throw new SerDeException("Unsupported type " + typeInfo);
            }
        } catch (RuntimeException e) {
            partialMatchedRowsCount++;
            if (!alreadyLoggedPartialMatch) {
                // Log only the first partially matched row to avoid flooding the log.
                LOG.warn(partialMatchedRowsCount + " partially unmatched rows are found, cannot find group " + c + ": " + rowText);
                alreadyLoggedPartialMatch = true;
            }
            row.set(c, null);
        }
    }
    return row;
}
Also used : Matcher(java.util.regex.Matcher) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) Text(org.apache.hadoop.io.Text) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) Timestamp(java.sql.Timestamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Date(java.sql.Date) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal)
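For context, the following sketch drives the SerDe by hand: initialize it with the standard table properties (the property names come from the initialize method in the next example), then deserialize one matching and one non-matching line. The regex and column names are made up for illustration:

RegexSerDe serDe = new RegexSerDe();
Properties props = new Properties();
props.setProperty("input.regex", "(\\S+) (\\d+)");
props.setProperty("columns", "name,count");
props.setProperty("columns.types", "string,int");
// Two empty column comments; initialize splits this property on '\0'.
props.setProperty("columns.comments", "\0");
serDe.initialize(new Configuration(), props);

Object row = serDe.deserialize(new Text("alice 42")); // -> ["alice", 42]
Object skipped = serDe.deserialize(new Text("###"));  // -> null, counted and warned once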

Example 28 with SerDeException

Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.

From the class RegexSerDe, method initialize:

@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    // We can get the table definition from tbl.
    // Read the configuration parameters
    inputRegex = tbl.getProperty(INPUT_REGEX);
    String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    boolean inputRegexIgnoreCase = "true".equalsIgnoreCase(tbl.getProperty(INPUT_REGEX_CASE_SENSITIVE));
    // output format string is not supported anymore, warn user of deprecation
    if (null != tbl.getProperty("output.format.string")) {
        LOG.warn("output.format.string has been deprecated");
    }
    // Parse the configuration parameters
    if (inputRegex != null) {
        inputPattern = Pattern.compile(inputRegex, Pattern.DOTALL + (inputRegexIgnoreCase ? Pattern.CASE_INSENSITIVE : 0));
    } else {
        inputPattern = null;
        throw new SerDeException("This table does not have serde property \"input.regex\"!");
    }
    final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
    List<String> columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    assert columnNames.size() == columnTypes.size();
    numColumns = columnNames.size();
    /* Constructing the row ObjectInspector:
     * The row consists of some set of primitive columns, each column will
     * be a java object of primitive type.
     */
    List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
    for (int c = 0; c < numColumns; c++) {
        TypeInfo typeInfo = columnTypes.get(c);
        if (typeInfo instanceof PrimitiveTypeInfo) {
            PrimitiveTypeInfo pti = (PrimitiveTypeInfo) typeInfo;
            AbstractPrimitiveJavaObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pti);
            columnOIs.add(oi);
        } else {
            throw new SerDeException(getClass().getName() + " doesn't allow column [" + c + "] named " + columnNames.get(c) + " with type " + columnTypes.get(c));
        }
    }
    // StandardStruct uses ArrayList to store the row.
    rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs, Lists.newArrayList(Splitter.on('\0').split(tbl.getProperty("columns.comments"))));
    row = new ArrayList<Object>(numColumns);
    // Construct the row object, which will be reused for all rows.
    for (int c = 0; c < numColumns; c++) {
        row.add(null);
    }
    outputFields = new Object[numColumns];
    outputRowText = new Text();
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) AbstractPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector) ArrayList(java.util.ArrayList) AbstractPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector) Text(org.apache.hadoop.io.Text) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
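Because initialize throws when input.regex is absent, a caller gets a fast failure at table-setup time rather than per row. A minimal sketch of that path, with illustrative properties:

RegexSerDe serDe = new RegexSerDe();
Properties props = new Properties();
props.setProperty("columns", "line");
props.setProperty("columns.types", "string");
try {
    serDe.initialize(new Configuration(), props);
} catch (SerDeException expected) {
    // "This table does not have serde property \"input.regex\"!"
}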

Example 29 with SerDeException

Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.

From the class LazyBinaryColumnarSerDe, method initialize:

@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    LazySerDeParameters serdeParams = new LazySerDeParameters(conf, tbl, getClass().getName());
    columnNames = serdeParams.getColumnNames();
    columnTypes = serdeParams.getColumnTypes();
    cachedObjectInspector = LazyBinaryFactory.createColumnarStructInspector(columnNames, columnTypes);
    int size = columnTypes.size();
    List<Integer> notSkipIDs = new ArrayList<Integer>();
    // Determine which columns to deserialize: all of them, or only the
    // projected column ids when a projection is configured.
    if (conf == null || ColumnProjectionUtils.isReadAllColumns(conf)) {
        for (int i = 0; i < size; i++) {
            notSkipIDs.add(i);
        }
    } else {
        notSkipIDs = ColumnProjectionUtils.getReadColumnIDs(conf);
    }
    cachedLazyStruct = new LazyBinaryColumnarStruct(cachedObjectInspector, notSkipIDs);
    super.initialize(size);
}
Also used : LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) ArrayList(java.util.ArrayList)
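The notSkipIDs logic means the resulting struct only materializes projected columns. A sketch of initializing with a projection, assuming ColumnProjectionUtils.appendReadColumns(Configuration, List<Integer>) as the registration call (verify the helper against your Hive version):

Configuration conf = new Configuration();
// Project only column 0 ("key"); column 1 will be skipped during deserialization.
ColumnProjectionUtils.appendReadColumns(conf, Arrays.asList(0));
Properties tbl = new Properties();
tbl.setProperty("columns", "key,value");
tbl.setProperty("columns.types", "string,string");
LazyBinaryColumnarSerDe serDe = new LazyBinaryColumnarSerDe();
serDe.initialize(conf, tbl);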

Example 30 with SerDeException

Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.

From the class DynamicSerDe, method dynamicSerDeStructBaseToObjectInspector:

public static ObjectInspector dynamicSerDeStructBaseToObjectInspector(DynamicSerDeTypeBase bt) throws SerDeException {
    if (bt.isList()) {
        return ObjectInspectorFactory.getStandardListObjectInspector(dynamicSerDeStructBaseToObjectInspector(((DynamicSerDeTypeList) bt).getElementType()));
    } else if (bt.isMap()) {
        DynamicSerDeTypeMap btMap = (DynamicSerDeTypeMap) bt;
        return ObjectInspectorFactory.getStandardMapObjectInspector(dynamicSerDeStructBaseToObjectInspector(btMap.getKeyType()), dynamicSerDeStructBaseToObjectInspector(btMap.getValueType()));
    } else if (bt.isPrimitive()) {
        PrimitiveTypeEntry pte = PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveJavaClass(bt.getRealType());
        return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pte.primitiveCategory);
    } else {
        // Must be a struct
        DynamicSerDeStructBase btStruct = (DynamicSerDeStructBase) bt;
        DynamicSerDeFieldList fieldList = btStruct.getFieldList();
        DynamicSerDeField[] fields = fieldList.getChildren();
        ArrayList<String> fieldNames = new ArrayList<String>(fields.length);
        ArrayList<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fields.length);
        for (DynamicSerDeField field : fields) {
            fieldNames.add(field.name);
            fieldObjectInspectors.add(dynamicSerDeStructBaseToObjectInspector(field.getFieldType().getMyType()));
        }
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldObjectInspectors);
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) PrimitiveTypeEntry(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry) ArrayList(java.util.ArrayList)
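A short sketch of consuming the returned inspector in the struct case; bt and rowObject are assumed to come from the surrounding deserialization code:

StructObjectInspector soi = (StructObjectInspector) dynamicSerDeStructBaseToObjectInspector(bt);
for (StructField field : soi.getAllStructFieldRefs()) {
    // Pull each field out of a deserialized row generically.
    Object fieldData = soi.getStructFieldData(rowObject, field);
    System.out.println(field.getFieldName() + " = " + fieldData);
}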

Aggregations

SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 124
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 108
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 100
ArrayList (java.util.ArrayList): 98
Properties (java.util.Properties): 59
Test (org.junit.Test): 59
Configuration (org.apache.hadoop.conf.Configuration): 52
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 52
Text (org.apache.hadoop.io.Text): 50
IOException (java.io.IOException): 37
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 33
Schema (org.apache.avro.Schema): 31
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 31
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 28
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 28
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 24
Put (org.apache.hadoop.hbase.client.Put): 22
LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters): 22
IntWritable (org.apache.hadoop.io.IntWritable): 22
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 21