Search in sources :

Example 46 with TypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project hive by apache.

the class RegexSerDe method deserialize.

@Override
public Object deserialize(Writable blob) throws SerDeException {
    Text rowText = (Text) blob;
    Matcher m = inputPattern.matcher(rowText.toString());
    if (m.groupCount() != numColumns) {
        throw new SerDeException("Number of matching groups doesn't match the number of columns");
    }
    // If do not match, ignore the line, return a row with all nulls.
    if (!m.matches()) {
        unmatchedRowsCount++;
        if (!alreadyLoggedNoMatch) {
            // Report the row if its the first time
            LOG.warn("" + unmatchedRowsCount + " unmatched rows are found: " + rowText);
            alreadyLoggedNoMatch = true;
        }
        return null;
    }
    // Otherwise, return the row.
    for (int c = 0; c < numColumns; c++) {
        try {
            String t = m.group(c + 1);
            TypeInfo typeInfo = columnTypes.get(c);
            // Convert the column to the correct type when needed and set in row obj
            PrimitiveTypeInfo pti = (PrimitiveTypeInfo) typeInfo;
            switch(pti.getPrimitiveCategory()) {
                case STRING:
                    row.set(c, t);
                    break;
                case BYTE:
                    Byte b;
                    b = Byte.valueOf(t);
                    row.set(c, b);
                    break;
                case SHORT:
                    Short s;
                    s = Short.valueOf(t);
                    row.set(c, s);
                    break;
                case INT:
                    Integer i;
                    i = Integer.valueOf(t);
                    row.set(c, i);
                    break;
                case LONG:
                    Long l;
                    l = Long.valueOf(t);
                    row.set(c, l);
                    break;
                case FLOAT:
                    Float f;
                    f = Float.valueOf(t);
                    row.set(c, f);
                    break;
                case DOUBLE:
                    Double d;
                    d = Double.valueOf(t);
                    row.set(c, d);
                    break;
                case BOOLEAN:
                    Boolean bool;
                    bool = Boolean.valueOf(t);
                    row.set(c, bool);
                    break;
                case TIMESTAMP:
                    Timestamp ts;
                    ts = Timestamp.valueOf(t);
                    row.set(c, ts);
                    break;
                case DATE:
                    Date date;
                    date = Date.valueOf(t);
                    row.set(c, date);
                    break;
                case DECIMAL:
                    HiveDecimal bd = HiveDecimal.create(t);
                    row.set(c, bd);
                    break;
                case CHAR:
                    HiveChar hc = new HiveChar(t, ((CharTypeInfo) typeInfo).getLength());
                    row.set(c, hc);
                    break;
                case VARCHAR:
                    HiveVarchar hv = new HiveVarchar(t, ((VarcharTypeInfo) typeInfo).getLength());
                    row.set(c, hv);
                    break;
                default:
                    throw new SerDeException("Unsupported type " + typeInfo);
            }
        } catch (RuntimeException e) {
            partialMatchedRowsCount++;
            if (!alreadyLoggedPartialMatch) {
                // Report the row if its the first row
                LOG.warn("" + partialMatchedRowsCount + " partially unmatched rows are found, " + " cannot find group " + c + ": " + rowText);
                alreadyLoggedPartialMatch = true;
            }
            row.set(c, null);
        }
    }
    return row;
}
Also used : Matcher(java.util.regex.Matcher) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) Text(org.apache.hadoop.io.Text) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) Timestamp(java.sql.Timestamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Date(java.sql.Date) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal)

Example 47 with TypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project hive by apache.

the class RegexSerDe method initialize.

@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    // We can get the table definition from tbl.
    // Read the configuration parameters
    inputRegex = tbl.getProperty(INPUT_REGEX);
    String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    boolean inputRegexIgnoreCase = "true".equalsIgnoreCase(tbl.getProperty(INPUT_REGEX_CASE_SENSITIVE));
    // output format string is not supported anymore, warn user of deprecation
    if (null != tbl.getProperty("output.format.string")) {
        LOG.warn("output.format.string has been deprecated");
    }
    // Parse the configuration parameters
    if (inputRegex != null) {
        inputPattern = Pattern.compile(inputRegex, Pattern.DOTALL + (inputRegexIgnoreCase ? Pattern.CASE_INSENSITIVE : 0));
    } else {
        inputPattern = null;
        throw new SerDeException("This table does not have serde property \"input.regex\"!");
    }
    final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
    List<String> columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    assert columnNames.size() == columnTypes.size();
    numColumns = columnNames.size();
    /* Constructing the row ObjectInspector:
     * The row consists of some set of primitive columns, each column will
     * be a java object of primitive type.
     */
    List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
    for (int c = 0; c < numColumns; c++) {
        TypeInfo typeInfo = columnTypes.get(c);
        if (typeInfo instanceof PrimitiveTypeInfo) {
            PrimitiveTypeInfo pti = (PrimitiveTypeInfo) columnTypes.get(c);
            AbstractPrimitiveJavaObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pti);
            columnOIs.add(oi);
        } else {
            throw new SerDeException(getClass().getName() + " doesn't allow column [" + c + "] named " + columnNames.get(c) + " with type " + columnTypes.get(c));
        }
    }
    // StandardStruct uses ArrayList to store the row.
    rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs, Lists.newArrayList(Splitter.on('\0').split(tbl.getProperty("columns.comments"))));
    row = new ArrayList<Object>(numColumns);
    // Constructing the row object, etc, which will be reused for all rows.
    for (int c = 0; c < numColumns; c++) {
        row.add(null);
    }
    outputFields = new Object[numColumns];
    outputRowText = new Text();
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) AbstractPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector) ArrayList(java.util.ArrayList) AbstractPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector) Text(org.apache.hadoop.io.Text) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 48 with TypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project hive by apache.

the class AvroDeserializer method workerBase.

// The actual deserialization may involve nested records, which require recursion.
private List<Object> workerBase(List<Object> objectRow, Schema fileSchema, List<String> columnNames, List<TypeInfo> columnTypes, GenericRecord record) throws AvroSerdeException {
    for (int i = 0; i < columnNames.size(); i++) {
        TypeInfo columnType = columnTypes.get(i);
        String columnName = columnNames.get(i);
        Object datum = record.get(columnName);
        Schema datumSchema = record.getSchema().getField(columnName).schema();
        Schema.Field field = AvroSerdeUtils.isNullableType(fileSchema) ? AvroSerdeUtils.getOtherTypeFromNullableType(fileSchema).getField(columnName) : fileSchema.getField(columnName);
        objectRow.add(worker(datum, field == null ? null : field.schema(), datumSchema, columnType));
    }
    return objectRow;
}
Also used : Schema(org.apache.avro.Schema) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)

Example 49 with TypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project hive by apache.

the class SchemaToTypeInfo method generateArrayTypeInfo.

private static TypeInfo generateArrayTypeInfo(Schema schema, Set<Schema> seenSchemas) throws AvroSerdeException {
    assert schema.getType().equals(Schema.Type.ARRAY);
    Schema itemsType = schema.getElementType();
    TypeInfo itemsTypeInfo = generateTypeInfo(itemsType, seenSchemas);
    return TypeInfoFactory.getListTypeInfo(itemsTypeInfo);
}
Also used : Schema(org.apache.avro.Schema) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)

Example 50 with TypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project hive by apache.

the class TypeInfoToSchema method createAvroMap.

private Schema createAvroMap(TypeInfo typeInfo) {
    TypeInfo keyTypeInfo = ((MapTypeInfo) typeInfo).getMapKeyTypeInfo();
    if (((PrimitiveTypeInfo) keyTypeInfo).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
        throw new UnsupportedOperationException("Key of Map can only be a String");
    }
    TypeInfo valueTypeInfo = ((MapTypeInfo) typeInfo).getMapValueTypeInfo();
    Schema valueSchema = createAvroSchema(valueTypeInfo);
    return Schema.createMap(valueSchema);
}
Also used : MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) Schema(org.apache.avro.Schema) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)

Aggregations

TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)292 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)181 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)144 ArrayList (java.util.ArrayList)124 ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)97 MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)91 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)89 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)77 UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)61 Test (org.junit.Test)54 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)53 PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)50 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)44 Text (org.apache.hadoop.io.Text)41 CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)39 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)36 VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)35 List (java.util.List)33 HashMap (java.util.HashMap)32 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)32