
Example 11 with SerDeException

Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.

From the class TestSerDe, method serialize:

@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    if (objInspector.getCategory() != Category.STRUCT) {
        throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: " + objInspector.getTypeName());
    }
    StructObjectInspector soi = (StructObjectInspector) objInspector;
    List<? extends StructField> fields = soi.getAllStructFieldRefs();
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < fields.size(); i++) {
        if (i > 0) {
            sb.append(separator);
        }
        Object column = soi.getStructFieldData(obj, fields.get(i));
        if (fields.get(i).getFieldObjectInspector().getCategory() == Category.PRIMITIVE) {
            // For a primitive object, serialize to a plain string
            sb.append(column == null ? nullString : column.toString());
        } else {
            // For a complex object, serialize to JSON format
            sb.append(SerDeUtils.getJSONString(column, fields.get(i).getFieldObjectInspector()));
        }
    }
    serializeCache.set(sb.toString());
    return serializeCache;
}
Also used: MetadataListStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MetadataListStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
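To see how this serializer is driven, here is a minimal usage sketch. It is not taken from the Hive test suite: the column layout, the row values, and the assumption that the serde has already been initialized (so separator and nullString are set) are illustrative. It uses the standard ObjectInspectorFactory and PrimitiveObjectInspectorFactory from org.apache.hadoop.hive.serde2.objectinspector.

// Illustrative sketch: serialize one (int, string) row through a standard
// struct ObjectInspector. Assumes 'serde' was initialized beforehand.
static Writable serializeOneRow(TestSerDe serde) throws SerDeException {
    ObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("id", "name"),
        Arrays.asList(
            (ObjectInspector) PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector));
    // Passing a non-struct inspector here would trigger the SerDeException above.
    return serde.serialize(Arrays.asList(7, "alice"), rowOI);
}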

Example 12 with SerDeException

Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.

From the class MetaStoreUtils, method getFieldsFromDeserializer:

/**
   * @param tableName table name, optionally dotted to address a nested field
   *                  (e.g. "tbl.col"), with $elem$, $key$ and $value$ stepping
   *                  into lists and maps
   * @param deserializer the initialized deserializer to inspect
   * @return the list of fields
   * @throws SerDeException if the deserializer cannot supply an ObjectInspector
   * @throws MetaException if a name component does not resolve to a field
   */
public static List<FieldSchema> getFieldsFromDeserializer(String tableName, Deserializer deserializer) throws SerDeException, MetaException {
    ObjectInspector oi = deserializer.getObjectInspector();
    String[] names = tableName.split("\\.");
    String last_name = names[names.length - 1];
    for (int i = 1; i < names.length; i++) {
        if (oi instanceof StructObjectInspector) {
            StructObjectInspector soi = (StructObjectInspector) oi;
            StructField sf = soi.getStructFieldRef(names[i]);
            if (sf == null) {
                throw new MetaException("Invalid Field " + names[i]);
            } else {
                oi = sf.getFieldObjectInspector();
            }
        } else if (oi instanceof ListObjectInspector && names[i].equalsIgnoreCase("$elem$")) {
            ListObjectInspector loi = (ListObjectInspector) oi;
            oi = loi.getListElementObjectInspector();
        } else if (oi instanceof MapObjectInspector && names[i].equalsIgnoreCase("$key$")) {
            MapObjectInspector moi = (MapObjectInspector) oi;
            oi = moi.getMapKeyObjectInspector();
        } else if (oi instanceof MapObjectInspector && names[i].equalsIgnoreCase("$value$")) {
            MapObjectInspector moi = (MapObjectInspector) oi;
            oi = moi.getMapValueObjectInspector();
        } else {
            throw new MetaException("Unknown type for " + names[i]);
        }
    }
    ArrayList<FieldSchema> str_fields = new ArrayList<FieldSchema>();
    // If the resolved type is not a struct, report it as a single field;
    // otherwise emit one FieldSchema per struct member.
    if (oi.getCategory() != Category.STRUCT) {
        str_fields.add(new FieldSchema(last_name, oi.getTypeName(), FROM_SERIALIZER));
    } else {
        List<? extends StructField> fields = ((StructObjectInspector) oi).getAllStructFieldRefs();
        for (int i = 0; i < fields.size(); i++) {
            StructField structField = fields.get(i);
            String fieldName = structField.getFieldName();
            String fieldTypeName = structField.getFieldObjectInspector().getTypeName();
            String fieldComment = determineFieldComment(structField.getFieldComment());
            str_fields.add(new FieldSchema(fieldName, fieldTypeName, fieldComment));
        }
    }
    return str_fields;
}
Also used: ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) ArrayList(java.util.ArrayList)
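The dotted-name traversal above is easiest to see in a concrete call. The following is a hedged sketch, not Hive test code: it assumes 'deserializer' is an initialized Deserializer whose row type is struct<addr:struct<city:string,zip:int>, tags:array<string>> in a table named "t".

// Top-level columns of table "t": one FieldSchema per column
List<FieldSchema> cols = MetaStoreUtils.getFieldsFromDeserializer("t", deserializer);
// Drill into the nested struct column: [city: string, zip: int]
List<FieldSchema> addr = MetaStoreUtils.getFieldsFromDeserializer("t.addr", deserializer);
// Step into the array's element type via the $elem$ pseudo-name: [$elem$: string]
List<FieldSchema> elem = MetaStoreUtils.getFieldsFromDeserializer("t.tags.$elem$", deserializer);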

Example 13 with SerDeException

Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.

From the class MultiDelimitSerDe, method serializeNoEncode:

// This is basically the same as LazySimpleSerDe.serialize, except that binary
// data is not Base64-encoded, because the delimiter is itself a printable
// string. Consider the row "strAQ==\1": str is a string field, AQ== is the
// (printable) delimiter, and \1 is raw binary data.
private static void serializeNoEncode(ByteStream.Output out, Object obj, ObjectInspector objInspector, byte[] separators, int level, Text nullSequence, boolean escaped, byte escapeChar, boolean[] needsEscape) throws IOException, SerDeException {
    if (obj == null) {
        out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
        return;
    }
    char separator;
    List<?> list;
    switch(objInspector.getCategory()) {
        case PRIMITIVE:
            PrimitiveObjectInspector oi = (PrimitiveObjectInspector) objInspector;
            if (oi.getPrimitiveCategory() == PrimitiveCategory.BINARY) {
                BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(obj);
                byte[] toWrite = new byte[bw.getLength()];
                System.arraycopy(bw.getBytes(), 0, toWrite, 0, bw.getLength());
                out.write(toWrite, 0, toWrite.length);
            } else {
                LazyUtils.writePrimitiveUTF8(out, obj, oi, escaped, escapeChar, needsEscape);
            }
            return;
        case LIST:
            separator = (char) separators[level];
            ListObjectInspector loi = (ListObjectInspector) objInspector;
            list = loi.getList(obj);
            ObjectInspector eoi = loi.getListElementObjectInspector();
            if (list == null) {
                out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
            } else {
                for (int i = 0; i < list.size(); i++) {
                    if (i > 0) {
                        out.write(separator);
                    }
                    serializeNoEncode(out, list.get(i), eoi, separators, level + 1, nullSequence, escaped, escapeChar, needsEscape);
                }
            }
            return;
        case MAP:
            separator = (char) separators[level];
            char keyValueSeparator = (char) separators[level + 1];
            MapObjectInspector moi = (MapObjectInspector) objInspector;
            ObjectInspector koi = moi.getMapKeyObjectInspector();
            ObjectInspector voi = moi.getMapValueObjectInspector();
            Map<?, ?> map = moi.getMap(obj);
            if (map == null) {
                out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
            } else {
                boolean first = true;
                for (Map.Entry<?, ?> entry : map.entrySet()) {
                    if (first) {
                        first = false;
                    } else {
                        out.write(separator);
                    }
                    serializeNoEncode(out, entry.getKey(), koi, separators, level + 2, nullSequence, escaped, escapeChar, needsEscape);
                    out.write(keyValueSeparator);
                    serializeNoEncode(out, entry.getValue(), voi, separators, level + 2, nullSequence, escaped, escapeChar, needsEscape);
                }
            }
            return;
        case STRUCT:
            separator = (char) separators[level];
            StructObjectInspector soi = (StructObjectInspector) objInspector;
            List<? extends StructField> fields = soi.getAllStructFieldRefs();
            list = soi.getStructFieldsDataAsList(obj);
            if (list == null) {
                out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
            } else {
                for (int i = 0; i < list.size(); i++) {
                    if (i > 0) {
                        out.write(separator);
                    }
                    serializeNoEncode(out, list.get(i), fields.get(i).getFieldObjectInspector(), separators, level + 1, nullSequence, escaped, escapeChar, needsEscape);
                }
            }
            return;
    }
    throw new RuntimeException("Unknown category type: " + objInspector.getCategory());
}
Also used: ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) BytesWritable(org.apache.hadoop.io.BytesWritable) Map(java.util.Map)
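A configuration sketch makes the no-Base64 choice concrete. This is hedged, not taken from Hive: the column layout and delimiter value are illustrative, and the two-argument initialize overload is an assumption (newer Hive versions use a different signature). The property keys come from org.apache.hadoop.hive.serde.serdeConstants.

Properties tbl = new Properties();
tbl.setProperty(serdeConstants.LIST_COLUMNS, "s,b");
tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,binary");
// A printable multi-character field delimiter. This is exactly why binary
// columns are written raw above: Base64 output (e.g. "AQ==") is printable
// text and could collide with a printable delimiter like this one.
tbl.setProperty(serdeConstants.FIELD_DELIM, "AQ==");

MultiDelimitSerDe serde = new MultiDelimitSerDe();
serde.initialize(new Configuration(), tbl);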

Example 14 with SerDeException

Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.

From the class RegexSerDe, method deserialize:

@Override
public Object deserialize(Writable blob) throws SerDeException {
    if (inputPattern == null) {
        throw new SerDeException("This table does not have serde property \"input.regex\"!");
    }
    Text rowText = (Text) blob;
    Matcher m = inputPattern.matcher(rowText.toString());
    // If the line does not match, count it, log occasionally, and return null
    // so the row is ignored.
    if (!m.matches()) {
        unmatchedRows++;
        if (unmatchedRows >= nextUnmatchedRows) {
            nextUnmatchedRows = getNextNumberToDisplay(nextUnmatchedRows);
            // Report the row
            LOG.warn("" + unmatchedRows + " unmatched rows are found: " + rowText);
        }
        return null;
    }
    // Otherwise, return the row.
    for (int c = 0; c < numColumns; c++) {
        try {
            row.set(c, m.group(c + 1));
        } catch (RuntimeException e) {
            partialMatchedRows++;
            if (partialMatchedRows >= nextPartialMatchedRows) {
                nextPartialMatchedRows = getNextNumberToDisplay(nextPartialMatchedRows);
                // Report the row
                LOG.warn("" + partialMatchedRows + " partially unmatched rows are found, " + " cannot find group " + c + ": " + rowText);
            }
            row.set(c, null);
        }
    }
    return row;
}
Also used: Matcher(java.util.regex.Matcher) Text(org.apache.hadoop.io.Text) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
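Here is a usage sketch for the "input.regex" property this method checks. The regex, column names, and two-argument initialize overload are illustrative assumptions, not Hive test code.

Properties tbl = new Properties();
tbl.setProperty(serdeConstants.LIST_COLUMNS, "host,status");
tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string");
// Each capturing group feeds one column, in declaration order.
tbl.setProperty("input.regex", "(\\S+) .* (\\d{3})");

RegexSerDe serde = new RegexSerDe();
serde.initialize(new Configuration(), tbl);

Object row = serde.deserialize(new Text("10.0.0.1 GET /index.html 200"));
// row is the reused field list ["10.0.0.1", "200"]; an unmatched line yields null.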

Example 15 with SerDeException

Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.

From the class TypedBytesSerDe, method deserialize:

@Override
public Object deserialize(Writable blob) throws SerDeException {
    BytesWritable data = (BytesWritable) blob;
    inBarrStr.reset(data.getBytes(), 0, data.getLength());
    try {
        for (int i = 0; i < columnNames.size(); i++) {
            row.set(i, deserializeField(tbIn, columnTypes.get(i), row.get(i)));
        }
        // The next type code should be the end-of-record marker (checked only
        // when JVM assertions are enabled).
        assert tbIn.readTypeCode() == Type.ENDOFRECORD;
    } catch (IOException e) {
        throw new SerDeException(e);
    }
    return row;
}
Also used: BytesWritable(org.apache.hadoop.io.BytesWritable) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
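A short sketch of driving this method. The column layout and the two-argument initialize overload are assumptions, and 'blob' stands for a BytesWritable holding one complete typed-bytes record ending with the ENDOFRECORD marker checked above (for example, output from a Hadoop Streaming job).

Properties tbl = new Properties();
tbl.setProperty(serdeConstants.LIST_COLUMNS, "a,b");
tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");

TypedBytesSerDe serde = new TypedBytesSerDe();
serde.initialize(new Configuration(), tbl);

Object row = serde.deserialize(blob);  // blob: a BytesWritable (assumed input)
// Note: the same 'row' list is reused across calls, so copy values out
// before deserializing the next record.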

Aggregations

SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 124
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 108
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 100
ArrayList (java.util.ArrayList): 98
Properties (java.util.Properties): 59
Test (org.junit.Test): 59
Configuration (org.apache.hadoop.conf.Configuration): 52
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 52
Text (org.apache.hadoop.io.Text): 50
IOException (java.io.IOException): 37
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 33
Schema (org.apache.avro.Schema): 31
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 31
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 28
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 28
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 24
Put (org.apache.hadoop.hbase.client.Put): 22
LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters): 22
IntWritable (org.apache.hadoop.io.IntWritable): 22
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 21