Search in sources :

Example 1 with HiveJsonReader

Use of org.apache.hadoop.hive.serde2.json.HiveJsonReader in the project hive by apache:

In the class GenericUDFJsonRead, the method initialize:

/**
 * Validates the UDF arguments and builds the JSON reader used at evaluation time.
 *
 * <p>Expects exactly two primitive arguments: the JSON text to parse (argument 0)
 * and a constant type string (argument 1) describing the target Hive type.
 *
 * @param arguments the two argument object inspectors
 * @return the writable object inspector matching the requested type string
 * @throws UDFArgumentException if the argument count/kinds are wrong, argument 2
 *         is not a constant, or the type string cannot be parsed
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    checkArgsSize(arguments, 2, 2);
    checkArgPrimitive(arguments, 0);
    checkArgPrimitive(arguments, 1);
    // The type string must be a compile-time constant so the output schema is fixed.
    if (!ObjectInspectorUtils.isConstantObjectInspector(arguments[1])) {
        throw new UDFArgumentTypeException(1, getFuncName() + " argument 2 may only be a constant");
    }
    // Converter normalizes the first argument to Text regardless of its primitive type.
    inputConverter = new TextConverter((PrimitiveObjectInspector) arguments[0]);
    String typeStr = getConstantStringValue(arguments, 1);
    try {
        final TypeInfo t = TypeInfoUtils.getTypeInfoFromTypeString(typeStr);
        final ObjectInspector oi = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(t);
        jsonReader = new HiveJsonReader(oi);
        // Emit Hadoop Writable values so downstream operators can consume them directly.
        jsonReader.enable(Feature.PRIMITIVE_TO_WRITABLE);
    } catch (Exception e) {
        // Use the exception itself (toString) rather than getMessage(): getMessage()
        // may be null for some runtime exceptions, which would hide the actual cause.
        throw new UDFArgumentException(getFuncName() + ": Error parsing typestring: " + e);
    }
    return jsonReader.getObjectInspector();
}
Also used : UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) HiveJsonReader(org.apache.hadoop.hive.serde2.json.HiveJsonReader) UDFArgumentTypeException(org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException) TextConverter(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TextConverter) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException) UDFArgumentTypeException(org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)

Example 2 with HiveJsonReader

Use of org.apache.hadoop.hive.serde2.json.HiveJsonReader in the project hive by apache:

In the class JsonSerDe, the method initialize:

/**
 * Initialize the SerDe from table properties.
 *
 * <p>Builds the row type/object inspector from the column definitions, then
 * configures a {@link HiveJsonReader} and {@link HiveJsonWriter} pair according
 * to the table-level serde properties (timestamp formats, binary encoding,
 * null-empty-line handling, extra-field tolerance, complex-field stringify).
 *
 * @param conf System properties; can be null at compile time
 * @param tbl table properties
 * @param writeablePrimitivesDeserialize true if outputs are Hadoop Writable
 */
private void initialize(final Configuration conf, final Properties tbl, final boolean writeablePrimitivesDeserialize) {
    log.debug("Initializing JsonSerDe: {}", tbl.entrySet());
    final String nullEmpty = tbl.getProperty(NULL_EMPTY_LINES, "false");
    this.nullEmptyLines = Boolean.parseBoolean(nullEmpty);
    // Derive the row schema and its writable object inspector from the table columns.
    this.rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(getColumnNames(), getColumnTypes());
    this.soi = (StructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(this.rowTypeInfo);
    final TimestampParser tsParser;
    final String parserFormats = tbl.getProperty(serdeConstants.TIMESTAMP_FORMATS);
    if (parserFormats != null) {
        // User-supplied, escaped, comma-separated list of timestamp patterns.
        tsParser = new TimestampParser(HiveStringUtils.splitAndUnEscape(parserFormats));
    } else {
        tsParser = new TimestampParser();
    }
    final String binaryEncodingStr = tbl.getProperty(BINARY_FORMAT, "base64");
    // Locale.ROOT avoids locale-sensitive case mapping (e.g. the Turkish dotless-i),
    // which could mangle a user-provided encoding name and break valueOf().
    this.binaryEncoding = BinaryEncoding.valueOf(binaryEncodingStr.toUpperCase(java.util.Locale.ROOT));
    this.jsonReader = new HiveJsonReader(this.soi, tsParser);
    this.jsonWriter = new HiveJsonWriter(this.binaryEncoding, getColumnNames());
    this.jsonReader.setBinaryEncoding(binaryEncoding);
    this.jsonReader.enable(HiveJsonReader.Feature.COL_INDEX_PARSING);
    if (writeablePrimitivesDeserialize) {
        // Produce Hadoop Writable primitives instead of plain Java objects.
        this.jsonReader.enable(HiveJsonReader.Feature.PRIMITIVE_TO_WRITABLE);
    }
    // Tolerate JSON fields not present in the table schema (default: true).
    final String ignoreExtras = tbl.getProperty(IGNORE_EXTRA, "true");
    if (Boolean.parseBoolean(ignoreExtras)) {
        this.jsonReader.enable(HiveJsonReader.Feature.IGNORE_UNKNOWN_FIELDS);
    }
    // Render complex (struct/map/list) values as JSON strings when the target
    // column is a string type (default: true).
    final String stringifyComplex = tbl.getProperty(STRINGIFY_COMPLEX, "true");
    if (Boolean.parseBoolean(stringifyComplex)) {
        this.jsonReader.enable(HiveJsonReader.Feature.STRINGIFY_COMPLEX_FIELDS);
    }
    log.debug("Initialized SerDe {}", this);
    log.debug("JSON Struct Reader: {}", jsonReader);
    log.debug("JSON Struct Writer: {}", jsonWriter);
}
Also used : HiveJsonReader(org.apache.hadoop.hive.serde2.json.HiveJsonReader) TimestampParser(org.apache.hive.common.util.TimestampParser) HiveJsonWriter(org.apache.hadoop.hive.serde2.json.HiveJsonWriter)

Aggregations

HiveJsonReader (org.apache.hadoop.hive.serde2.json.HiveJsonReader)2 UDFArgumentException (org.apache.hadoop.hive.ql.exec.UDFArgumentException)1 UDFArgumentTypeException (org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException)1 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)1 HiveJsonWriter (org.apache.hadoop.hive.serde2.json.HiveJsonWriter)1 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)1 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)1 TextConverter (org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TextConverter)1 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)1 TimestampParser (org.apache.hive.common.util.TimestampParser)1