Search in sources :

Example 51 with HiveVarchar

use of org.apache.hadoop.hive.common.type.HiveVarchar in project hive by apache.

the class SerdeRandomRowSource method getRandHiveVarchar.

public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) {
    int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength());
    String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
    HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength);
    return hiveVarchar;
}
Also used : HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar)

Example 52 with HiveVarchar

use of org.apache.hadoop.hive.common.type.HiveVarchar in project hive by apache.

the class DruidSerDe method deserialize.

@Override
public Object deserialize(Writable writable) throws SerDeException {
    final DruidWritable input = (DruidWritable) writable;
    final List<Object> output = Lists.newArrayListWithExpectedSize(columns.length);
    for (int i = 0; i < columns.length; i++) {
        final Object value = input.getValue().get(columns[i]);
        if (value == null) {
            output.add(null);
            continue;
        }
        switch(types[i].getPrimitiveCategory()) {
            case TIMESTAMP:
                output.add(new TimestampWritable(Timestamp.valueOf(ZonedDateTime.ofInstant(Instant.ofEpochMilli(((Number) value).longValue()), tsTZTypeInfo.timeZone()).format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")).toString())));
                break;
            case TIMESTAMPLOCALTZ:
                output.add(new TimestampLocalTZWritable(new TimestampTZ(ZonedDateTime.ofInstant(Instant.ofEpochMilli(((Number) value).longValue()), ((TimestampLocalTZTypeInfo) types[i]).timeZone()))));
                break;
            case BYTE:
                output.add(new ByteWritable(((Number) value).byteValue()));
                break;
            case SHORT:
                output.add(new ShortWritable(((Number) value).shortValue()));
                break;
            case INT:
                output.add(new IntWritable(((Number) value).intValue()));
                break;
            case LONG:
                output.add(new LongWritable(((Number) value).longValue()));
                break;
            case FLOAT:
                output.add(new FloatWritable(((Number) value).floatValue()));
                break;
            case DOUBLE:
                output.add(new DoubleWritable(((Number) value).doubleValue()));
                break;
            case DECIMAL:
                output.add(new HiveDecimalWritable(HiveDecimal.create(((Number) value).doubleValue())));
                break;
            case CHAR:
                output.add(new HiveCharWritable(new HiveChar(value.toString(), ((CharTypeInfo) types[i]).getLength())));
                break;
            case VARCHAR:
                output.add(new HiveVarcharWritable(new HiveVarchar(value.toString(), ((VarcharTypeInfo) types[i]).getLength())));
                break;
            case STRING:
                output.add(new Text(value.toString()));
                break;
            case BOOLEAN:
                output.add(new BooleanWritable(Boolean.valueOf(value.toString())));
                break;
            default:
                throw new SerDeException("Unknown type: " + types[i].getPrimitiveCategory());
        }
    }
    return output;
}
Also used : HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) TimestampLocalTZTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) Text(org.apache.hadoop.io.Text) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) TimestampTZ(org.apache.hadoop.hive.common.type.TimestampTZ) FloatWritable(org.apache.hadoop.io.FloatWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) TimestampLocalTZWritable(org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 53 with HiveVarchar

use of org.apache.hadoop.hive.common.type.HiveVarchar in project hive by apache.

the class JsonSerDe method extractCurrentField.

/**
 * Utility method to extract current expected field from given JsonParser
 *
 * isTokenCurrent is a boolean variable also passed in, which determines
 * if the JsonParser is already at the token we expect to read next, or
 * needs advancing to the next before we read.
 */
private Object extractCurrentField(JsonParser p, HCatFieldSchema hcatFieldSchema, boolean isTokenCurrent) throws IOException {
    Object val = null;
    JsonToken valueToken;
    if (isTokenCurrent) {
        valueToken = p.getCurrentToken();
    } else {
        valueToken = p.nextToken();
    }
    switch(hcatFieldSchema.getType()) {
        case INT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getIntValue();
            break;
        case TINYINT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getByteValue();
            break;
        case SMALLINT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getShortValue();
            break;
        case BIGINT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getLongValue();
            break;
        case BOOLEAN:
            String bval = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText();
            if (bval != null) {
                val = Boolean.valueOf(bval);
            } else {
                val = null;
            }
            break;
        case FLOAT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getFloatValue();
            break;
        case DOUBLE:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getDoubleValue();
            break;
        case STRING:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText();
            break;
        case BINARY:
            String b = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText();
            if (b != null) {
                try {
                    String t = Text.decode(b.getBytes(), 0, b.getBytes().length);
                    return t.getBytes();
                } catch (CharacterCodingException e) {
                    LOG.warn("Error generating json binary type from object.", e);
                    return null;
                }
            } else {
                val = null;
            }
            break;
        case DATE:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : Date.valueOf(p.getText());
            break;
        case TIMESTAMP:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : tsParser.parseTimestamp(p.getText());
            break;
        case DECIMAL:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : HiveDecimal.create(p.getText());
            break;
        case VARCHAR:
            int vLen = ((BaseCharTypeInfo) hcatFieldSchema.getTypeInfo()).getLength();
            val = (valueToken == JsonToken.VALUE_NULL) ? null : new HiveVarchar(p.getText(), vLen);
            break;
        case CHAR:
            int cLen = ((BaseCharTypeInfo) hcatFieldSchema.getTypeInfo()).getLength();
            val = (valueToken == JsonToken.VALUE_NULL) ? null : new HiveChar(p.getText(), cLen);
            break;
        case ARRAY:
            if (valueToken == JsonToken.VALUE_NULL) {
                val = null;
                break;
            }
            if (valueToken != JsonToken.START_ARRAY) {
                throw new IOException("Start of Array expected");
            }
            List<Object> arr = new ArrayList<Object>();
            while ((valueToken = p.nextToken()) != JsonToken.END_ARRAY) {
                arr.add(extractCurrentField(p, hcatFieldSchema.getArrayElementSchema().get(0), true));
            }
            val = arr;
            break;
        case MAP:
            if (valueToken == JsonToken.VALUE_NULL) {
                val = null;
                break;
            }
            if (valueToken != JsonToken.START_OBJECT) {
                throw new IOException("Start of Object expected");
            }
            Map<Object, Object> map = new LinkedHashMap<Object, Object>();
            HCatFieldSchema valueSchema = hcatFieldSchema.getMapValueSchema().get(0);
            while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
                Object k = getObjectOfCorrespondingPrimitiveType(p.getCurrentName(), hcatFieldSchema.getMapKeyTypeInfo());
                Object v = extractCurrentField(p, valueSchema, false);
                map.put(k, v);
            }
            val = map;
            break;
        case STRUCT:
            if (valueToken == JsonToken.VALUE_NULL) {
                val = null;
                break;
            }
            if (valueToken != JsonToken.START_OBJECT) {
                throw new IOException("Start of Object expected");
            }
            HCatSchema subSchema = hcatFieldSchema.getStructSubSchema();
            int sz = subSchema.getFieldNames().size();
            List<Object> struct = new ArrayList<Object>(Collections.nCopies(sz, null));
            while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
                populateRecord(struct, valueToken, p, subSchema);
            }
            val = struct;
            break;
        default:
            LOG.error("Unknown type found: " + hcatFieldSchema.getType());
            return null;
    }
    return val;
}
Also used : BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) ArrayList(java.util.ArrayList) CharacterCodingException(java.nio.charset.CharacterCodingException) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) IOException(java.io.IOException) LinkedHashMap(java.util.LinkedHashMap) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) JsonToken(org.codehaus.jackson.JsonToken)

Example 54 with HiveVarchar

use of org.apache.hadoop.hive.common.type.HiveVarchar in project hive by apache.

the class TestParquetRecordReaderWrapper method testBuilderComplexTypes2.

/**
 * Check the converted filter predicate is null if unsupported types are included
 * @throws Exception
 */
@Test
public void testBuilderComplexTypes2() throws Exception {
    SearchArgument sarg = SearchArgumentFactory.newBuilder().startAnd().lessThan("x", PredicateLeaf.Type.DATE, Date.valueOf("2005-3-12")).lessThanEquals("y", PredicateLeaf.Type.STRING, new HiveChar("hi", 10).toString()).equals("z", PredicateLeaf.Type.DECIMAL, new HiveDecimalWritable("1.0")).end().build();
    MessageType schema = MessageTypeParser.parseMessageType("message test {" + " required int32 x; required binary y; required binary z;}");
    assertEquals(null, ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema));
    sarg = SearchArgumentFactory.newBuilder().startNot().startOr().isNull("x", PredicateLeaf.Type.LONG).between("y", PredicateLeaf.Type.DECIMAL, new HiveDecimalWritable("10"), new HiveDecimalWritable("20.0")).in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L).nullSafeEquals("a", PredicateLeaf.Type.STRING, new HiveVarchar("stinger", 100).toString()).end().end().build();
    schema = MessageTypeParser.parseMessageType("message test {" + " optional int32 x; required binary y; required int32 z;" + " optional binary a;}");
    assertEquals(null, ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema));
}
Also used : HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 55 with HiveVarchar

use of org.apache.hadoop.hive.common.type.HiveVarchar in project hive by apache.

the class TestParquetRecordReaderWrapper method testBuilderComplexTypes.

/**
 * Check the converted filter predicate is null if unsupported types are included
 * @throws Exception
 */
@Test
public void testBuilderComplexTypes() throws Exception {
    SearchArgument sarg = SearchArgumentFactory.newBuilder().startAnd().lessThan("x", PredicateLeaf.Type.DATE, Date.valueOf("1970-1-11")).lessThanEquals("y", PredicateLeaf.Type.STRING, new HiveChar("hi", 10).toString()).equals("z", PredicateLeaf.Type.DECIMAL, new HiveDecimalWritable("1.0")).end().build();
    MessageType schema = MessageTypeParser.parseMessageType("message test {" + " required int32 x; required binary y; required binary z;}");
    assertEquals(null, ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema));
    sarg = SearchArgumentFactory.newBuilder().startNot().startOr().isNull("x", PredicateLeaf.Type.LONG).between("y", PredicateLeaf.Type.DECIMAL, new HiveDecimalWritable("10"), new HiveDecimalWritable("20.0")).in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L).nullSafeEquals("a", PredicateLeaf.Type.STRING, new HiveVarchar("stinger", 100).toString()).end().end().build();
    schema = MessageTypeParser.parseMessageType("message test {" + " optional int32 x; required binary y; required int32 z;" + " optional binary a;}");
    assertEquals(null, ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema));
}
Also used : HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Aggregations

HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)95 HiveChar (org.apache.hadoop.hive.common.type.HiveChar)61 Test (org.junit.Test)35 Text (org.apache.hadoop.io.Text)31 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)28 HiveVarcharWritable (org.apache.hadoop.hive.serde2.io.HiveVarcharWritable)27 VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)26 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)23 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)21 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)21 ArrayList (java.util.ArrayList)20 Timestamp (org.apache.hadoop.hive.common.type.Timestamp)20 CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)20 LongWritable (org.apache.hadoop.io.LongWritable)19 Date (org.apache.hadoop.hive.common.type.Date)18 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)18 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)17 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)17 BooleanWritable (org.apache.hadoop.io.BooleanWritable)17 FloatWritable (org.apache.hadoop.io.FloatWritable)17