Use of org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo in project hive by apache: the class LazySerDeParameters, method extractColumnInfo.
/**
 * Extracts and sets column names and column types from the table properties.
 * @throws SerDeException
 */
public void extractColumnInfo(Configuration conf) throws SerDeException {
  // Read the configuration parameters
  String columnNameProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMNS);
  // NOTE: if "columns.types" is missing, all columns will be of String type
  String columnTypeProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  // Parse the configuration parameters
  String columnNameDelimiter = tableProperties.containsKey(serdeConstants.COLUMN_NAME_DELIMITER)
      ? tableProperties.getProperty(serdeConstants.COLUMN_NAME_DELIMITER)
      : String.valueOf(SerDeUtils.COMMA);
  if (columnNameProperty != null && columnNameProperty.length() > 0) {
    columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
  } else {
    columnNames = new ArrayList<String>();
  }
  if (columnTypeProperty == null) {
    // Default type: all string
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < columnNames.size(); i++) {
      if (i > 0) {
        sb.append(":");
      }
      sb.append(serdeConstants.STRING_TYPE_NAME);
    }
    columnTypeProperty = sb.toString();
  }
  columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  // Insert time-zone for timestamp type
  if (conf != null) {
    final TimestampLocalTZTypeInfo tsTZTypeInfo =
        new TimestampLocalTZTypeInfo(conf.get(ConfVars.HIVE_LOCAL_TIME_ZONE.varname));
    for (int i = 0; i < columnTypes.size(); i++) {
      if (columnTypes.get(i) instanceof TimestampLocalTZTypeInfo) {
        columnTypes.set(i, tsTZTypeInfo);
      }
    }
  }
  if (columnNames.size() != columnTypes.size()) {
    throw new SerDeException(serdeName + ": columns has " + columnNames.size()
        + " elements while columns.types has " + columnTypes.size() + " elements!");
  }
}
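The important step above is the in-place substitution: every zone-less TimestampLocalTZTypeInfo parsed from columns.types is replaced with one that carries the session time zone read from ConfVars.HIVE_LOCAL_TIME_ZONE. Below is a minimal sketch of that normalization pulled out of the serde for illustration; the class name LocalTZColumnNormalizer and the sessionZone parameter are hypothetical, not part of Hive.

import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class LocalTZColumnNormalizer {

  // Parse a columns.types string and replace every bare TIMESTAMPLOCALTZ type
  // with one carrying the given session zone, mirroring the loop in
  // extractColumnInfo above. (Illustrative helper, not Hive's own API.)
  public static List<TypeInfo> normalize(String columnTypeProperty, String sessionZone) {
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    TimestampLocalTZTypeInfo zoned = new TimestampLocalTZTypeInfo(sessionZone);
    for (int i = 0; i < columnTypes.size(); i++) {
      if (columnTypes.get(i) instanceof TimestampLocalTZTypeInfo) {
        columnTypes.set(i, zoned);
      }
    }
    return columnTypes;
  }
}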
Use of org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo in project hive by apache: the class HiveJsonReader, method visitLeafNode.
/**
 * Visit a node if it is expected to be a primitive value (JSON leaf node).
 *
 * @param leafNode The node pointing at the JSON object
 * @param oi The ObjectInspector to parse the value (must be a
 *           PrimitiveObjectInspector)
 * @return A Java primitive Object
 * @throws SerDeException The SerDe is not configured correctly
 */
private Object visitLeafNode(final JsonNode leafNode, final ObjectInspector oi) throws SerDeException {
  final PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
  final PrimitiveTypeInfo typeInfo = poi.getTypeInfo();
  if (typeInfo.getPrimitiveCategory() != PrimitiveCategory.STRING) {
    Preconditions.checkArgument(leafNode.getNodeType() != JsonNodeType.OBJECT);
    Preconditions.checkArgument(leafNode.getNodeType() != JsonNodeType.ARRAY);
  }
  switch (typeInfo.getPrimitiveCategory()) {
    case INT:
      return Integer.valueOf(leafNode.asInt());
    case BYTE:
      return Byte.valueOf((byte) leafNode.asInt());
    case SHORT:
      return Short.valueOf((short) leafNode.asInt());
    case LONG:
      return Long.valueOf(leafNode.asLong());
    case BOOLEAN:
      return Boolean.valueOf(leafNode.asBoolean());
    case FLOAT:
      return Float.valueOf((float) leafNode.asDouble());
    case DOUBLE:
      return Double.valueOf(leafNode.asDouble());
    case STRING:
      if (leafNode.isValueNode()) {
        return leafNode.asText();
      } else {
        if (isEnabled(Feature.STRINGIFY_COMPLEX_FIELDS)) {
          return leafNode.toString();
        } else {
          throw new SerDeException("Complex field found in JSON does not match table definition: "
              + typeInfo.getTypeName() + ", please consider enabling `"
              + JsonSerDe.STRINGIFY_COMPLEX + "` table property");
        }
      }
    case BINARY:
      return getByteValue(leafNode);
    case DATE:
      return Date.valueOf(leafNode.asText());
    case TIMESTAMP:
      return tsParser.parseTimestamp(leafNode.asText());
    case DECIMAL:
      return HiveDecimal.create(leafNode.asText());
    case TIMESTAMPLOCALTZ:
      final Timestamp ts = tsParser.parseTimestamp(leafNode.asText());
      final ZoneId zid = ((TimestampLocalTZTypeInfo) typeInfo).timeZone();
      final TimestampTZ tstz = new TimestampTZ();
      tstz.set(ts.toEpochSecond(), ts.getNanos(), zid);
      return tstz;
    case VARCHAR:
      return new HiveVarchar(leafNode.asText(), ((BaseCharTypeInfo) typeInfo).getLength());
    case CHAR:
      return new HiveChar(leafNode.asText(), ((BaseCharTypeInfo) typeInfo).getLength());
    default:
      throw new SerDeException("Could not convert from string to type: " + typeInfo.getTypeName());
  }
}
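In the TIMESTAMPLOCALTZ branch, the JSON text is first parsed as a zone-less Hive Timestamp and then re-anchored to the zone recorded in the column's TimestampLocalTZTypeInfo. A self-contained sketch of just that conversion is shown below, assuming the Hive type classes used above are on the classpath; the class name LocalTZLeafExample and the sample values are illustrative only.

import java.time.ZoneId;

import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.common.type.TimestampTZ;

public class LocalTZLeafExample {

  // Re-anchor a zone-less Hive Timestamp to the column's declared zone,
  // as the TIMESTAMPLOCALTZ case of visitLeafNode does.
  public static TimestampTZ toLocalTZ(Timestamp ts, ZoneId columnZone) {
    TimestampTZ tstz = new TimestampTZ();
    tstz.set(ts.toEpochSecond(), ts.getNanos(), columnZone);
    return tstz;
  }

  public static void main(String[] args) {
    // Hypothetical input; any "yyyy-MM-dd HH:mm:ss" value works here.
    Timestamp ts = Timestamp.valueOf("2021-03-14 01:59:26");
    System.out.println(toLocalTZ(ts, ZoneId.of("America/Los_Angeles")));
  }
}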
Use of org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo in project hive by apache: the class DruidSerDe, method deserialize.
@Override
public Object deserialize(Writable writable) throws SerDeException {
  final DruidWritable input = (DruidWritable) writable;
  final List<Object> output = Lists.newArrayListWithExpectedSize(columns.length);
  for (int i = 0; i < columns.length; i++) {
    final Object value = input.getValue().get(columns[i]);
    if (value == null) {
      output.add(null);
      continue;
    }
    switch (types[i].getPrimitiveCategory()) {
      case TIMESTAMP:
        output.add(new TimestampWritable(Timestamp.valueOf(
            ZonedDateTime.ofInstant(Instant.ofEpochMilli(((Number) value).longValue()), tsTZTypeInfo.timeZone())
                .format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")).toString())));
        break;
      case TIMESTAMPLOCALTZ:
        output.add(new TimestampLocalTZWritable(new TimestampTZ(
            ZonedDateTime.ofInstant(Instant.ofEpochMilli(((Number) value).longValue()),
                ((TimestampLocalTZTypeInfo) types[i]).timeZone()))));
        break;
      case BYTE:
        output.add(new ByteWritable(((Number) value).byteValue()));
        break;
      case SHORT:
        output.add(new ShortWritable(((Number) value).shortValue()));
        break;
      case INT:
        output.add(new IntWritable(((Number) value).intValue()));
        break;
      case LONG:
        output.add(new LongWritable(((Number) value).longValue()));
        break;
      case FLOAT:
        output.add(new FloatWritable(((Number) value).floatValue()));
        break;
      case DOUBLE:
        output.add(new DoubleWritable(((Number) value).doubleValue()));
        break;
      case DECIMAL:
        output.add(new HiveDecimalWritable(HiveDecimal.create(((Number) value).doubleValue())));
        break;
      case CHAR:
        output.add(new HiveCharWritable(new HiveChar(value.toString(), ((CharTypeInfo) types[i]).getLength())));
        break;
      case VARCHAR:
        output.add(new HiveVarcharWritable(new HiveVarchar(value.toString(), ((VarcharTypeInfo) types[i]).getLength())));
        break;
      case STRING:
        output.add(new Text(value.toString()));
        break;
      case BOOLEAN:
        output.add(new BooleanWritable(Boolean.valueOf(value.toString())));
        break;
      default:
        throw new SerDeException("Unknown type: " + types[i].getPrimitiveCategory());
    }
  }
  return output;
}
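Druid hands timestamps back as epoch milliseconds, so the TIMESTAMPLOCALTZ case only has to place that instant into the zone carried by the column's TimestampLocalTZTypeInfo. A small sketch of just that conversion follows; the class name DruidLocalTZExample and the sample millisecond value are made up for illustration.

import java.time.Instant;
import java.time.ZoneId;
import java.time.ZonedDateTime;

import org.apache.hadoop.hive.common.type.TimestampTZ;
import org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable;

public class DruidLocalTZExample {

  // Wrap a Druid epoch-millisecond value as a TIMESTAMPLOCALTZ writable in the
  // column's declared zone, as the TIMESTAMPLOCALTZ case above does.
  public static TimestampLocalTZWritable fromEpochMillis(long millis, ZoneId columnZone) {
    ZonedDateTime zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(millis), columnZone);
    return new TimestampLocalTZWritable(new TimestampTZ(zdt));
  }

  public static void main(String[] args) {
    System.out.println(fromEpochMillis(1_600_000_000_000L, ZoneId.of("UTC")));
  }
}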
Use of org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo in project hive by apache: the class BinarySortableSerDe, method deserialize.
static Object deserialize(InputByteBuffer buffer, TypeInfo type, boolean invert, byte nullMarker,
    byte notNullMarker, Object reuse) throws IOException {
  // Is this field a null?
  byte isNull = buffer.read(invert);
  if (isNull == nullMarker) {
    return null;
  }
  assert (isNull == notNullMarker);
  switch (type.getCategory()) {
    case PRIMITIVE: {
      PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type;
      switch (ptype.getPrimitiveCategory()) {
        case VOID: {
          return null;
        }
        case BOOLEAN: {
          BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
          byte b = buffer.read(invert);
          assert (b == 1 || b == 2);
          r.set(b == 2);
          return r;
        }
        case BYTE: {
          ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
          r.set((byte) (buffer.read(invert) ^ 0x80));
          return r;
        }
        case SHORT: {
          ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
          int v = buffer.read(invert) ^ 0x80;
          v = (v << 8) + (buffer.read(invert) & 0xff);
          r.set((short) v);
          return r;
        }
        case INT: {
          IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
          r.set(deserializeInt(buffer, invert));
          return r;
        }
        case LONG: {
          LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
          r.set(deserializeLong(buffer, invert));
          return r;
        }
        case FLOAT: {
          FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
          int v = 0;
          for (int i = 0; i < 4; i++) {
            v = (v << 8) + (buffer.read(invert) & 0xff);
          }
          if ((v & (1 << 31)) == 0) {
            // negative number, flip all bits
            v = ~v;
          } else {
            // positive number, flip the first bit
            v = v ^ (1 << 31);
          }
          r.set(Float.intBitsToFloat(v));
          return r;
        }
        case DOUBLE: {
          DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
          long v = 0;
          for (int i = 0; i < 8; i++) {
            v = (v << 8) + (buffer.read(invert) & 0xff);
          }
          if ((v & (1L << 63)) == 0) {
            // negative number, flip all bits
            v = ~v;
          } else {
            // positive number, flip the first bit
            v = v ^ (1L << 63);
          }
          r.set(Double.longBitsToDouble(v));
          return r;
        }
        case STRING: {
          Text r = reuse == null ? new Text() : (Text) reuse;
          return deserializeText(buffer, invert, r);
        }
        case CHAR: {
          HiveCharWritable r = reuse == null ? new HiveCharWritable() : (HiveCharWritable) reuse;
          // Use internal text member to read value
          deserializeText(buffer, invert, r.getTextValue());
          r.enforceMaxLength(getCharacterMaxLength(type));
          return r;
        }
        case VARCHAR: {
          HiveVarcharWritable r = reuse == null ? new HiveVarcharWritable() : (HiveVarcharWritable) reuse;
          // Use HiveVarchar's internal Text member to read the value.
          deserializeText(buffer, invert, r.getTextValue());
          // If we cached helper data for deserialization we could avoid having
          // to call getVarcharMaxLength() on every deserialize call.
          r.enforceMaxLength(getCharacterMaxLength(type));
          return r;
        }
        case BINARY: {
          BytesWritable bw = new BytesWritable();
          // Get the actual length first
          int start = buffer.tell();
          int length = 0;
          do {
            byte b = buffer.read(invert);
            if (b == 0) {
              // end of string
              break;
            }
            if (b == 1) {
              // the last char is an escape char. read the actual char
              buffer.read(invert);
            }
            length++;
          } while (true);
          if (length == buffer.tell() - start) {
            // No escaping happened, so we are already done.
            bw.set(buffer.getData(), start, length);
          } else {
            // Escaping happened, we need to copy byte-by-byte.
            // 1. Set the length first.
            bw.set(buffer.getData(), start, length);
            // 2. Reset the pointer.
            buffer.seek(start);
            // 3. Copy the data.
            byte[] rdata = bw.getBytes();
            for (int i = 0; i < length; i++) {
              byte b = buffer.read(invert);
              if (b == 1) {
                // The last char is an escape char, read the actual char.
                // The serialization format escapes \0 to \1, and \1 to \2,
                // to make sure the string is null-terminated.
                b = (byte) (buffer.read(invert) - 1);
              }
              rdata[i] = b;
            }
            // 4. Read the null terminator.
            byte b = buffer.read(invert);
            assert (b == 0);
          }
          return bw;
        }
        case DATE: {
          DateWritableV2 d = reuse == null ? new DateWritableV2() : (DateWritableV2) reuse;
          d.set(deserializeInt(buffer, invert));
          return d;
        }
        case TIMESTAMP:
          TimestampWritableV2 t = (reuse == null ? new TimestampWritableV2() : (TimestampWritableV2) reuse);
          byte[] bytes = new byte[TimestampWritableV2.BINARY_SORTABLE_LENGTH];
          for (int i = 0; i < bytes.length; i++) {
            bytes[i] = buffer.read(invert);
          }
          t.setBinarySortable(bytes, 0);
          return t;
        case TIMESTAMPLOCALTZ:
          TimestampLocalTZWritable tstz = (reuse == null ? new TimestampLocalTZWritable() : (TimestampLocalTZWritable) reuse);
          byte[] data = new byte[TimestampLocalTZWritable.BINARY_SORTABLE_LENGTH];
          for (int i = 0; i < data.length; i++) {
            data[i] = buffer.read(invert);
          }
          // Across the MR process boundary the time zone is normalized and stored in
          // the type; it is not carried in the data for each row.
          tstz.fromBinarySortable(data, 0, ((TimestampLocalTZTypeInfo) type).timeZone());
          return tstz;
        case INTERVAL_YEAR_MONTH: {
          HiveIntervalYearMonthWritable i = reuse == null ? new HiveIntervalYearMonthWritable() : (HiveIntervalYearMonthWritable) reuse;
          i.set(deserializeInt(buffer, invert));
          return i;
        }
        case INTERVAL_DAY_TIME: {
          HiveIntervalDayTimeWritable i = reuse == null ? new HiveIntervalDayTimeWritable() : (HiveIntervalDayTimeWritable) reuse;
          long totalSecs = deserializeLong(buffer, invert);
          int nanos = deserializeInt(buffer, invert);
          i.set(totalSecs, nanos);
          return i;
        }
        case DECIMAL: {
          // See serialization of decimal for explanation (below)
          HiveDecimalWritable bdw = (reuse == null ? new HiveDecimalWritable() : (HiveDecimalWritable) reuse);
          int b = buffer.read(invert) - 1;
          assert (b == 1 || b == -1 || b == 0);
          boolean positive = b != -1;
          int factor = buffer.read(invert) ^ 0x80;
          for (int i = 0; i < 3; i++) {
            factor = (factor << 8) + (buffer.read(invert) & 0xff);
          }
          if (!positive) {
            factor = -factor;
          }
          int start = buffer.tell();
          int length = 0;
          do {
            b = buffer.read(positive ? invert : !invert);
            assert (b != 1);
            if (b == 0) {
              // end of digits
              break;
            }
            length++;
          } while (true);
          final byte[] decimalBuffer = new byte[length];
          buffer.seek(start);
          for (int i = 0; i < length; ++i) {
            decimalBuffer[i] = buffer.read(positive ? invert : !invert);
          }
          // read the null byte again
          buffer.read(positive ? invert : !invert);
          String digits = new String(decimalBuffer, 0, length, decimalCharSet);
          BigInteger bi = new BigInteger(digits);
          HiveDecimal bd = HiveDecimal.create(bi).scaleByPowerOfTen(factor - length);
          if (!positive) {
            bd = bd.negate();
          }
          bdw.set(bd);
          return bdw;
        }
        default: {
          throw new RuntimeException("Unrecognized type: " + ptype.getPrimitiveCategory());
        }
      }
    }
    case LIST: {
      ListTypeInfo ltype = (ListTypeInfo) type;
      TypeInfo etype = ltype.getListElementTypeInfo();
      // Create the list if needed
      ArrayList<Object> r = reuse == null ? new ArrayList<Object>() : (ArrayList<Object>) reuse;
      // Read the list
      int size = 0;
      while (true) {
        int more = buffer.read(invert);
        if (more == 0) {
          // \0 to terminate
          break;
        }
        // \1 followed by each element
        assert (more == 1);
        if (size == r.size()) {
          r.add(null);
        }
        r.set(size, deserialize(buffer, etype, invert, nullMarker, notNullMarker, r.get(size)));
        size++;
      }
      // Remove additional elements if the list is reused
      while (r.size() > size) {
        r.remove(r.size() - 1);
      }
      return r;
    }
    case MAP: {
      MapTypeInfo mtype = (MapTypeInfo) type;
      TypeInfo ktype = mtype.getMapKeyTypeInfo();
      TypeInfo vtype = mtype.getMapValueTypeInfo();
      // Create the map if needed
      Map<Object, Object> r;
      if (reuse == null || reuse.getClass() != LinkedHashMap.class) {
        r = new LinkedHashMap<Object, Object>();
      } else {
        r = (Map<Object, Object>) reuse;
        r.clear();
      }
      while (true) {
        int more = buffer.read(invert);
        if (more == 0) {
          // \0 to terminate
          break;
        }
        // \1 followed by each key and then each value
        assert (more == 1);
        Object k = deserialize(buffer, ktype, invert, nullMarker, notNullMarker, null);
        Object v = deserialize(buffer, vtype, invert, nullMarker, notNullMarker, null);
        r.put(k, v);
      }
      return r;
    }
    case STRUCT: {
      StructTypeInfo stype = (StructTypeInfo) type;
      List<TypeInfo> fieldTypes = stype.getAllStructFieldTypeInfos();
      int size = fieldTypes.size();
      // Create the struct if needed
      ArrayList<Object> r = reuse == null ? new ArrayList<Object>(size) : (ArrayList<Object>) reuse;
      assert (r.size() <= size);
      // Set the size of the struct
      while (r.size() < size) {
        r.add(null);
      }
      // Read one field by one field
      for (int eid = 0; eid < size; eid++) {
        r.set(eid, deserialize(buffer, fieldTypes.get(eid), invert, nullMarker, notNullMarker, r.get(eid)));
      }
      return r;
    }
    case UNION: {
      UnionTypeInfo utype = (UnionTypeInfo) type;
      StandardUnion r = reuse == null ? new StandardUnion() : (StandardUnion) reuse;
      // Read the tag
      byte tag = buffer.read(invert);
      r.setTag(tag);
      r.setObject(deserialize(buffer, utype.getAllUnionObjectTypeInfos().get(tag), invert, nullMarker, notNullMarker, null));
      return r;
    }
    default: {
      throw new RuntimeException("Unrecognized type: " + type.getCategory());
    }
  }
}
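The TIMESTAMPLOCALTZ case above is the one place in this method where the TypeInfo itself supplies information needed to decode a value: the binary-sortable bytes hold only the instant, while the zone comes from the column's TimestampLocalTZTypeInfo. Below is a hedged sketch of that decode step in isolation; the helper class name and the assumption that row holds at least BINARY_SORTABLE_LENGTH bytes at offset are illustrative, not part of Hive.

import org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable;
import org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo;

public class BinarySortableLocalTZDecode {

  // Decode a binary-sortable TIMESTAMPLOCALTZ value. The per-row bytes carry no
  // zone; it is taken from the column type, as in BinarySortableSerDe.deserialize.
  public static TimestampLocalTZWritable decode(byte[] row, int offset, TimestampLocalTZTypeInfo typeInfo) {
    // Assumes row contains at least TimestampLocalTZWritable.BINARY_SORTABLE_LENGTH bytes at offset.
    TimestampLocalTZWritable tstz = new TimestampLocalTZWritable();
    tstz.fromBinarySortable(row, offset, typeInfo.timeZone());
    return tstz;
  }
}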
Use of org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo in project hive by apache: the class KafkaJsonSerDe, method parseAsPrimitive.
private Object parseAsPrimitive(JsonNode value, TypeInfo typeInfo) throws SerDeException {
  switch (TypeInfoFactory.getPrimitiveTypeInfo(typeInfo.getTypeName()).getPrimitiveCategory()) {
    case TIMESTAMP:
      TimestampWritable timestampWritable = new TimestampWritable();
      timestampWritable.setTime(TS_PARSER.get().parseMillis(value.textValue()));
      return timestampWritable;
    case TIMESTAMPLOCALTZ:
      final long numberOfMillis = TS_PARSER.get().parseMillis(value.textValue());
      return new TimestampLocalTZWritable(new TimestampTZ(
          ZonedDateTime.ofInstant(Instant.ofEpochMilli(numberOfMillis), ((TimestampLocalTZTypeInfo) typeInfo).timeZone())));
    case BYTE:
      return new ByteWritable((byte) value.intValue());
    case SHORT:
      return new ShortWritable(value.shortValue());
    case INT:
      return new IntWritable(value.intValue());
    case LONG:
      return new LongWritable(value.longValue());
    case FLOAT:
      return new FloatWritable(value.floatValue());
    case DOUBLE:
      return new DoubleWritable(value.doubleValue());
    case DECIMAL:
      return new HiveDecimalWritable(HiveDecimal.create(value.decimalValue()));
    case CHAR:
      return new HiveCharWritable(new HiveChar(value.textValue(), ((CharTypeInfo) typeInfo).getLength()));
    case VARCHAR:
      return new HiveVarcharWritable(new HiveVarchar(value.textValue(), ((VarcharTypeInfo) typeInfo).getLength()));
    case STRING:
      return new Text(value.textValue());
    case BOOLEAN:
      return new BooleanWritable(value.isBoolean() ? value.booleanValue() : Boolean.valueOf(value.textValue()));
    default:
      throw new SerDeException("Unknown type: " + typeInfo.getTypeName());
  }
}
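KafkaJsonSerDe resolves the JSON text to epoch milliseconds with its internal TS_PARSER and then, much as in the Druid case, anchors that instant to the column zone. A rough equivalent using only java.time in place of TS_PARSER follows; the parser choice, class name, and sample values are assumptions for illustration, not the serde's actual code.

import java.time.Instant;
import java.time.ZoneId;
import java.time.ZonedDateTime;

import org.apache.hadoop.hive.common.type.TimestampTZ;
import org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable;

public class KafkaJsonLocalTZExample {

  // Parse an ISO-8601 instant (standing in for the serde's TS_PARSER, which is
  // not shown here) and wrap it as a TIMESTAMPLOCALTZ value in the column zone.
  public static TimestampLocalTZWritable parse(String isoInstant, ZoneId columnZone) {
    long millis = Instant.parse(isoInstant).toEpochMilli();
    ZonedDateTime zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(millis), columnZone);
    return new TimestampLocalTZWritable(new TimestampTZ(zdt));
  }

  public static void main(String[] args) {
    System.out.println(parse("2021-01-01T00:00:00Z", ZoneId.of("Asia/Tokyo")));
  }
}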