Example 6 with Timestamp

Use of org.apache.hadoop.hive.common.type.Timestamp in project hive by apache.

The class HiveIcebergStorageHandler, method collectColumnAndReplaceDummyValues.

/**
 * Recursively replaces ExprNodeDynamicListDesc nodes with dummy ExprNodeConstantDesc nodes so we can later test
 * whether the predicate can be converted to an Iceberg predicate when pruning partitions. Also collects the name
 * of the single column referenced in the filter.
 * <p>
 * Note that this method may modify the input node in place; clone it first if that is not acceptable.
 * @param node The node we are traversing
 * @param foundColumn The column we have already found, if any
 * @return the column name referenced by the filter, or null if no column was found
 */
private String collectColumnAndReplaceDummyValues(ExprNodeDesc node, String foundColumn) {
    String column = foundColumn;
    List<ExprNodeDesc> children = node.getChildren();
    if (children != null && !children.isEmpty()) {
        ListIterator<ExprNodeDesc> iterator = children.listIterator();
        while (iterator.hasNext()) {
            ExprNodeDesc child = iterator.next();
            if (child instanceof ExprNodeDynamicListDesc) {
                Object dummy;
                switch(((PrimitiveTypeInfo) child.getTypeInfo()).getPrimitiveCategory()) {
                    case INT:
                    case SHORT:
                        dummy = 1;
                        break;
                    case LONG:
                        dummy = 1L;
                        break;
                    case TIMESTAMP:
                    case TIMESTAMPLOCALTZ:
                        dummy = new Timestamp();
                        break;
                    case CHAR:
                    case VARCHAR:
                    case STRING:
                        dummy = "1";
                        break;
                    case DOUBLE:
                    case FLOAT:
                    case DECIMAL:
                        dummy = 1.1;
                        break;
                    case DATE:
                        dummy = new Date();
                        break;
                    case BOOLEAN:
                        dummy = true;
                        break;
                    default:
                        throw new UnsupportedOperationException("Not supported primitive type in partition pruning: " + child.getTypeInfo());
                }
                iterator.set(new ExprNodeConstantDesc(child.getTypeInfo(), dummy));
            } else {
                String newColumn;
                if (child instanceof ExprNodeColumnDesc) {
                    newColumn = ((ExprNodeColumnDesc) child).getColumn();
                } else {
                    newColumn = collectColumnAndReplaceDummyValues(child, column);
                }
                if (column != null && newColumn != null && !newColumn.equals(column)) {
                    throw new UnsupportedOperationException("Partition pruning does not support filtering for more columns");
                }
                if (column == null) {
                    column = newColumn;
                }
            }
        }
    }
    return column;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ExprNodeDynamicListDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Date(org.apache.hadoop.hive.common.type.Date)
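
The key trick above is mutating the expression tree in place through ListIterator.set while traversing. Below is a minimal, self-contained sketch of the same pattern; the Node and DynamicNode types are hypothetical stand-ins for ExprNodeDesc and ExprNodeDynamicListDesc, not part of Hive.

import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;

// Hypothetical stand-ins for the Hive expression node classes, for illustration only.
class Node {
    final String label;
    final List<Node> children = new ArrayList<>();
    Node(String label) { this.label = label; }
}

class DynamicNode extends Node {
    DynamicNode() { super("DYNAMIC"); }
}

public class ReplaceInPlaceSketch {
    // Mirrors the recursion above: swap each dynamic leaf for a constant placeholder
    // by writing through the iterator instead of rebuilding the children list.
    static void replaceDynamicLeaves(Node node) {
        ListIterator<Node> iterator = node.children.listIterator();
        while (iterator.hasNext()) {
            Node child = iterator.next();
            if (child instanceof DynamicNode) {
                iterator.set(new Node("CONSTANT(1)")); // dummy value, like ExprNodeConstantDesc
            } else {
                replaceDynamicLeaves(child);
            }
        }
    }

    public static void main(String[] args) {
        Node root = new Node("AND");
        root.children.add(new Node("col"));
        root.children.add(new DynamicNode());
        replaceDynamicLeaves(root);
        System.out.println(root.children.get(1).label); // prints CONSTANT(1)
    }
}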

Example 7 with Timestamp

Use of org.apache.hadoop.hive.common.type.Timestamp in project hive by apache.

The class IcebergTimestampObjectInspectorHive3, method copyObject.

@Override
public Object copyObject(Object o) {
    if (o == null) {
        return null;
    }
    if (o instanceof Timestamp) {
        Timestamp ts = (Timestamp) o;
        Timestamp copy = new Timestamp(ts);
        copy.setNanos(ts.getNanos());
        return copy;
    } else if (o instanceof LocalDateTime) {
        return LocalDateTime.of(((LocalDateTime) o).toLocalDate(), ((LocalDateTime) o).toLocalTime());
    } else {
        return o;
    }
}
Also used : LocalDateTime(java.time.LocalDateTime) Timestamp(org.apache.hadoop.hive.common.type.Timestamp)
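
Why the explicit setNanos call? The same defensive-copy pattern can be sketched with the standard java.sql.Timestamp, used here purely as an analogy for Hive's own Timestamp type: the millisecond constructor drops the nanosecond component, so it has to be restored by hand.

import java.sql.Timestamp;

public class TimestampCopySketch {
    // Defensive copy of a mutable timestamp that keeps full nanosecond precision.
    static Timestamp copy(Timestamp ts) {
        Timestamp copy = new Timestamp(ts.getTime()); // millisecond precision only
        copy.setNanos(ts.getNanos());                 // restore the sub-millisecond part
        return copy;
    }

    public static void main(String[] args) {
        Timestamp original = Timestamp.valueOf("2000-02-29 12:00:00.123456789");
        Timestamp copied = copy(original);
        System.out.println(copied);             // 2000-02-29 12:00:00.123456789
        System.out.println(copied != original); // true: callers cannot mutate the source through the copy
    }
}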

Example 8 with Timestamp

Use of org.apache.hadoop.hive.common.type.Timestamp in project hive by apache.

The class HiveSqlDateTimeFormatter, method getTimestampFromValues.

/**
 * Create a timestamp from the list of values parsed from the input and the list of tokens
 * parsed from the pattern input.
 *
 * We need to be able to parse input like "29.02.2000" (with pattern "dd.mm.yyyy")
 * correctly – if we assigned the day value to the timestamp before the year value, then
 * output would be 2000-02-28. So before creating the Timestamp we have to:
 *
 *  - Make a list of pairs.
 *    Left value: only the Tokens that represent a temporal value
 *    Right value: their corresponding int values parsed from the input
 *  - Sort this list by length of base unit, in descending order (years before months, etc.).
 *  - Then create the parsed output Timestamp object by creating a LocalDateTime object using the
 *    token's TemporalField and the value.
 *
 * @param tokens list of tokens of any type, in order of pattern input
 * @param temporalValues list of integer values parsed from the input, in order of input
 * @return the parsed Timestamp
 * @throws IllegalStateException if the temporal values list and the list of temporal tokens differ in size
 */
private Timestamp getTimestampFromValues(List<Token> tokens, List<Integer> temporalValues) {
    // Get list of temporal Tokens
    List<Token> temporalTokens = tokens.stream().filter(token -> token.type == TokenType.NUMERIC_TEMPORAL || token.type == TokenType.CHARACTER_TEMPORAL).collect(Collectors.toList());
    Preconditions.checkState(temporalTokens.size() == temporalValues.size(), "temporalTokens list length (" + temporalTokens.size() + ") differs from that of temporalValues (length: " + temporalValues.size() + ")");
    // Get sorted list of temporal Token/value Pairs
    List<ImmutablePair<Token, Integer>> tokenValueList = new ArrayList<>(temporalTokens.size());
    for (int i = 0; i < temporalTokens.size(); i++) {
        ImmutablePair<Token, Integer> pair = new ImmutablePair<>(temporalTokens.get(i), temporalValues.get(i));
        tokenValueList.add(pair);
    }
    tokenValueList.sort(((Comparator<ImmutablePair<Token, Integer>>) (o1, o2) -> {
        Token token1 = o1.left;
        Token token2 = o2.left;
        return token1.temporalField.getBaseUnit().getDuration().compareTo(token2.temporalField.getBaseUnit().getDuration());
    }).reversed());
    // Create Timestamp
    LocalDateTime ldt = LocalDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC);
    for (Pair<Token, Integer> pair : tokenValueList) {
        TemporalField temporalField = pair.getLeft().temporalField;
        int value = pair.getRight();
        try {
            ldt = ldt.with(temporalField, value);
        } catch (DateTimeException e) {
            throw new IllegalArgumentException("Value " + value + " not valid for token " + temporalField);
        }
    }
    return Timestamp.ofEpochSecond(ldt.toEpochSecond(ZoneOffset.UTC), ldt.getNano());
}
Also used : ChronoField(java.time.temporal.ChronoField) TemporalUnit(java.time.temporal.TemporalUnit) LocalDateTime(java.time.LocalDateTime) Date(org.apache.hadoop.hive.common.type.Date) StringUtils(org.apache.commons.lang3.StringUtils) ArrayList(java.util.ArrayList) Matcher(java.util.regex.Matcher) ImmutableList(com.google.common.collect.ImmutableList) Pair(org.apache.commons.lang3.tuple.Pair) Optional(com.google.common.base.Optional) Locale(java.util.Locale) Map(java.util.Map) ZoneOffset(java.time.ZoneOffset) DateTimeException(java.time.DateTimeException) WeekFields(java.time.temporal.WeekFields) ImmutableMap(com.google.common.collect.ImmutableMap) Month(java.time.Month) TextStyle(java.time.format.TextStyle) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) TemporalField(java.time.temporal.TemporalField) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) Serializable(java.io.Serializable) Objects(java.util.Objects) List(java.util.List) ChronoUnit(java.time.temporal.ChronoUnit) IsoFields(java.time.temporal.IsoFields) DayOfWeek(java.time.DayOfWeek) DateTimeFormatter(java.time.format.DateTimeFormatter) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Pattern(java.util.regex.Pattern) Comparator(java.util.Comparator) Collections(java.util.Collections)
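
The leap-day reasoning in the javadoc is easy to reproduce with plain java.time, independent of Hive: applying the parsed fields smallest-unit-first lets LocalDateTime clamp day 29 down to 28 when February arrives, while the largest-unit-first order enforced by the sort keeps 29.02.2000 intact. A minimal sketch:

import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.temporal.ChronoField;

public class FieldOrderSketch {
    public static void main(String[] args) {
        LocalDateTime base = LocalDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC); // 1970-01-01T00:00

        // Smallest unit first: day 29 is clamped to 28 when the month becomes February 1970.
        LocalDateTime wrong = base
                .with(ChronoField.DAY_OF_MONTH, 29)
                .with(ChronoField.MONTH_OF_YEAR, 2)
                .with(ChronoField.YEAR, 2000);
        System.out.println(wrong); // 2000-02-28T00:00

        // Largest unit first, which the descending sort by base-unit duration guarantees.
        LocalDateTime right = base
                .with(ChronoField.YEAR, 2000)
                .with(ChronoField.MONTH_OF_YEAR, 2)
                .with(ChronoField.DAY_OF_MONTH, 29);
        System.out.println(right); // 2000-02-29T00:00
    }
}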

Example 9 with Timestamp

Use of org.apache.hadoop.hive.common.type.Timestamp in project hive by apache.

The class ReaderWriter, method writeDatum.

public static void writeDatum(DataOutput out, Object val) throws IOException {
    // write the data type
    byte type = DataType.findType(val);
    out.write(type);
    switch(type) {
        case DataType.LIST:
            List<?> list = (List<?>) val;
            int sz = list.size();
            out.writeInt(sz);
            for (int i = 0; i < sz; i++) {
                writeDatum(out, list.get(i));
            }
            return;
        case DataType.MAP:
            Map<?, ?> m = (Map<?, ?>) val;
            out.writeInt(m.size());
            Iterator<?> i = m.entrySet().iterator();
            while (i.hasNext()) {
                Entry<?, ?> entry = (Entry<?, ?>) i.next();
                writeDatum(out, entry.getKey());
                writeDatum(out, entry.getValue());
            }
            return;
        case DataType.INTEGER:
            new VIntWritable((Integer) val).write(out);
            return;
        case DataType.LONG:
            new VLongWritable((Long) val).write(out);
            return;
        case DataType.FLOAT:
            out.writeFloat((Float) val);
            return;
        case DataType.DOUBLE:
            out.writeDouble((Double) val);
            return;
        case DataType.BOOLEAN:
            out.writeBoolean((Boolean) val);
            return;
        case DataType.BYTE:
            out.writeByte((Byte) val);
            return;
        case DataType.SHORT:
            out.writeShort((Short) val);
            return;
        case DataType.STRING:
            String s = (String) val;
            byte[] utfBytes = s.getBytes(ReaderWriter.UTF8);
            out.writeInt(utfBytes.length);
            out.write(utfBytes);
            return;
        case DataType.BINARY:
            byte[] ba = (byte[]) val;
            out.writeInt(ba.length);
            out.write(ba);
            return;
        case DataType.NULL:
            // for NULL we just write out the type
            return;
        case DataType.CHAR:
            new HiveCharWritable((HiveChar) val).write(out);
            return;
        case DataType.VARCHAR:
            new HiveVarcharWritable((HiveVarchar) val).write(out);
            return;
        case DataType.DECIMAL:
            new HiveDecimalWritable((HiveDecimal) val).write(out);
            return;
        case DataType.DATE:
            new DateWritableV2((Date) val).write(out);
            return;
        case DataType.TIMESTAMP:
            new TimestampWritableV2((Timestamp) val).write(out);
            return;
        default:
            throw new IOException("Unexpected data type " + type + " found in stream.");
    }
}
Also used : VIntWritable(org.apache.hadoop.io.VIntWritable) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) Entry(java.util.Map.Entry) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) ArrayList(java.util.ArrayList) List(java.util.List) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) IOException(java.io.IOException) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) Date(org.apache.hadoop.hive.common.type.Date) VLongWritable(org.apache.hadoop.io.VLongWritable) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)
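
The format is a one-byte type tag followed by a type-specific payload, so a reader has to consume the tag first and then dispatch on it. Below is a minimal round trip of that framing, sketched with a hypothetical TAG_INTEGER constant rather than the real codes from the project's DataType class.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.VIntWritable;

public class TagFramingSketch {
    // Hypothetical tag value; the real codes live in the project's DataType class.
    static final byte TAG_INTEGER = 1;

    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bos);

        out.writeByte(TAG_INTEGER);      // 1) write the type tag
        new VIntWritable(42).write(out); // 2) write the variable-length payload

        DataInputStream in = new DataInputStream(new ByteArrayInputStream(bos.toByteArray()));
        byte tag = in.readByte();        // read the tag back first...
        if (tag == TAG_INTEGER) {
            VIntWritable value = new VIntWritable();
            value.readFields(in);        // ...then decode the payload it announces
            System.out.println(value.get()); // prints 42
        }
    }
}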

Example 10 with Timestamp

Use of org.apache.hadoop.hive.common.type.Timestamp in project hive by apache.

The class VectorizedRowBatchCtx, method addPartitionColsToBatch.

public void addPartitionColsToBatch(ColumnVector col, Object value, int colIndex) {
    String partitionColumnName = rowColumnNames[colIndex];
    PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) rowColumnTypeInfos[colIndex];
    switch(primitiveTypeInfo.getPrimitiveCategory()) {
        case BOOLEAN:
            {
                LongColumnVector lcv = (LongColumnVector) col;
                if (value == null) {
                    lcv.noNulls = false;
                    lcv.isNull[0] = true;
                    lcv.isRepeating = true;
                } else {
                    lcv.fill((Boolean) value ? 1 : 0);
                }
            }
            break;
        case BYTE:
            {
                LongColumnVector lcv = (LongColumnVector) col;
                if (value == null) {
                    lcv.noNulls = false;
                    lcv.isNull[0] = true;
                    lcv.isRepeating = true;
                } else {
                    lcv.fill((Byte) value);
                }
            }
            break;
        case SHORT:
            {
                LongColumnVector lcv = (LongColumnVector) col;
                if (value == null) {
                    lcv.noNulls = false;
                    lcv.isNull[0] = true;
                    lcv.isRepeating = true;
                } else {
                    lcv.fill((Short) value);
                }
            }
            break;
        case INT:
            {
                LongColumnVector lcv = (LongColumnVector) col;
                if (value == null) {
                    lcv.noNulls = false;
                    lcv.isNull[0] = true;
                    lcv.isRepeating = true;
                } else {
                    lcv.fill((Integer) value);
                }
            }
            break;
        case LONG:
            {
                LongColumnVector lcv = (LongColumnVector) col;
                if (value == null) {
                    lcv.noNulls = false;
                    lcv.isNull[0] = true;
                    lcv.isRepeating = true;
                } else {
                    lcv.fill((Long) value);
                }
            }
            break;
        case DATE:
            {
                LongColumnVector lcv = (LongColumnVector) col;
                if (value == null) {
                    lcv.noNulls = false;
                    lcv.isNull[0] = true;
                    lcv.isRepeating = true;
                } else {
                    lcv.fill(DateWritableV2.dateToDays((Date) value));
                }
            }
            break;
        case TIMESTAMP:
            {
                TimestampColumnVector lcv = (TimestampColumnVector) col;
                if (value == null) {
                    lcv.noNulls = false;
                    lcv.isNull[0] = true;
                    lcv.isRepeating = true;
                } else {
                    lcv.fill(((Timestamp) value).toSqlTimestamp());
                }
            }
            break;
        case INTERVAL_YEAR_MONTH:
            {
                LongColumnVector lcv = (LongColumnVector) col;
                if (value == null) {
                    lcv.noNulls = false;
                    lcv.isNull[0] = true;
                    lcv.isRepeating = true;
                } else {
                    lcv.fill(((HiveIntervalYearMonth) value).getTotalMonths());
                }
            }
            break;
        case INTERVAL_DAY_TIME:
            {
                IntervalDayTimeColumnVector icv = (IntervalDayTimeColumnVector) col;
                if (value == null) {
                    icv.noNulls = false;
                    icv.isNull[0] = true;
                    icv.isRepeating = true;
                } else {
                    icv.fill(((HiveIntervalDayTime) value));
                }
            }
            break;
        case FLOAT:
            {
                DoubleColumnVector dcv = (DoubleColumnVector) col;
                if (value == null) {
                    dcv.noNulls = false;
                    dcv.isNull[0] = true;
                    dcv.isRepeating = true;
                } else {
                    dcv.fill((Float) value);
                }
            }
            break;
        case DOUBLE:
            {
                DoubleColumnVector dcv = (DoubleColumnVector) col;
                if (value == null) {
                    dcv.noNulls = false;
                    dcv.isNull[0] = true;
                    dcv.isRepeating = true;
                } else {
                    dcv.fill((Double) value);
                }
            }
            break;
        case DECIMAL:
            {
                DataTypePhysicalVariation dataTypePhysicalVariation = rowDataTypePhysicalVariations != null ? rowDataTypePhysicalVariations[colIndex] : DataTypePhysicalVariation.NONE;
                if (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) {
                    Decimal64ColumnVector dv = (Decimal64ColumnVector) col;
                    if (value == null) {
                        dv.noNulls = false;
                        dv.isNull[0] = true;
                        dv.isRepeating = true;
                    } else {
                        dv.fill(((HiveDecimal) value).longValue());
                    }
                } else {
                    DecimalColumnVector dv = (DecimalColumnVector) col;
                    if (value == null) {
                        dv.noNulls = false;
                        dv.isNull[0] = true;
                        dv.isRepeating = true;
                    } else {
                        dv.fill((HiveDecimal) value);
                    }
                }
            }
            break;
        case BINARY:
            {
                BytesColumnVector bcv = (BytesColumnVector) col;
                byte[] bytes = (byte[]) value;
                if (bytes == null) {
                    bcv.noNulls = false;
                    bcv.isNull[0] = true;
                    bcv.isRepeating = true;
                } else {
                    bcv.fill(bytes);
                }
            }
            break;
        case STRING:
        case CHAR:
        case VARCHAR:
            {
                BytesColumnVector bcv = (BytesColumnVector) col;
                if (value == null) {
                    bcv.noNulls = false;
                    bcv.isNull[0] = true;
                    bcv.isRepeating = true;
                } else {
                    // The null check must run before toString(); calling value.toString() on a null
                    // partition value would throw a NullPointerException, so the null branch could never fire.
                    bcv.fill(value.toString().getBytes());
                }
            }
            break;
        default:
            throw new RuntimeException("Unable to recognize the partition type " + primitiveTypeInfo.getPrimitiveCategory() + " for column " + partitionColumnName);
    }
}
Also used : Timestamp(org.apache.hadoop.hive.common.type.Timestamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) HiveIntervalYearMonth(org.apache.hadoop.hive.common.type.HiveIntervalYearMonth) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal)
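
Every branch follows the same vectorized convention: a non-null partition value becomes a repeating column via fill(), while a null is encoded as noNulls = false, isNull[0] = true, isRepeating = true, so entry 0 stands in for the whole batch. A minimal sketch of both states on a LongColumnVector, assuming the standard org.apache.hadoop.hive.ql.exec.vector API:

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public class PartitionColumnSketch {
    public static void main(String[] args) {
        // Non-null partition value: fill() marks the vector repeating, so every row shares vector[0].
        LongColumnVector withValue = new LongColumnVector(1024);
        withValue.fill(42L);
        System.out.println(withValue.isRepeating + " " + withValue.vector[0]); // true 42

        // Null partition value: the three-flag pattern used by each null branch above.
        LongColumnVector withNull = new LongColumnVector(1024);
        withNull.noNulls = false;
        withNull.isNull[0] = true;
        withNull.isRepeating = true;
        System.out.println(withNull.isNull[0]); // true: row 0 represents all rows as null
    }
}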

Aggregations

Timestamp (org.apache.hadoop.hive.common.type.Timestamp) 116
Test (org.junit.Test) 36
TimestampWritableV2 (org.apache.hadoop.hive.serde2.io.TimestampWritableV2) 32
Date (org.apache.hadoop.hive.common.type.Date) 27
BytesWritable (org.apache.hadoop.io.BytesWritable) 25
LongWritable (org.apache.hadoop.io.LongWritable) 25
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) 24
Text (org.apache.hadoop.io.Text) 22
HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal) 21
IntWritable (org.apache.hadoop.io.IntWritable) 21
HiveChar (org.apache.hadoop.hive.common.type.HiveChar) 20
HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar) 20
BooleanWritable (org.apache.hadoop.io.BooleanWritable) 19
FloatWritable (org.apache.hadoop.io.FloatWritable) 19
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) 18
ArrayList (java.util.ArrayList) 17
DateWritableV2 (org.apache.hadoop.hive.serde2.io.DateWritableV2) 17
HiveIntervalDayTime (org.apache.hadoop.hive.common.type.HiveIntervalDayTime) 16
List (java.util.List) 15
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable) 12