Example 16 with HiveVarchar

Use of org.apache.hadoop.hive.common.type.HiveVarchar in project hive by apache.

Class TestVectorSerDeRow, method getObjectDisplayString.

private String getObjectDisplayString(Object object) {
    StringBuilder sb = new StringBuilder();
    if (object == null) {
        sb.append("NULL");
    } else if (object instanceof Text || object instanceof HiveChar || object instanceof HiveCharWritable || object instanceof HiveVarchar || object instanceof HiveVarcharWritable) {
        final String string;
        if (object instanceof Text) {
            Text text = (Text) object;
            string = text.toString();
        } else if (object instanceof HiveChar) {
            HiveChar hiveChar = (HiveChar) object;
            string = hiveChar.getStrippedValue();
        } else if (object instanceof HiveCharWritable) {
            HiveChar hiveChar = ((HiveCharWritable) object).getHiveChar();
            string = hiveChar.getStrippedValue();
        } else if (object instanceof HiveVarchar) {
            HiveVarchar hiveVarchar = (HiveVarchar) object;
            string = hiveVarchar.getValue();
        } else if (object instanceof HiveVarcharWritable) {
            HiveVarchar hiveVarchar = ((HiveVarcharWritable) object).getHiveVarchar();
            string = hiveVarchar.getValue();
        } else {
            throw new RuntimeException("Unexpected");
        }
        byte[] bytes = string.getBytes();
        final int byteLength = bytes.length;
        sb.append("'");
        sb.append(string);
        sb.append("' (byte length ");
        sb.append(byteLength);
        sb.append(", string length ");
        sb.append(string.length());
        sb.append(", bytes ");
        sb.append(VectorizedBatchUtil.displayBytes(bytes, 0, byteLength));
        sb.append(")");
    } else {
        sb.append(object.toString());
    }
    return sb.toString();
}
Also used : HiveChar(org.apache.hadoop.hive.common.type.HiveChar) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) Text(org.apache.hadoop.io.Text) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar)
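
For a concrete sense of what this helper prints, here is a minimal standalone sketch (not part of the Hive test class; the class name and sample value are mine) that builds the same kind of display string for a HiveVarchar:

import org.apache.hadoop.hive.common.type.HiveVarchar;

public class DisplayStringSketch {
    public static void main(String[] args) {
        // HiveVarchar stores a string with a declared maximum length; getValue() returns the stored text.
        HiveVarchar varchar = new HiveVarchar("red", 8);
        String string = varchar.getValue();
        byte[] bytes = string.getBytes();
        // Mirrors the format produced by getObjectDisplayString, minus the raw byte dump.
        System.out.println("'" + string + "' (byte length " + bytes.length
                + ", string length " + string.length() + ")");
        // Prints: 'red' (byte length 3, string length 3)
    }
}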

Example 17 with HiveVarchar

Use of org.apache.hadoop.hive.common.type.HiveVarchar in project hive by apache.

Class RexNodeConverter, method convert.

protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticException {
    final RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
    final PrimitiveTypeInfo hiveType = (PrimitiveTypeInfo) literal.getTypeInfo();
    final RelDataType calciteDataType = TypeConverter.convert(hiveType, dtFactory);
    PrimitiveCategory hiveTypeCategory = hiveType.getPrimitiveCategory();
    ConstantObjectInspector coi = literal.getWritableObjectInspector();
    Object value = ObjectInspectorUtils.copyToStandardJavaObject(coi.getWritableConstantValue(), coi);
    RexNode calciteLiteral = null;
    // If value is null, the type should also be VOID.
    if (value == null) {
        hiveTypeCategory = PrimitiveCategory.VOID;
    }
    // TODO: Verify if we need to use ConstantObjectInspector to unwrap data
    switch(hiveTypeCategory) {
        case BOOLEAN:
            calciteLiteral = rexBuilder.makeLiteral(((Boolean) value).booleanValue());
            break;
        case BYTE:
            calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Byte) value), calciteDataType);
            break;
        case SHORT:
            calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Short) value), calciteDataType);
            break;
        case INT:
            calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Integer) value));
            break;
        case LONG:
            calciteLiteral = rexBuilder.makeBigintLiteral(new BigDecimal((Long) value));
            break;
        case DECIMAL:
            if (value instanceof HiveDecimal) {
                value = ((HiveDecimal) value).bigDecimalValue();
            } else if (value instanceof Decimal128) {
                value = ((Decimal128) value).toBigDecimal();
            }
            if (value == null) {
                // The constant could not be converted to a valid BigDecimal, so reject it as an invalid decimal literal.
                throw new CalciteSemanticException("Expression " + literal.getExprString() + " is not a valid decimal", UnsupportedFeature.Invalid_decimal);
            // TODO: return createNullLiteral(literal);
            }
            calciteLiteral = rexBuilder.makeExactLiteral((BigDecimal) value, calciteDataType);
            break;
        case FLOAT:
            calciteLiteral = rexBuilder.makeApproxLiteral(new BigDecimal(Float.toString((Float) value)), calciteDataType);
            break;
        case DOUBLE:
            // TODO: The best solution is to support NaN in expression reduction.
            if (Double.isNaN((Double) value)) {
                throw new CalciteSemanticException("NaN", UnsupportedFeature.Invalid_decimal);
            }
            calciteLiteral = rexBuilder.makeApproxLiteral(new BigDecimal(Double.toString((Double) value)), calciteDataType);
            break;
        case CHAR:
            if (value instanceof HiveChar) {
                value = ((HiveChar) value).getValue();
            }
            final int lengthChar = TypeInfoUtils.getCharacterLengthForType(hiveType);
            RelDataType charType = rexBuilder.getTypeFactory().createTypeWithCharsetAndCollation(rexBuilder.getTypeFactory().createSqlType(SqlTypeName.CHAR, lengthChar), Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT);
            calciteLiteral = rexBuilder.makeLiteral(RexNodeExprFactory.makeHiveUnicodeString((String) value), charType, false);
            break;
        case VARCHAR:
            if (value instanceof HiveVarchar) {
                value = ((HiveVarchar) value).getValue();
            }
            final int lengthVarchar = TypeInfoUtils.getCharacterLengthForType(hiveType);
            RelDataType varcharType = rexBuilder.getTypeFactory().createTypeWithCharsetAndCollation(rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR, lengthVarchar), Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT);
            calciteLiteral = rexBuilder.makeLiteral(RexNodeExprFactory.makeHiveUnicodeString((String) value), varcharType, true);
            break;
        case STRING:
            RelDataType stringType = rexBuilder.getTypeFactory().createTypeWithCharsetAndCollation(rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR, Integer.MAX_VALUE), Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT);
            calciteLiteral = rexBuilder.makeLiteral(RexNodeExprFactory.makeHiveUnicodeString((String) value), stringType, true);
            break;
        case DATE:
            final Date date = (Date) value;
            calciteLiteral = rexBuilder.makeDateLiteral(DateString.fromDaysSinceEpoch(date.toEpochDay()));
            break;
        case TIMESTAMP:
            final TimestampString tsString;
            if (value instanceof Calendar) {
                tsString = TimestampString.fromCalendarFields((Calendar) value);
            } else {
                final Timestamp ts = (Timestamp) value;
                tsString = TimestampString.fromMillisSinceEpoch(ts.toEpochMilli()).withNanos(ts.getNanos());
            }
            // Must call makeLiteral, not makeTimestampLiteral
            // to have the RexBuilder.roundTime logic kick in
            calciteLiteral = rexBuilder.makeLiteral(tsString, rexBuilder.getTypeFactory().createSqlType(SqlTypeName.TIMESTAMP, rexBuilder.getTypeFactory().getTypeSystem().getDefaultPrecision(SqlTypeName.TIMESTAMP)), false);
            break;
        case TIMESTAMPLOCALTZ:
            final TimestampString tsLocalTZString;
            Instant i = ((TimestampTZ) value).getZonedDateTime().toInstant();
            tsLocalTZString = TimestampString.fromMillisSinceEpoch(i.toEpochMilli()).withNanos(i.getNano());
            calciteLiteral = rexBuilder.makeTimestampWithLocalTimeZoneLiteral(tsLocalTZString, rexBuilder.getTypeFactory().getTypeSystem().getDefaultPrecision(SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE));
            break;
        case INTERVAL_YEAR_MONTH:
            // Calcite year-month literal value is months as BigDecimal
            BigDecimal totalMonths = BigDecimal.valueOf(((HiveIntervalYearMonth) value).getTotalMonths());
            calciteLiteral = rexBuilder.makeIntervalLiteral(totalMonths, new SqlIntervalQualifier(TimeUnit.YEAR, TimeUnit.MONTH, new SqlParserPos(1, 1)));
            break;
        case INTERVAL_DAY_TIME:
            // Calcite day-time interval is millis value as BigDecimal
            // Seconds converted to millis
            BigDecimal secsValueBd = BigDecimal.valueOf(((HiveIntervalDayTime) value).getTotalSeconds() * 1000);
            // Nanos converted to millis
            BigDecimal nanosValueBd = BigDecimal.valueOf(((HiveIntervalDayTime) value).getNanos(), 6);
            calciteLiteral = rexBuilder.makeIntervalLiteral(secsValueBd.add(nanosValueBd), new SqlIntervalQualifier(TimeUnit.MILLISECOND, null, new SqlParserPos(1, 1)));
            break;
        case VOID:
            calciteLiteral = rexBuilder.makeLiteral(null, calciteDataType, true);
            break;
        case BINARY:
        case UNKNOWN:
        default:
            throw new RuntimeException("Unsupported Literal");
    }
    return calciteLiteral;
}
Also used : HiveChar(org.apache.hadoop.hive.common.type.HiveChar) RelDataType(org.apache.calcite.rel.type.RelDataType) GenericUDFTimestamp(org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) RelDataTypeFactory(org.apache.calcite.rel.type.RelDataTypeFactory) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) HiveIntervalDayTime(org.apache.hadoop.hive.common.type.HiveIntervalDayTime) SqlParserPos(org.apache.calcite.sql.parser.SqlParserPos) SqlIntervalQualifier(org.apache.calcite.sql.SqlIntervalQualifier) Calendar(java.util.Calendar) Instant(java.time.Instant) Decimal128(org.apache.hadoop.hive.common.type.Decimal128) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) BigDecimal(java.math.BigDecimal) Date(org.apache.hadoop.hive.common.type.Date) HiveFloorDate(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate) HiveExtractDate(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate) GenericUDFToDate(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDate) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) TimestampString(org.apache.calcite.util.TimestampString) RexNode(org.apache.calcite.rex.RexNode)
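
The CHAR, VARCHAR, and STRING branches all unwrap the Hive wrapper type to a plain Java String before asking the RexBuilder for a character literal. Below is a minimal sketch of just the VARCHAR unwrapping step (the class name and sample values are mine; the truncation shown is HiveVarchar's own max-length enforcement, not something the converter adds):

import org.apache.hadoop.hive.common.type.HiveVarchar;

public class VarcharLiteralSketch {
    public static void main(String[] args) {
        // HiveVarchar enforces its declared maximum length, truncating longer input,
        // so the unwrapped String always fits the VARCHAR(n) type the converter builds.
        HiveVarchar fits = new HiveVarchar("hello", 10);
        HiveVarchar clipped = new HiveVarchar("hello world", 5);
        System.out.println(fits.getValue());    // hello
        System.out.println(clipped.getValue()); // hello

        // The unwrap mirrors the VARCHAR case in the converter above.
        Object value = clipped;
        if (value instanceof HiveVarchar) {
            value = ((HiveVarchar) value).getValue();
        }
        System.out.println(value); // hello
    }
}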

Example 18 with HiveVarchar

Use of org.apache.hadoop.hive.common.type.HiveVarchar in project hive by apache.

Class TestVectorStringExpressions, method testStringColCompareVarCharScalarProjection.

@Test
public void testStringColCompareVarCharScalarProjection() throws HiveException {
    VectorizedRowBatch batch = makeStringBatch();
    VectorExpression expr;
    expr = new StringGroupColEqualVarCharScalar(0, new HiveVarchar(new String(red2), 8).getValue().getBytes(), 2);
    expr.evaluate(batch);
    Assert.assertEquals(3, batch.size);
    LongColumnVector outVector = (LongColumnVector) batch.cols[2];
    Assert.assertEquals(1, outVector.vector[0]);
    Assert.assertEquals(0, outVector.vector[1]);
    Assert.assertEquals(0, outVector.vector[2]);
    batch = makeStringBatch();
    expr = new StringGroupColEqualVarCharScalar(0, new HiveVarchar(new String(green), 10).getValue().getBytes(), 2);
    expr.evaluate(batch);
    Assert.assertEquals(3, batch.size);
    outVector = (LongColumnVector) batch.cols[2];
    Assert.assertEquals(0, outVector.vector[0]);
    Assert.assertEquals(1, outVector.vector[1]);
    Assert.assertEquals(0, outVector.vector[2]);
}
Also used : StringGroupColEqualVarCharScalar(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColEqualVarCharScalar) FilterStringGroupColEqualVarCharScalar(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColEqualVarCharScalar) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) Test(org.junit.Test)
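
The projection expression receives the VARCHAR scalar as raw bytes; the test builds them by wrapping the literal in a HiveVarchar (which applies the declared maximum length) and then encoding the resulting value. A small sketch of just that preparation step, with the class name, sample literal, and explicit charset being my own choices:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.common.type.HiveVarchar;

public class VarCharScalarBytesSketch {
    public static void main(String[] args) {
        // Wrap the literal as VARCHAR(8), then encode the (possibly truncated) value to bytes.
        byte[] scalar = new HiveVarchar("red", 8).getValue().getBytes(StandardCharsets.UTF_8);
        System.out.println(scalar.length); // 3
    }
}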

Example 19 with HiveVarchar

Use of org.apache.hadoop.hive.common.type.HiveVarchar in project hive by apache.

Class TestVectorStringExpressions, method testColConcatVarCharScalar.

@Test
public void testColConcatVarCharScalar() throws HiveException {
    // has nulls, not repeating
    VectorizedRowBatch batch = makeStringBatch();
    StringGroupColConcatStringScalar expr = new StringGroupColConcatStringScalar(0, new HiveVarchar(new String(red), 14).getValue().getBytes(), 1);
    expr.evaluate(batch);
    BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
    int cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], outCol.start[0], outCol.length[0]);
    Assert.assertEquals(0, cmp);
    Assert.assertTrue(outCol.isNull[2]);
    int cmp2 = StringExpr.compare(greenred, 0, greenred.length, outCol.vector[1], outCol.start[1], outCol.length[1]);
    Assert.assertEquals(0, cmp2);
    Assert.assertFalse(outCol.noNulls);
    Assert.assertFalse(outCol.isRepeating);
    // no nulls, not repeating
    batch = makeStringBatch();
    batch.cols[0].noNulls = true;
    expr.evaluate(batch);
    outCol = (BytesColumnVector) batch.cols[1];
    cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], outCol.start[0], outCol.length[0]);
    Assert.assertEquals(0, cmp);
    cmp2 = StringExpr.compare(greenred, 0, greenred.length, outCol.vector[1], outCol.start[1], outCol.length[1]);
    Assert.assertEquals(0, cmp2);
    int cmp3 = StringExpr.compare(red, 0, red.length, outCol.vector[2], outCol.start[2], outCol.length[2]);
    Assert.assertEquals(0, cmp3);
    Assert.assertTrue(outCol.noNulls);
    Assert.assertFalse(outCol.isRepeating);
    // has nulls, is repeating
    batch = makeStringBatch();
    batch.cols[0].isRepeating = true;
    expr.evaluate(batch);
    outCol = (BytesColumnVector) batch.cols[1];
    cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], outCol.start[0], outCol.length[0]);
    Assert.assertEquals(0, cmp);
    Assert.assertTrue(outCol.isRepeating);
    // no nulls, is repeating
    batch = makeStringBatch();
    batch.cols[0].isRepeating = true;
    batch.cols[0].noNulls = true;
    expr.evaluate(batch);
    outCol = (BytesColumnVector) batch.cols[1];
    cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], outCol.start[0], outCol.length[0]);
    Assert.assertEquals(0, cmp);
    Assert.assertTrue(outCol.isRepeating);
    Assert.assertTrue(outCol.noNulls);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) Test(org.junit.Test)
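
Per row, the concat expression appends the scalar bytes to the column entry; redred and greenred are byte-array fixtures in the test that hold the expected concatenations. A plain-Java sketch of the per-row effect (the class name and the stand-in column value are mine):

import org.apache.hadoop.hive.common.type.HiveVarchar;

public class ConcatScalarSketch {
    public static void main(String[] args) {
        // One row of the string column, standing in for the batch entry that holds "green".
        String columnValue = "green";
        // The scalar passed to StringGroupColConcatStringScalar, wrapped as VARCHAR(14).
        String scalar = new HiveVarchar("red", 14).getValue();
        // Per-row result the vectorized expression writes into the output column.
        System.out.println(columnValue + scalar); // greenred
    }
}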

Example 20 with HiveVarchar

Use of org.apache.hadoop.hive.common.type.HiveVarchar in project hive by apache.

Class TestVectorStringExpressions, method testStringColCompareVarCharScalarFilter.

// Test string column to VARCHAR literal comparison
@Test
public void testStringColCompareVarCharScalarFilter() throws HiveException {
    VectorizedRowBatch batch = makeStringBatch();
    VectorExpression expr;
    expr = new FilterStringGroupColEqualVarCharScalar(0, new HiveVarchar(new String(red2), 10).getValue().getBytes());
    expr.evaluate(batch);
    // only red qualifies, and it's in entry 0
    Assert.assertTrue(batch.size == 1);
    Assert.assertTrue(batch.selected[0] == 0);
    batch = makeStringBatch();
    expr = new FilterStringGroupColLessVarCharScalar(0, new HiveVarchar(new String(red2), 8).getValue().getBytes());
    expr.evaluate(batch);
    // only green qualifies, and it's in entry 1
    Assert.assertTrue(batch.size == 1);
    Assert.assertTrue(batch.selected[0] == 1);
    batch = makeStringBatch();
    expr = new FilterStringGroupColGreaterEqualVarCharScalar(0, new HiveVarchar(new String(green), 12).getValue().getBytes());
    expr.evaluate(batch);
    // green and red qualify
    Assert.assertTrue(batch.size == 2);
    Assert.assertTrue(batch.selected[0] == 0);
    Assert.assertTrue(batch.selected[1] == 1);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) FilterStringGroupColLessVarCharScalar(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessVarCharScalar) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) FilterStringGroupColEqualVarCharScalar(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColEqualVarCharScalar) FilterStringGroupColGreaterEqualVarCharScalar(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterEqualVarCharScalar) Test(org.junit.Test)
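
The filter variants compact the batch in place, leaving only the qualifying row indexes in the selected array. The comparisons are byte-wise and therefore lexicographic for ASCII data, which is why "green" qualifies for the Less filter against "red" and both rows qualify for GreaterEqual against "green". A small illustration of that ordering (the class name is mine; Arrays.compare requires Java 9+ and compares bytes as signed, which matches the unsigned byte comparison for ASCII input):

import java.util.Arrays;

public class LexicographicCompareSketch {
    public static void main(String[] args) {
        byte[] green = "green".getBytes();
        byte[] red = "red".getBytes();
        // Negative result: "green" compares less than "red".
        System.out.println(Arrays.compare(green, red) < 0); // true
    }
}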

Aggregations

HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar): 95 uses
HiveChar (org.apache.hadoop.hive.common.type.HiveChar): 61 uses
Test (org.junit.Test): 35 uses
Text (org.apache.hadoop.io.Text): 31 uses
HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal): 28 uses
HiveVarcharWritable (org.apache.hadoop.hive.serde2.io.HiveVarcharWritable): 27 uses
VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo): 26 uses
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 23 uses
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 21 uses
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 21 uses
ArrayList (java.util.ArrayList): 20 uses
Timestamp (org.apache.hadoop.hive.common.type.Timestamp): 20 uses
CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo): 20 uses
LongWritable (org.apache.hadoop.io.LongWritable): 19 uses
Date (org.apache.hadoop.hive.common.type.Date): 18 uses
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 18 uses
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 17 uses
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 17 uses
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 17 uses
FloatWritable (org.apache.hadoop.io.FloatWritable): 17 uses