Example 31 with HiveVarcharWritable

Use of org.apache.hadoop.hive.serde2.io.HiveVarcharWritable in project hive by apache.

The class VectorDeserializeRow, method convertRowColumn.

/**
   * Convert one row column value that is the current value in deserializeRead.
   *
   * We deserialize into a writable and then pass that writable to an instance of VectorAssignRow
   * to convert the writable to the target data type and assign it into the VectorizedRowBatch.
   *
   * @param batch the target VectorizedRowBatch
   * @param batchIndex the index of the row within the batch
   * @param logicalColumnIndex the logical index of the column being converted
   * @throws IOException
   */
private void convertRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex) throws IOException {
    final int projectionColumnNum = projectionColumnNums[logicalColumnIndex];
    Writable convertSourceWritable = convertSourceWritables[logicalColumnIndex];
    switch(sourceCategories[logicalColumnIndex]) {
        case PRIMITIVE:
            {
                switch(sourcePrimitiveCategories[logicalColumnIndex]) {
                    case VOID:
                        convertSourceWritable = null;
                        break;
                    case BOOLEAN:
                        ((BooleanWritable) convertSourceWritable).set(deserializeRead.currentBoolean);
                        break;
                    case BYTE:
                        ((ByteWritable) convertSourceWritable).set(deserializeRead.currentByte);
                        break;
                    case SHORT:
                        ((ShortWritable) convertSourceWritable).set(deserializeRead.currentShort);
                        break;
                    case INT:
                        ((IntWritable) convertSourceWritable).set(deserializeRead.currentInt);
                        break;
                    case LONG:
                        ((LongWritable) convertSourceWritable).set(deserializeRead.currentLong);
                        break;
                    case TIMESTAMP:
                        ((TimestampWritable) convertSourceWritable).set(deserializeRead.currentTimestampWritable);
                        break;
                    case DATE:
                        ((DateWritable) convertSourceWritable).set(deserializeRead.currentDateWritable);
                        break;
                    case FLOAT:
                        ((FloatWritable) convertSourceWritable).set(deserializeRead.currentFloat);
                        break;
                    case DOUBLE:
                        ((DoubleWritable) convertSourceWritable).set(deserializeRead.currentDouble);
                        break;
                    case BINARY:
                        if (deserializeRead.currentBytes == null) {
                            LOG.info("null binary entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
                        }
                        ((BytesWritable) convertSourceWritable).set(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesLength);
                        break;
                    case STRING:
                        if (deserializeRead.currentBytes == null) {
                            throw new RuntimeException("null string entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
                        }
                        // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String.
                        ((Text) convertSourceWritable).set(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesLength);
                        break;
                    case VARCHAR:
                        {
                            // Read the bytes as for STRING, then use StringExpr's truncate method
                            // to enforce the VARCHAR maximum length on the raw bytes.
                            if (deserializeRead.currentBytes == null) {
                                throw new RuntimeException("null varchar entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
                            }
                            int adjustedLength = StringExpr.truncate(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesLength, maxLengths[logicalColumnIndex]);
                            ((HiveVarcharWritable) convertSourceWritable).set(new String(deserializeRead.currentBytes, deserializeRead.currentBytesStart, adjustedLength, Charsets.UTF_8), -1);
                        }
                        break;
                    case CHAR:
                        {
                            // Read the bytes as for STRING, then use StringExpr's right-trim-and-truncate
                            // method to enforce CHAR semantics (no trailing blanks, maximum length).
                            if (deserializeRead.currentBytes == null) {
                                throw new RuntimeException("null char entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
                            }
                            int adjustedLength = StringExpr.rightTrimAndTruncate(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesLength, maxLengths[logicalColumnIndex]);
                            ((HiveCharWritable) convertSourceWritable).set(new String(deserializeRead.currentBytes, deserializeRead.currentBytesStart, adjustedLength, Charsets.UTF_8), -1);
                        }
                        break;
                    case DECIMAL:
                        ((HiveDecimalWritable) convertSourceWritable).set(deserializeRead.currentHiveDecimalWritable);
                        break;
                    case INTERVAL_YEAR_MONTH:
                        ((HiveIntervalYearMonthWritable) convertSourceWritable).set(deserializeRead.currentHiveIntervalYearMonthWritable);
                        break;
                    case INTERVAL_DAY_TIME:
                        ((HiveIntervalDayTimeWritable) convertSourceWritable).set(deserializeRead.currentHiveIntervalDayTimeWritable);
                        break;
                    default:
                        throw new RuntimeException("Primitive category " + sourcePrimitiveCategories[logicalColumnIndex] + " not supported");
                }
            }
            break;
        default:
            throw new RuntimeException("Category " + sourceCategories[logicalColumnIndex] + " not supported");
    }
    /*
     * Convert our source object we just read into the target object and store that in the
     * VectorizedRowBatch.
     */
    convertVectorAssignRow.assignConvertRowColumn(batch, batchIndex, logicalColumnIndex, convertSourceWritable);
}
Also used : ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveIntervalYearMonthWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable) HiveIntervalDayTimeWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) IntWritable(org.apache.hadoop.io.IntWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) FloatWritable(org.apache.hadoop.io.FloatWritable)
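The VARCHAR and CHAR branches above differ only in the trim step. Here is a minimal sketch, not part of the Hive source (the class name and sample input are hypothetical), showing how StringExpr.truncate and StringExpr.rightTrimAndTruncate diverge on trailing blanks:

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;

public class TruncateSketch {
    public static void main(String[] args) {
        // "ab" followed by four trailing blanks: six characters in all.
        byte[] bytes = "ab    ".getBytes(StandardCharsets.UTF_8);
        // VARCHAR(5) semantics: cut to at most 5 characters, blanks preserved.
        int varcharLength = StringExpr.truncate(bytes, 0, bytes.length, 5);
        // CHAR(5) semantics: drop trailing blanks first, then cut to at most 5.
        int charLength = StringExpr.rightTrimAndTruncate(bytes, 0, bytes.length, 5);
        // Prints [ab   ] (length 5) then [ab] (length 2).
        System.out.println("[" + new String(bytes, 0, varcharLength, StandardCharsets.UTF_8) + "]");
        System.out.println("[" + new String(bytes, 0, charLength, StandardCharsets.UTF_8) + "]");
    }
}

Both methods operate directly on the byte array and return an adjusted length, so no intermediate String is allocated for the length check itself.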

Example 32 with HiveVarcharWritable

Use of org.apache.hadoop.hive.serde2.io.HiveVarcharWritable in project hive by apache.

The class VectorExtractRow, method extractRowColumn.

/**
   * Extract a row's column object from the ColumnVector at batchIndex in the VectorizedRowBatch.
   *
   * @param batch the source VectorizedRowBatch
   * @param batchIndex the index of the row within the batch
   * @param logicalColumnIndex the logical index of the column being extracted
   * @return the column value as a Writable, or null for a null entry
   */
public Object extractRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex) {
    final int projectionColumnNum = projectionColumnNums[logicalColumnIndex];
    ColumnVector colVector = batch.cols[projectionColumnNum];
    if (colVector == null) {
        // The planner will not include unneeded columns for reading, but other
        // parts of execution may ask for them.
        return null;
    }
    int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex);
    if (!colVector.noNulls && colVector.isNull[adjustedIndex]) {
        return null;
    }
    Category category = categories[logicalColumnIndex];
    switch(category) {
        case PRIMITIVE:
            {
                Writable primitiveWritable = primitiveWritables[logicalColumnIndex];
                PrimitiveCategory primitiveCategory = primitiveCategories[logicalColumnIndex];
                switch(primitiveCategory) {
                    case VOID:
                        return null;
                    case BOOLEAN:
                        ((BooleanWritable) primitiveWritable).set(((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex] != 0);
                        return primitiveWritable;
                    case BYTE:
                        ((ByteWritable) primitiveWritable).set((byte) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
                        return primitiveWritable;
                    case SHORT:
                        ((ShortWritable) primitiveWritable).set((short) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
                        return primitiveWritable;
                    case INT:
                        ((IntWritable) primitiveWritable).set((int) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
                        return primitiveWritable;
                    case LONG:
                        ((LongWritable) primitiveWritable).set(((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
                        return primitiveWritable;
                    case TIMESTAMP:
                        ((TimestampWritable) primitiveWritable).set(((TimestampColumnVector) batch.cols[projectionColumnNum]).asScratchTimestamp(adjustedIndex));
                        return primitiveWritable;
                    case DATE:
                        ((DateWritable) primitiveWritable).set((int) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
                        return primitiveWritable;
                    case FLOAT:
                        ((FloatWritable) primitiveWritable).set((float) ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
                        return primitiveWritable;
                    case DOUBLE:
                        ((DoubleWritable) primitiveWritable).set(((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
                        return primitiveWritable;
                    case BINARY:
                        {
                            BytesColumnVector bytesColVector = ((BytesColumnVector) batch.cols[projectionColumnNum]);
                            byte[] bytes = bytesColVector.vector[adjustedIndex];
                            int start = bytesColVector.start[adjustedIndex];
                            int length = bytesColVector.length[adjustedIndex];
                            if (bytes == null) {
                                LOG.info("null binary entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
                            }
                            BytesWritable bytesWritable = (BytesWritable) primitiveWritable;
                            bytesWritable.set(bytes, start, length);
                            return primitiveWritable;
                        }
                    case STRING:
                        {
                            BytesColumnVector bytesColVector = ((BytesColumnVector) batch.cols[projectionColumnNum]);
                            byte[] bytes = bytesColVector.vector[adjustedIndex];
                            int start = bytesColVector.start[adjustedIndex];
                            int length = bytesColVector.length[adjustedIndex];
                            if (bytes == null) {
                                nullBytesReadError(primitiveCategory, batchIndex, projectionColumnNum);
                            }
                            // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String.
                            ((Text) primitiveWritable).set(bytes, start, length);
                            return primitiveWritable;
                        }
                    case VARCHAR:
                        {
                            BytesColumnVector bytesColVector = ((BytesColumnVector) batch.cols[projectionColumnNum]);
                            byte[] bytes = bytesColVector.vector[adjustedIndex];
                            int start = bytesColVector.start[adjustedIndex];
                            int length = bytesColVector.length[adjustedIndex];
                            if (bytes == null) {
                                nullBytesReadError(primitiveCategory, batchIndex, projectionColumnNum);
                            }
                            int adjustedLength = StringExpr.truncate(bytes, start, length, maxLengths[logicalColumnIndex]);
                            HiveVarcharWritable hiveVarcharWritable = (HiveVarcharWritable) primitiveWritable;
                            hiveVarcharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), -1);
                            return primitiveWritable;
                        }
                    case CHAR:
                        {
                            BytesColumnVector bytesColVector = ((BytesColumnVector) batch.cols[projectionColumnNum]);
                            byte[] bytes = bytesColVector.vector[adjustedIndex];
                            int start = bytesColVector.start[adjustedIndex];
                            int length = bytesColVector.length[adjustedIndex];
                            if (bytes == null) {
                                nullBytesReadError(primitiveCategory, batchIndex, projectionColumnNum);
                            }
                            int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length, maxLengths[logicalColumnIndex]);
                            HiveCharWritable hiveCharWritable = (HiveCharWritable) primitiveWritable;
                            hiveCharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), maxLengths[logicalColumnIndex]);
                            return primitiveWritable;
                        }
                    case DECIMAL:
                        // The HiveDecimalWritable set method will quickly copy the deserialized decimal writable fields.
                        ((HiveDecimalWritable) primitiveWritable).set(((DecimalColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
                        return primitiveWritable;
                    case INTERVAL_YEAR_MONTH:
                        ((HiveIntervalYearMonthWritable) primitiveWritable).set((int) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
                        return primitiveWritable;
                    case INTERVAL_DAY_TIME:
                        ((HiveIntervalDayTimeWritable) primitiveWritable).set(((IntervalDayTimeColumnVector) batch.cols[projectionColumnNum]).asScratchIntervalDayTime(adjustedIndex));
                        return primitiveWritable;
                    default:
                        throw new RuntimeException("Primitive category " + primitiveCategory.name() + " not supported");
                }
            }
        default:
            throw new RuntimeException("Category " + category.name() + " not supported");
    }
}
Also used : PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveIntervalYearMonthWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable) HiveIntervalDayTimeWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) IntWritable(org.apache.hadoop.io.IntWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) Text(org.apache.hadoop.io.Text)
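The adjustedIndex computation at the top of extractRowColumn encodes a core ColumnVector convention: a repeating vector stores its single value (and null flag) at slot 0. A minimal sketch of that convention, not Hive source (the class name and literal values are hypothetical):

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;

public class RepeatingVectorSketch {
    public static void main(String[] args) {
        // One-column batch whose column repeats a single VARCHAR value.
        VectorizedRowBatch batch = new VectorizedRowBatch(1);
        BytesColumnVector col = new BytesColumnVector();
        col.initBuffer();
        col.setVal(0, "hello".getBytes(StandardCharsets.UTF_8));
        // Every row shares the value stored at index 0.
        col.isRepeating = true;
        batch.cols[0] = col;
        batch.size = 3;

        // Mirror extractRowColumn for row 2: a repeating vector is read at 0.
        int batchIndex = 2;
        int adjustedIndex = col.isRepeating ? 0 : batchIndex;
        HiveVarcharWritable out = new HiveVarcharWritable();
        // A length of -1 tells HiveVarcharWritable.set not to enforce a maximum.
        out.set(new String(col.vector[adjustedIndex], col.start[adjustedIndex],
                col.length[adjustedIndex], StandardCharsets.UTF_8), -1);
        // Prints "hello".
        System.out.println(out.getHiveVarchar().getValue());
    }
}

Passing -1 as the length is safe in the VARCHAR branch above because StringExpr.truncate has already enforced the maximum length on the bytes.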

Example 33 with HiveVarcharWritable

Use of org.apache.hadoop.hive.serde2.io.HiveVarcharWritable in project hive by apache.

The class TestGenericUDFOPMinus, method testVarcharMinusInt.

@Test
public void testVarcharMinusInt() throws HiveException {
    GenericUDFOPMinus udf = new GenericUDFOPMinus();
    HiveVarcharWritable left = new HiveVarcharWritable();
    left.set("123");
    IntWritable right = new IntWritable(456);
    ObjectInspector[] inputOIs = { PrimitiveObjectInspectorFactory.writableHiveVarcharObjectInspector, PrimitiveObjectInspectorFactory.writableIntObjectInspector };
    DeferredObject[] args = { new DeferredJavaObject(left), new DeferredJavaObject(right) };
    PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs);
    Assert.assertEquals(oi.getTypeInfo(), TypeInfoFactory.doubleTypeInfo);
    DoubleWritable res = (DoubleWritable) udf.evaluate(args);
    Assert.assertEquals(new Double(-333.0), new Double(res.get()));
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) DeferredJavaObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) DeferredObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
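The result ObjectInspector is double because Hive's arithmetic UDFs coerce a string-family operand (here VARCHAR) to double before computing. A minimal sketch of that coercion in plain Java, not the UDF's internal code path (the class name is hypothetical):

import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;

public class VarcharCoercionSketch {
    public static void main(String[] args) {
        HiveVarcharWritable left = new HiveVarcharWritable();
        left.set("123");
        // The varchar payload parses as a double, so "123" - 456 = -333.0.
        double coerced = Double.parseDouble(left.getHiveVarchar().getValue());
        System.out.println(coerced - 456); // -333.0
    }
}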

Example 34 with HiveVarcharWritable

Use of org.apache.hadoop.hive.serde2.io.HiveVarcharWritable in project hive by apache.

The class TestGenericUDFPrintf, method testVarcharFormat.

@Test
public void testVarcharFormat() throws HiveException {
    GenericUDFPrintf udf = new GenericUDFPrintf();
    ObjectInspector[] inputOIs = { PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(TypeInfoFactory.getVarcharTypeInfo(7)), PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(TypeInfoFactory.getCharTypeInfo(5)) };
    HiveCharWritable argChar = new HiveCharWritable();
    argChar.set("hello");
    HiveVarcharWritable formatVarchar = new HiveVarcharWritable();
    formatVarchar.set("arg1=%s");
    DeferredObject[] args = { new DeferredJavaObject(formatVarchar), new DeferredJavaObject(argChar) };
    PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs);
    Assert.assertEquals(PrimitiveObjectInspectorFactory.writableStringObjectInspector, oi);
    Text res = (Text) udf.evaluate(args);
    Assert.assertEquals("arg1=hello", res.toString());
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) DeferredJavaObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) DeferredObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) Text(org.apache.hadoop.io.Text) Test(org.junit.Test)

Example 35 with HiveVarcharWritable

Use of org.apache.hadoop.hive.serde2.io.HiveVarcharWritable in project hive by apache.

The class TestGenericUDFOPMultiply, method testVarcharTimesInt.

@Test
public void testVarcharTimesInt() throws HiveException {
    GenericUDFOPMultiply udf = new GenericUDFOPMultiply();
    HiveVarcharWritable left = new HiveVarcharWritable();
    left.set("123");
    IntWritable right = new IntWritable(456);
    ObjectInspector[] inputOIs = { PrimitiveObjectInspectorFactory.writableHiveVarcharObjectInspector, PrimitiveObjectInspectorFactory.writableIntObjectInspector };
    DeferredObject[] args = { new DeferredJavaObject(left), new DeferredJavaObject(right) };
    PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs);
    Assert.assertEquals(oi.getTypeInfo(), TypeInfoFactory.doubleTypeInfo);
    DoubleWritable res = (DoubleWritable) udf.evaluate(args);
    Assert.assertEquals(new Double(123 * 456), new Double(res.get()));
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) DeferredJavaObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) DeferredObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Aggregations

HiveVarcharWritable (org.apache.hadoop.hive.serde2.io.HiveVarcharWritable): 46 uses
Text (org.apache.hadoop.io.Text): 31 uses
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 28 uses
HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar): 25 uses
IntWritable (org.apache.hadoop.io.IntWritable): 24 uses
HiveCharWritable (org.apache.hadoop.hive.serde2.io.HiveCharWritable): 22 uses
LongWritable (org.apache.hadoop.io.LongWritable): 21 uses
TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable): 20 uses
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 20 uses
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 20 uses
BytesWritable (org.apache.hadoop.io.BytesWritable): 20 uses
FloatWritable (org.apache.hadoop.io.FloatWritable): 19 uses
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 18 uses
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 18 uses
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 18 uses
Test (org.junit.Test): 18 uses
DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable): 17 uses
ArrayList (java.util.ArrayList): 15 uses
HiveChar (org.apache.hadoop.hive.common.type.HiveChar): 15 uses
HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal): 15 uses