
Example 56 with ByteWritable

Use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

From the class TypedBytesRecordReader, method write.

private void write(int pos, Writable inpw) throws IOException {
    String typ = columnTypes.get(pos);
    Writable w = (Writable) converters.get(pos).convert(inpw);
    if (typ.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) {
        tbOut.writeBoolean((BooleanWritable) w);
    } else if (typ.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)) {
        tbOut.writeByte((ByteWritable) w);
    } else if (typ.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)) {
        tbOut.writeShort((ShortWritable) w);
    } else if (typ.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)) {
        tbOut.writeInt((IntWritable) w);
    } else if (typ.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) {
        tbOut.writeLong((LongWritable) w);
    } else if (typ.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) {
        tbOut.writeFloat((FloatWritable) w);
    } else if (typ.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
        tbOut.writeDouble((DoubleWritable) w);
    } else if (typ.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) {
        tbOut.writeText((Text) w);
    } else {
        assert false;
    }
}
Also used : FloatWritable(org.apache.hadoop.io.FloatWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) IntWritable(org.apache.hadoop.io.IntWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) Text(org.apache.hadoop.io.Text)
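
For orientation, here is a minimal sketch (not from the Hive sources) of the ByteWritable round-trip that writeByte above ultimately relies on: the value is serialized through the Writable interface and read back into a fresh instance. The class name and stream setup are illustrative assumptions.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.hive.serde2.io.ByteWritable;

public class ByteWritableRoundTrip {
    public static void main(String[] args) throws Exception {
        // Serialize a TINYINT value through the Writable interface.
        ByteWritable out = new ByteWritable((byte) 42);
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        out.write(new DataOutputStream(buffer));

        // Deserialize into a fresh instance and check the value survived.
        ByteWritable in = new ByteWritable();
        in.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
        assert in.get() == (byte) 42;
    }
}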

Example 57 with ByteWritable

Use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

From the class VectorColumnAssignFactory, method buildObjectAssign.

public static VectorColumnAssign buildObjectAssign(VectorizedRowBatch outputBatch, int outColIndex, PrimitiveCategory category) throws HiveException {
    VectorColumnAssign outVCA = null;
    ColumnVector destCol = outputBatch.cols[outColIndex];
    if (destCol == null) {
        switch(category) {
            case VOID:
                outVCA = new VectorLongColumnAssign() {

                    // This is a dummy assigner
                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        // No-op: there is no column to assign to, and val is expected to be null
                        assert (val == null);
                    }
                };
                break;
            default:
                throw new HiveException("Incompatible (null) vector column and primitive category " + category);
        }
    } else if (destCol instanceof LongColumnVector) {
        switch(category) {
            case BOOLEAN:
                outVCA = new VectorLongColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            BooleanWritable bw = (BooleanWritable) val;
                            assignLong(bw.get() ? 1 : 0, destIndex);
                        }
                    }
                }.init(outputBatch, (LongColumnVector) destCol);
                break;
            case BYTE:
                outVCA = new VectorLongColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            ByteWritable bw = (ByteWritable) val;
                            assignLong(bw.get(), destIndex);
                        }
                    }
                }.init(outputBatch, (LongColumnVector) destCol);
                break;
            case SHORT:
                outVCA = new VectorLongColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            ShortWritable bw = (ShortWritable) val;
                            assignLong(bw.get(), destIndex);
                        }
                    }
                }.init(outputBatch, (LongColumnVector) destCol);
                break;
            case INT:
                outVCA = new VectorLongColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            IntWritable bw = (IntWritable) val;
                            assignLong(bw.get(), destIndex);
                        }
                    }
                }.init(outputBatch, (LongColumnVector) destCol);
                break;
            case LONG:
                outVCA = new VectorLongColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            LongWritable bw = (LongWritable) val;
                            assignLong(bw.get(), destIndex);
                        }
                    }
                }.init(outputBatch, (LongColumnVector) destCol);
                break;
            case TIMESTAMP:
                outVCA = new VectorTimestampColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            assignTimestamp((TimestampWritable) val, destIndex);
                        }
                    }
                }.init(outputBatch, (TimestampColumnVector) destCol);
                break;
            case DATE:
                outVCA = new VectorLongColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            DateWritable bw = (DateWritable) val;
                            assignLong(bw.getDays(), destIndex);
                        }
                    }
                }.init(outputBatch, (LongColumnVector) destCol);
                break;
            case INTERVAL_YEAR_MONTH:
                outVCA = new VectorLongColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            HiveIntervalYearMonthWritable bw = (HiveIntervalYearMonthWritable) val;
                            assignLong(bw.getHiveIntervalYearMonth().getTotalMonths(), destIndex);
                        }
                    }
                }.init(outputBatch, (LongColumnVector) destCol);
                break;
            case INTERVAL_DAY_TIME:
                outVCA = new VectorIntervalDayTimeColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            HiveIntervalDayTimeWritable bw = (HiveIntervalDayTimeWritable) val;
                            assignIntervalDayTime(bw.getHiveIntervalDayTime(), destIndex);
                        }
                    }
                }.init(outputBatch, (IntervalDayTimeColumnVector) destCol);
                break;
            default:
                throw new HiveException("Incompatible Long vector column and primitive category " + category);
        }
    } else if (destCol instanceof DoubleColumnVector) {
        switch(category) {
            case DOUBLE:
                outVCA = new VectorDoubleColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            DoubleWritable bw = (DoubleWritable) val;
                            assignDouble(bw.get(), destIndex);
                        }
                    }
                }.init(outputBatch, (DoubleColumnVector) destCol);
                break;
            case FLOAT:
                outVCA = new VectorDoubleColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            FloatWritable bw = (FloatWritable) val;
                            assignDouble(bw.get(), destIndex);
                        }
                    }
                }.init(outputBatch, (DoubleColumnVector) destCol);
                break;
            default:
                throw new HiveException("Incompatible Double vector column and primitive category " + category);
        }
    } else if (destCol instanceof BytesColumnVector) {
        switch(category) {
            case BINARY:
                outVCA = new VectorBytesColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            BytesWritable bw = (BytesWritable) val;
                            byte[] bytes = bw.getBytes();
                            assignBytes(bytes, 0, bw.getLength(), destIndex);
                        }
                    }
                }.init(outputBatch, (BytesColumnVector) destCol);
                break;
            case STRING:
                outVCA = new VectorBytesColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            Text bw = (Text) val;
                            byte[] bytes = bw.getBytes();
                            assignBytes(bytes, 0, bw.getLength(), destIndex);
                        }
                    }
                }.init(outputBatch, (BytesColumnVector) destCol);
                break;
            case VARCHAR:
                outVCA = new VectorBytesColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            // We store VARCHAR type stripped of pads.
                            HiveVarchar hiveVarchar;
                            if (val instanceof HiveVarchar) {
                                hiveVarchar = (HiveVarchar) val;
                            } else {
                                hiveVarchar = ((HiveVarcharWritable) val).getHiveVarchar();
                            }
                            byte[] bytes = hiveVarchar.getValue().getBytes();
                            assignBytes(bytes, 0, bytes.length, destIndex);
                        }
                    }
                }.init(outputBatch, (BytesColumnVector) destCol);
                break;
            case CHAR:
                outVCA = new VectorBytesColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            // We store CHAR type stripped of pads.
                            HiveChar hiveChar;
                            if (val instanceof HiveChar) {
                                hiveChar = (HiveChar) val;
                            } else {
                                hiveChar = ((HiveCharWritable) val).getHiveChar();
                            }
                            byte[] bytes = hiveChar.getStrippedValue().getBytes();
                            assignBytes(bytes, 0, bytes.length, destIndex);
                        }
                    }
                }.init(outputBatch, (BytesColumnVector) destCol);
                break;
            default:
                throw new HiveException("Incompatible Bytes vector column and primitive category " + category);
        }
    } else if (destCol instanceof DecimalColumnVector) {
        switch(category) {
            case DECIMAL:
                outVCA = new VectorDecimalColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            if (val instanceof HiveDecimal) {
                                assignDecimal((HiveDecimal) val, destIndex);
                            } else {
                                assignDecimal((HiveDecimalWritable) val, destIndex);
                            }
                        }
                    }
                }.init(outputBatch, (DecimalColumnVector) destCol);
                break;
            default:
                throw new HiveException("Incompatible Decimal vector column and primitive category " + category);
        }
    } else {
        throw new HiveException("Unknown vector column type " + destCol.getClass().getName());
    }
    return outVCA;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveIntervalDayTimeWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) Text(org.apache.hadoop.io.Text) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) HiveIntervalYearMonthWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable)
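
buildObjectAssign dispatches once per column and returns an assigner that widens each value into the backing column representation (BOOLEAN, BYTE, SHORT, INT, and LONG all land in a long-backed vector). A stripped-down sketch of that pattern, using a hypothetical LongColumn stand-in instead of Hive's ColumnVector hierarchy:

import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.io.IntWritable;

public class WideningAssignSketch {
    /** Hypothetical stand-in for a long-backed column vector. */
    static class LongColumn {
        final long[] vector = new long[1024];
    }

    interface Assigner {
        void assign(Object val, int destIndex);
    }

    /** Dispatch on the type once per column, not once per value, mirroring buildObjectAssign. */
    static Assigner build(LongColumn col, String category) {
        switch (category) {
            case "BYTE":
                return (val, i) -> col.vector[i] = ((ByteWritable) val).get();
            case "SHORT":
                return (val, i) -> col.vector[i] = ((ShortWritable) val).get();
            case "INT":
                return (val, i) -> col.vector[i] = ((IntWritable) val).get();
            default:
                throw new IllegalArgumentException("Incompatible category " + category);
        }
    }

    public static void main(String[] args) {
        LongColumn col = new LongColumn();
        Assigner a = build(col, "BYTE");
        a.assign(new ByteWritable((byte) 7), 0); // byte value widened into long storage
        assert col.vector[0] == 7L;
    }
}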

Example 58 with ByteWritable

Use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

From the class VectorDeserializeRow, method convertRowColumn.

/**
   * Convert one row column value that is the current value in deserializeRead.
   *
   * We deserialize into a writable and then pass that writable to an instance of VectorAssignRow
   * to convert the writable to the target data type and assign it into the VectorizedRowBatch.
   *
   * @param batch the output VectorizedRowBatch being filled
   * @param batchIndex the row position within the batch
   * @param logicalColumnIndex the logical (projected) column to convert
   * @throws IOException
   */
private void convertRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex) throws IOException {
    final int projectionColumnNum = projectionColumnNums[logicalColumnIndex];
    Writable convertSourceWritable = convertSourceWritables[logicalColumnIndex];
    switch(sourceCategories[logicalColumnIndex]) {
        case PRIMITIVE:
            {
                switch(sourcePrimitiveCategories[logicalColumnIndex]) {
                    case VOID:
                        convertSourceWritable = null;
                        break;
                    case BOOLEAN:
                        ((BooleanWritable) convertSourceWritable).set(deserializeRead.currentBoolean);
                        break;
                    case BYTE:
                        ((ByteWritable) convertSourceWritable).set(deserializeRead.currentByte);
                        break;
                    case SHORT:
                        ((ShortWritable) convertSourceWritable).set(deserializeRead.currentShort);
                        break;
                    case INT:
                        ((IntWritable) convertSourceWritable).set(deserializeRead.currentInt);
                        break;
                    case LONG:
                        ((LongWritable) convertSourceWritable).set(deserializeRead.currentLong);
                        break;
                    case TIMESTAMP:
                        ((TimestampWritable) convertSourceWritable).set(deserializeRead.currentTimestampWritable);
                        break;
                    case DATE:
                        ((DateWritable) convertSourceWritable).set(deserializeRead.currentDateWritable);
                        break;
                    case FLOAT:
                        ((FloatWritable) convertSourceWritable).set(deserializeRead.currentFloat);
                        break;
                    case DOUBLE:
                        ((DoubleWritable) convertSourceWritable).set(deserializeRead.currentDouble);
                        break;
                    case BINARY:
                        if (deserializeRead.currentBytes == null) {
                            LOG.info("null binary entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
                        }
                        ((BytesWritable) convertSourceWritable).set(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesLength);
                        break;
                    case STRING:
                        if (deserializeRead.currentBytes == null) {
                            throw new RuntimeException("null string entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
                        }
                        // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String.
                        ((Text) convertSourceWritable).set(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesLength);
                        break;
                    case VARCHAR:
                        {
                            // Note: this conversion materializes a Java String from the raw bytes
                            // rather than operating on them directly.
                            if (deserializeRead.currentBytes == null) {
                                throw new RuntimeException("null varchar entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
                            }
                            int adjustedLength = StringExpr.truncate(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesLength, maxLengths[logicalColumnIndex]);
                            ((HiveVarcharWritable) convertSourceWritable).set(new String(deserializeRead.currentBytes, deserializeRead.currentBytesStart, adjustedLength, Charsets.UTF_8), -1);
                        }
                        break;
                    case CHAR:
                        {
                            // Note: this conversion materializes a Java String from the raw bytes
                            // rather than operating on them directly.
                            if (deserializeRead.currentBytes == null) {
                                throw new RuntimeException("null char entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
                            }
                            int adjustedLength = StringExpr.rightTrimAndTruncate(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesLength, maxLengths[logicalColumnIndex]);
                            ((HiveCharWritable) convertSourceWritable).set(new String(deserializeRead.currentBytes, deserializeRead.currentBytesStart, adjustedLength, Charsets.UTF_8), -1);
                        }
                        break;
                    case DECIMAL:
                        ((HiveDecimalWritable) convertSourceWritable).set(deserializeRead.currentHiveDecimalWritable);
                        break;
                    case INTERVAL_YEAR_MONTH:
                        ((HiveIntervalYearMonthWritable) convertSourceWritable).set(deserializeRead.currentHiveIntervalYearMonthWritable);
                        break;
                    case INTERVAL_DAY_TIME:
                        ((HiveIntervalDayTimeWritable) convertSourceWritable).set(deserializeRead.currentHiveIntervalDayTimeWritable);
                        break;
                    default:
                        throw new RuntimeException("Primitive category " + sourcePrimitiveCategories[logicalColumnIndex] + " not supported");
                }
            }
            break;
        default:
            throw new RuntimeException("Category " + sourceCategories[logicalColumnIndex] + " not supported");
    }
    /*
     * Convert our source object we just read into the target object and store that in the
     * VectorizedRowBatch.
     */
    convertVectorAssignRow.assignConvertRowColumn(batch, batchIndex, logicalColumnIndex, convertSourceWritable);
}
Also used : ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveIntervalYearMonthWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable) HiveIntervalDayTimeWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) IntWritable(org.apache.hadoop.io.IntWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) FloatWritable(org.apache.hadoop.io.FloatWritable)
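
Note the allocation discipline in convertRowColumn: each column gets one pre-allocated Writable up front, and the per-row path only calls set() on it. A hedged sketch of that reuse pattern (the two-column tinyint/bigint schema is an assumption):

import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;

public class ReusableWritablesSketch {
    public static void main(String[] args) {
        // One writable per column, allocated once (assumed schema: tinyint, bigint).
        Writable[] reused = { new ByteWritable(), new LongWritable() };

        // Per-row path: mutate in place via set(), never allocate.
        for (long row = 0; row < 3; row++) {
            ((ByteWritable) reused[0]).set((byte) row);
            ((LongWritable) reused[1]).set(row * 1000L);
            System.out.println(((ByteWritable) reused[0]).get() + "\t" + ((LongWritable) reused[1]).get());
        }
    }
}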

Example 59 with ByteWritable

Use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

From the class VectorExtractRow, method extractRowColumn.

/**
   * Extract a row's column object from the ColumnVector at batchIndex in the VectorizedRowBatch.
   *
   * @param batch the VectorizedRowBatch to read from
   * @param batchIndex the row position within the batch
   * @param logicalColumnIndex the logical (projected) column to extract
   * @return the extracted value as a reused Writable, or null for SQL NULL
   */
public Object extractRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex) {
    final int projectionColumnNum = projectionColumnNums[logicalColumnIndex];
    ColumnVector colVector = batch.cols[projectionColumnNum];
    if (colVector == null) {
        // In rare cases the planner does not include a column for reading, but other
        // parts of execution may still ask for it; treat the value as null.
        return null;
    }
    int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex);
    if (!colVector.noNulls && colVector.isNull[adjustedIndex]) {
        return null;
    }
    Category category = categories[logicalColumnIndex];
    switch(category) {
        case PRIMITIVE:
            {
                Writable primitiveWritable = primitiveWritables[logicalColumnIndex];
                PrimitiveCategory primitiveCategory = primitiveCategories[logicalColumnIndex];
                switch(primitiveCategory) {
                    case VOID:
                        return null;
                    case BOOLEAN:
                        ((BooleanWritable) primitiveWritable).set(((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex] != 0);
                        return primitiveWritable;
                    case BYTE:
                        ((ByteWritable) primitiveWritable).set((byte) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
                        return primitiveWritable;
                    case SHORT:
                        ((ShortWritable) primitiveWritable).set((short) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
                        return primitiveWritable;
                    case INT:
                        ((IntWritable) primitiveWritable).set((int) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
                        return primitiveWritable;
                    case LONG:
                        ((LongWritable) primitiveWritable).set(((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
                        return primitiveWritable;
                    case TIMESTAMP:
                        ((TimestampWritable) primitiveWritable).set(((TimestampColumnVector) batch.cols[projectionColumnNum]).asScratchTimestamp(adjustedIndex));
                        return primitiveWritable;
                    case DATE:
                        ((DateWritable) primitiveWritable).set((int) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
                        return primitiveWritable;
                    case FLOAT:
                        ((FloatWritable) primitiveWritable).set((float) ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
                        return primitiveWritable;
                    case DOUBLE:
                        ((DoubleWritable) primitiveWritable).set(((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
                        return primitiveWritable;
                    case BINARY:
                        {
                            BytesColumnVector bytesColVector = ((BytesColumnVector) batch.cols[projectionColumnNum]);
                            byte[] bytes = bytesColVector.vector[adjustedIndex];
                            int start = bytesColVector.start[adjustedIndex];
                            int length = bytesColVector.length[adjustedIndex];
                            if (bytes == null) {
                                LOG.info("null binary entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
                            }
                            BytesWritable bytesWritable = (BytesWritable) primitiveWritable;
                            bytesWritable.set(bytes, start, length);
                            return primitiveWritable;
                        }
                    case STRING:
                        {
                            BytesColumnVector bytesColVector = ((BytesColumnVector) batch.cols[projectionColumnNum]);
                            byte[] bytes = bytesColVector.vector[adjustedIndex];
                            int start = bytesColVector.start[adjustedIndex];
                            int length = bytesColVector.length[adjustedIndex];
                            if (bytes == null) {
                                nullBytesReadError(primitiveCategory, batchIndex, projectionColumnNum);
                            }
                            // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String.
                            ((Text) primitiveWritable).set(bytes, start, length);
                            return primitiveWritable;
                        }
                    case VARCHAR:
                        {
                            BytesColumnVector bytesColVector = ((BytesColumnVector) batch.cols[projectionColumnNum]);
                            byte[] bytes = bytesColVector.vector[adjustedIndex];
                            int start = bytesColVector.start[adjustedIndex];
                            int length = bytesColVector.length[adjustedIndex];
                            if (bytes == null) {
                                nullBytesReadError(primitiveCategory, batchIndex, projectionColumnNum);
                            }
                            int adjustedLength = StringExpr.truncate(bytes, start, length, maxLengths[logicalColumnIndex]);
                            HiveVarcharWritable hiveVarcharWritable = (HiveVarcharWritable) primitiveWritable;
                            hiveVarcharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), -1);
                            return primitiveWritable;
                        }
                    case CHAR:
                        {
                            BytesColumnVector bytesColVector = ((BytesColumnVector) batch.cols[projectionColumnNum]);
                            byte[] bytes = bytesColVector.vector[adjustedIndex];
                            int start = bytesColVector.start[adjustedIndex];
                            int length = bytesColVector.length[adjustedIndex];
                            if (bytes == null) {
                                nullBytesReadError(primitiveCategory, batchIndex, projectionColumnNum);
                            }
                            int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length, maxLengths[logicalColumnIndex]);
                            HiveCharWritable hiveCharWritable = (HiveCharWritable) primitiveWritable;
                            hiveCharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), maxLengths[logicalColumnIndex]);
                            return primitiveWritable;
                        }
                    case DECIMAL:
                        // The HiveDecimalWritable set method will quickly copy the deserialized decimal writable fields.
                        ((HiveDecimalWritable) primitiveWritable).set(((DecimalColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
                        return primitiveWritable;
                    case INTERVAL_YEAR_MONTH:
                        ((HiveIntervalYearMonthWritable) primitiveWritable).set((int) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
                        return primitiveWritable;
                    case INTERVAL_DAY_TIME:
                        ((HiveIntervalDayTimeWritable) primitiveWritable).set(((IntervalDayTimeColumnVector) batch.cols[projectionColumnNum]).asScratchIntervalDayTime(adjustedIndex));
                        return primitiveWritable;
                    default:
                        throw new RuntimeException("Primitive category " + primitiveCategory.name() + " not supported");
                }
            }
        default:
            throw new RuntimeException("Category " + category.name() + " not supported");
    }
}
Also used : PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveIntervalYearMonthWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable) HiveIntervalDayTimeWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) IntWritable(org.apache.hadoop.io.IntWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) Text(org.apache.hadoop.io.Text)
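
The adjustedIndex computation at the top of extractRowColumn is the standard idiom for reading a possibly repeating, possibly nullable vector. A minimal sketch of the same logic for the BYTE case, against Hive's LongColumnVector (assuming the hive-storage-api classes are on the classpath; the helper method is an illustrative assumption):

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.serde2.io.ByteWritable;

public class ExtractSketch {
    /** Returns null for SQL NULL, otherwise the reused ByteWritable holding the value. */
    static ByteWritable extractByte(LongColumnVector col, int batchIndex, ByteWritable reuse) {
        // A repeating vector stores its single value at index 0.
        int adjusted = col.isRepeating ? 0 : batchIndex;
        if (!col.noNulls && col.isNull[adjusted]) {
            return null;
        }
        reuse.set((byte) col.vector[adjusted]); // narrow the long storage back to byte
        return reuse;
    }

    public static void main(String[] args) {
        LongColumnVector col = new LongColumnVector(4);
        col.vector[0] = 42;
        col.isRepeating = true; // every row reads the value at index 0
        ByteWritable w = extractByte(col, 3, new ByteWritable());
        assert w != null && w.get() == (byte) 42;
    }
}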

Example 60 with ByteWritable

Use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

From the class TestOrcFile, method testSeek.

@Test
public void testSeek() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).inspector(inspector).stripeSize(200000).bufferSize(65536).rowIndexStride(1000));
    Random rand = new Random(42);
    final int COUNT = 32768;
    long[] intValues = new long[COUNT];
    double[] doubleValues = new double[COUNT];
    String[] stringValues = new String[COUNT];
    BytesWritable[] byteValues = new BytesWritable[COUNT];
    String[] words = new String[128];
    for (int i = 0; i < words.length; ++i) {
        words[i] = Integer.toHexString(rand.nextInt());
    }
    for (int i = 0; i < COUNT / 2; ++i) {
        intValues[2 * i] = rand.nextLong();
        intValues[2 * i + 1] = intValues[2 * i];
        stringValues[2 * i] = words[rand.nextInt(words.length)];
        stringValues[2 * i + 1] = stringValues[2 * i];
    }
    for (int i = 0; i < COUNT; ++i) {
        doubleValues[i] = rand.nextDouble();
        byte[] buf = new byte[20];
        rand.nextBytes(buf);
        byteValues[i] = new BytesWritable(buf);
    }
    for (int i = 0; i < COUNT; ++i) {
        writer.addRow(createRandomRow(intValues, doubleValues, stringValues, byteValues, words, i));
    }
    writer.close();
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    assertEquals(COUNT, reader.getNumberOfRows());
    RecordReader rows = reader.rows();
    OrcStruct row = null;
    for (int i = COUNT - 1; i >= 0; --i) {
        // we load the previous buffer of rows
        if (i % COUNT == COUNT - 1) {
            rows.seekToRow(i - (COUNT - 1));
        }
        rows.seekToRow(i);
        row = (OrcStruct) rows.next(row);
        BigRow expected = createRandomRow(intValues, doubleValues, stringValues, byteValues, words, i);
        assertEquals(expected.boolean1.booleanValue(), ((BooleanWritable) row.getFieldValue(0)).get());
        assertEquals(expected.byte1.byteValue(), ((ByteWritable) row.getFieldValue(1)).get());
        assertEquals(expected.short1.shortValue(), ((ShortWritable) row.getFieldValue(2)).get());
        assertEquals(expected.int1.intValue(), ((IntWritable) row.getFieldValue(3)).get());
        assertEquals(expected.long1.longValue(), ((LongWritable) row.getFieldValue(4)).get());
        assertEquals(expected.float1, ((FloatWritable) row.getFieldValue(5)).get(), 0.0001);
        assertEquals(expected.double1, ((DoubleWritable) row.getFieldValue(6)).get(), 0.0001);
        assertEquals(expected.bytes1, row.getFieldValue(7));
        assertEquals(expected.string1, row.getFieldValue(8));
        List<InnerStruct> expectedList = expected.middle.list;
        List<OrcStruct> actualList = (List<OrcStruct>) ((OrcStruct) row.getFieldValue(9)).getFieldValue(0);
        compareList(expectedList, actualList);
        compareList(expected.list, (List<OrcStruct>) row.getFieldValue(10));
    }
    rows.close();
    Iterator<StripeInformation> stripeIterator = reader.getStripes().iterator();
    long offsetOfStripe2 = 0;
    long offsetOfStripe4 = 0;
    long lastRowOfStripe2 = 0;
    for (int i = 0; i < 5; ++i) {
        StripeInformation stripe = stripeIterator.next();
        if (i < 2) {
            lastRowOfStripe2 += stripe.getNumberOfRows();
        } else if (i == 2) {
            offsetOfStripe2 = stripe.getOffset();
            lastRowOfStripe2 += stripe.getNumberOfRows() - 1;
        } else if (i == 4) {
            offsetOfStripe4 = stripe.getOffset();
        }
    }
    boolean[] columns = new boolean[reader.getStatistics().length];
    // long column
    columns[5] = true;
    // text column
    columns[9] = true;
    rows = reader.rowsOptions(new Reader.Options().range(offsetOfStripe2, offsetOfStripe4 - offsetOfStripe2).include(columns));
    rows.seekToRow(lastRowOfStripe2);
    for (int i = 0; i < 2; ++i) {
        row = (OrcStruct) rows.next(row);
        BigRow expected = createRandomRow(intValues, doubleValues, stringValues, byteValues, words, (int) (lastRowOfStripe2 + i));
        assertEquals(expected.long1.longValue(), ((LongWritable) row.getFieldValue(4)).get());
        assertEquals(expected.string1, row.getFieldValue(8));
    }
    rows.close();
}
Also used : Random(java.util.Random) List(java.util.List) ArrayList(java.util.ArrayList) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) BytesWritable(org.apache.hadoop.io.BytesWritable) StripeInformation(org.apache.orc.StripeInformation) Test(org.junit.Test)
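
The test drives RecordReader.seekToRow both forward and backward across stripes. Below is a condensed write-then-seek sketch reusing only API calls that already appear in the test above; the file path and the single-field row class are assumptions:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;
import org.apache.hadoop.hive.ql.io.orc.Writer;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;

public class SeekSketch {
    /** Tiny row type for the reflection-based inspector; the field name is arbitrary. */
    public static class Row {
        public Byte byte1;
        Row(byte b) { byte1 = b; }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/seek-sketch.orc"); // assumed scratch location
        Writer writer = OrcFile.createWriter(path, OrcFile.writerOptions(conf)
            .inspector(ObjectInspectorFactory.getReflectionObjectInspector(
                Row.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA)));
        for (int i = 0; i < 100; i++) {
            writer.addRow(new Row((byte) i));
        }
        writer.close();

        Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
        RecordReader rows = reader.rows();
        rows.seekToRow(57); // jump directly to a row, as the test does
        OrcStruct row = (OrcStruct) rows.next(null);
        assert ((ByteWritable) row.getFieldValue(0)).get() == (byte) 57;
        rows.close();
    }
}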

Aggregations

ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 75 usages
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 49 usages
IntWritable (org.apache.hadoop.io.IntWritable): 48 usages
LongWritable (org.apache.hadoop.io.LongWritable): 44 usages
Text (org.apache.hadoop.io.Text): 43 usages
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 42 usages
FloatWritable (org.apache.hadoop.io.FloatWritable): 35 usages
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 33 usages
Test (org.junit.Test): 27 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 26 usages
BytesWritable (org.apache.hadoop.io.BytesWritable): 25 usages
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 22 usages
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 22 usages
DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject): 21 usages
DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject): 21 usages
TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable): 21 usages
ArrayList (java.util.ArrayList): 19 usages
DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable): 18 usages
HiveIntervalDayTimeWritable (org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable): 16 usages
HiveChar (org.apache.hadoop.hive.common.type.HiveChar): 14 usages