Example 11 with ByteWritable

Use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

From the class VectorizedBatchUtil, method setVector.

private static void setVector(Object row, StructObjectInspector oi, StructField field, VectorizedRowBatch batch, DataOutputBuffer buffer, int rowIndex, int colIndex, int offset) throws HiveException {
    Object fieldData = oi.getStructFieldData(row, field);
    ObjectInspector foi = field.getFieldObjectInspector();
    // Vectorization only supports PRIMITIVE data types. Assert the same
    assert (foi.getCategory() == Category.PRIMITIVE);
    // Get writable object
    PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi;
    Object writableCol = poi.getPrimitiveWritableObject(fieldData);
    // NOTE: The default value for null fields in vectorization is 1 for int types,
    // NaN for float/double. String types have no default value for null.
    switch(poi.getPrimitiveCategory()) {
        case BOOLEAN:
            {
                LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    lcv.vector[rowIndex] = ((BooleanWritable) writableCol).get() ? 1 : 0;
                    lcv.isNull[rowIndex] = false;
                } else {
                    lcv.vector[rowIndex] = 1;
                    setNullColIsNullValue(lcv, rowIndex);
                }
            }
            break;
        case BYTE:
            {
                LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    lcv.vector[rowIndex] = ((ByteWritable) writableCol).get();
                    lcv.isNull[rowIndex] = false;
                } else {
                    lcv.vector[rowIndex] = 1;
                    setNullColIsNullValue(lcv, rowIndex);
                }
            }
            break;
        case SHORT:
            {
                LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    lcv.vector[rowIndex] = ((ShortWritable) writableCol).get();
                    lcv.isNull[rowIndex] = false;
                } else {
                    lcv.vector[rowIndex] = 1;
                    setNullColIsNullValue(lcv, rowIndex);
                }
            }
            break;
        case INT:
            {
                LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    lcv.vector[rowIndex] = ((IntWritable) writableCol).get();
                    lcv.isNull[rowIndex] = false;
                } else {
                    lcv.vector[rowIndex] = 1;
                    setNullColIsNullValue(lcv, rowIndex);
                }
            }
            break;
        case LONG:
            {
                LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    lcv.vector[rowIndex] = ((LongWritable) writableCol).get();
                    lcv.isNull[rowIndex] = false;
                } else {
                    lcv.vector[rowIndex] = 1;
                    setNullColIsNullValue(lcv, rowIndex);
                }
            }
            break;
        case DATE:
            {
                LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    lcv.vector[rowIndex] = ((DateWritableV2) writableCol).getDays();
                    lcv.isNull[rowIndex] = false;
                } else {
                    lcv.vector[rowIndex] = 1;
                    setNullColIsNullValue(lcv, rowIndex);
                }
            }
            break;
        case FLOAT:
            {
                DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    dcv.vector[rowIndex] = ((FloatWritable) writableCol).get();
                    dcv.isNull[rowIndex] = false;
                } else {
                    dcv.vector[rowIndex] = Double.NaN;
                    setNullColIsNullValue(dcv, rowIndex);
                }
            }
            break;
        case DOUBLE:
            {
                DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get();
                    dcv.isNull[rowIndex] = false;
                } else {
                    dcv.vector[rowIndex] = Double.NaN;
                    setNullColIsNullValue(dcv, rowIndex);
                }
            }
            break;
        case TIMESTAMP:
            {
                TimestampColumnVector lcv = (TimestampColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    lcv.set(rowIndex, ((TimestampWritableV2) writableCol).getTimestamp().toSqlTimestamp());
                    lcv.isNull[rowIndex] = false;
                } else {
                    lcv.setNullValue(rowIndex);
                    setNullColIsNullValue(lcv, rowIndex);
                }
            }
            break;
        case INTERVAL_YEAR_MONTH:
            {
                LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    HiveIntervalYearMonth i = ((HiveIntervalYearMonthWritable) writableCol).getHiveIntervalYearMonth();
                    lcv.vector[rowIndex] = i.getTotalMonths();
                    lcv.isNull[rowIndex] = false;
                } else {
                    lcv.vector[rowIndex] = 1;
                    setNullColIsNullValue(lcv, rowIndex);
                }
            }
            break;
        case INTERVAL_DAY_TIME:
            {
                IntervalDayTimeColumnVector icv = (IntervalDayTimeColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    HiveIntervalDayTime idt = ((HiveIntervalDayTimeWritable) writableCol).getHiveIntervalDayTime();
                    icv.set(rowIndex, idt);
                    icv.isNull[rowIndex] = false;
                } else {
                    icv.setNullValue(rowIndex);
                    setNullColIsNullValue(icv, rowIndex);
                }
            }
            break;
        case BINARY:
            {
                BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    bcv.isNull[rowIndex] = false;
                    BytesWritable bw = (BytesWritable) writableCol;
                    byte[] bytes = bw.getBytes();
                    int start = buffer.getLength();
                    int length = bw.getLength();
                    try {
                        buffer.write(bytes, 0, length);
                    } catch (IOException ioe) {
                        throw new IllegalStateException("bad write", ioe);
                    }
                    bcv.setRef(rowIndex, buffer.getData(), start, length);
                } else {
                    setNullColIsNullValue(bcv, rowIndex);
                }
            }
            break;
        case STRING:
            {
                BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    bcv.isNull[rowIndex] = false;
                    Text colText = (Text) writableCol;
                    int start = buffer.getLength();
                    int length = colText.getLength();
                    try {
                        buffer.write(colText.getBytes(), 0, length);
                    } catch (IOException ioe) {
                        throw new IllegalStateException("bad write", ioe);
                    }
                    bcv.setRef(rowIndex, buffer.getData(), start, length);
                } else {
                    setNullColIsNullValue(bcv, rowIndex);
                }
            }
            break;
        case CHAR:
            {
                BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    bcv.isNull[rowIndex] = false;
                    HiveChar colHiveChar = ((HiveCharWritable) writableCol).getHiveChar();
                    byte[] bytes = colHiveChar.getStrippedValue().getBytes();
                    // We assume the CHAR maximum length was enforced when the object was created.
                    int length = bytes.length;
                    int start = buffer.getLength();
                    try {
                        // In vector mode, we store CHAR as unpadded.
                        buffer.write(bytes, 0, length);
                    } catch (IOException ioe) {
                        throw new IllegalStateException("bad write", ioe);
                    }
                    bcv.setRef(rowIndex, buffer.getData(), start, length);
                } else {
                    setNullColIsNullValue(bcv, rowIndex);
                }
            }
            break;
        case VARCHAR:
            {
                BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    bcv.isNull[rowIndex] = false;
                    HiveVarchar colHiveVarchar = ((HiveVarcharWritable) writableCol).getHiveVarchar();
                    byte[] bytes = colHiveVarchar.getValue().getBytes();
                    // We assume the VARCHAR maximum length was enforced when the object was created.
                    int length = bytes.length;
                    int start = buffer.getLength();
                    try {
                        buffer.write(bytes, 0, length);
                    } catch (IOException ioe) {
                        throw new IllegalStateException("bad write", ioe);
                    }
                    bcv.setRef(rowIndex, buffer.getData(), start, length);
                } else {
                    setNullColIsNullValue(bcv, rowIndex);
                }
            }
            break;
        case DECIMAL:
            DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[offset + colIndex];
            if (writableCol != null) {
                dcv.isNull[rowIndex] = false;
                HiveDecimalWritable wobj = (HiveDecimalWritable) writableCol;
                dcv.set(rowIndex, wobj);
            } else {
                setNullColIsNullValue(dcv, rowIndex);
            }
            break;
        default:
            throw new HiveException("Vectorization is not supported for datatype: " + poi.getPrimitiveCategory());
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) HiveIntervalDayTime(org.apache.hadoop.hive.common.type.HiveIntervalDayTime) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) BytesWritable(org.apache.hadoop.io.BytesWritable) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) FloatWritable(org.apache.hadoop.io.FloatWritable) HiveIntervalYearMonth(org.apache.hadoop.hive.common.type.HiveIntervalYearMonth)
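
For context, the BYTE branch above shows the general pattern for the integer family: the ByteWritable's value is widened into a LongColumnVector slot, and a null row gets a harmless default plus an isNull flag. Below is a minimal, self-contained sketch of that pattern; the class name ByteToVectorSketch and the literal values are ours, not Hive's.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.io.ByteWritable;

public class ByteToVectorSketch {
    public static void main(String[] args) {
        // One-column batch; the caller supplies the concrete column vector.
        VectorizedRowBatch batch = new VectorizedRowBatch(1);
        batch.cols[0] = new LongColumnVector();
        LongColumnVector col = (LongColumnVector) batch.cols[0];

        // Non-null path: widen the byte to long and clear the null flag, as the BYTE case does.
        ByteWritable value = new ByteWritable((byte) 42);
        col.vector[0] = value.get();
        col.isNull[0] = false;

        // Null path: park a benign default (1) and mark the slot null.
        col.vector[1] = 1;
        col.isNull[1] = true;
        col.noNulls = false;

        batch.size = 2;
        System.out.println(col.vector[0] + ", row 1 null? " + col.isNull[1]);
    }
}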

Example 12 with ByteWritable

Use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

From the class TestRCFile, method testSimpleReadAndWrite.

@Test
public void testSimpleReadAndWrite() throws IOException, SerDeException {
    cleanup();
    byte[][] record_1 = { "123".getBytes(StandardCharsets.UTF_8), "456".getBytes(StandardCharsets.UTF_8), "789".getBytes(StandardCharsets.UTF_8), "1000".getBytes(StandardCharsets.UTF_8), "5.3".getBytes(StandardCharsets.UTF_8), "hive and hadoop".getBytes(StandardCharsets.UTF_8), new byte[0], "NULL".getBytes(StandardCharsets.UTF_8) };
    byte[][] record_2 = { "100".getBytes(StandardCharsets.UTF_8), "200".getBytes(StandardCharsets.UTF_8), "123".getBytes(StandardCharsets.UTF_8), "1000".getBytes(StandardCharsets.UTF_8), "5.3".getBytes(StandardCharsets.UTF_8), "hive and hadoop".getBytes(StandardCharsets.UTF_8), new byte[0], "NULL".getBytes(StandardCharsets.UTF_8) };
    RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, RCFile.createMetadata(new Text("apple"), new Text("block"), new Text("cat"), new Text("dog")), new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
    for (int i = 0; i < record_1.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record_1[i], 0, record_1[i].length);
        bytes.set(i, cu);
    }
    writer.append(bytes);
    bytes.clear();
    for (int i = 0; i < record_2.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record_2[i], 0, record_2[i].length);
        bytes.set(i, cu);
    }
    writer.append(bytes);
    writer.close();
    Object[] expectedRecord_1 = { new ByteWritable((byte) 123), new ShortWritable((short) 456), new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"), null, null };
    Object[] expectedRecord_2 = { new ByteWritable((byte) 100), new ShortWritable((short) 200), new IntWritable(123), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"), null, null };
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    assertEquals(new Text("block"), reader.getMetadata().get(new Text("apple")));
    assertEquals(new Text("block"), reader.getMetadataValueOf(new Text("apple")));
    assertEquals(new Text("dog"), reader.getMetadataValueOf(new Text("cat")));
    LongWritable rowID = new LongWritable();
    for (int i = 0; i < 2; i++) {
        reader.next(rowID);
        BytesRefArrayWritable cols = new BytesRefArrayWritable();
        reader.getCurrentRow(cols);
        cols.resetValid(8);
        Object row = serDe.deserialize(cols);
        StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
        assertEquals("Field size should be 8", 8, fieldRefs.size());
        for (int j = 0; j < fieldRefs.size(); j++) {
            Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
            Object standardWritableData = ObjectInspectorUtils.copyToStandardObject(fieldData, fieldRefs.get(j).getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
            if (i == 0) {
                assertEquals("Field " + i, standardWritableData, expectedRecord_1[j]);
            } else {
                assertEquals("Field " + i, standardWritableData, expectedRecord_2[j]);
            }
        }
    }
    reader.close();
}
Also used : BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) RecordReader(org.apache.hadoop.mapred.RecordReader) Text(org.apache.hadoop.io.Text) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) BytesRefWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefWritable) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)
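
The assertion loop depends on ObjectInspectorUtils.copyToStandardObject handing back serde2 writables when the WRITABLE copy option is requested, which is how the TINYINT column becomes the expected ByteWritable. A minimal sketch of that conversion in isolation (the class name CopyToWritableSketch is ours, not Hive's):

import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class CopyToWritableSketch {
    public static void main(String[] args) {
        // A plain Java Byte plus its ObjectInspector stands in for the deserialized TINYINT field.
        Object fieldData = (byte) 123;
        Object writable = ObjectInspectorUtils.copyToStandardObject(
            fieldData,
            PrimitiveObjectInspectorFactory.javaByteObjectInspector,
            ObjectInspectorCopyOption.WRITABLE);

        // With the WRITABLE option the copy comes back as the serde2 ByteWritable the test compares against.
        System.out.println(writable.equals(new ByteWritable((byte) 123)));
    }
}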

Example 13 with ByteWritable

Use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

From the class RecordReaderImpl, method nextByte.

static ByteWritable nextByte(ColumnVector vector, int row, Object previous) {
    if (vector.isRepeating) {
        row = 0;
    }
    if (vector.noNulls || !vector.isNull[row]) {
        ByteWritable result;
        if (previous == null || previous.getClass() != ByteWritable.class) {
            result = new ByteWritable();
        } else {
            result = (ByteWritable) previous;
        }
        result.set((byte) ((LongColumnVector) vector).vector[row]);
        return result;
    } else {
        return null;
    }
}
Also used : ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable)
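
The only ByteWritable-specific work here is the object-reuse idiom: the previous writable is recycled when its type matches, so iterating a column does not allocate per row. A standalone sketch of the same pattern follows; readByte and NextByteSketch are our own names, not Hive's.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.serde2.io.ByteWritable;

public class NextByteSketch {
    // Mirrors nextByte above: honour isRepeating, reuse the previous writable, return null for null slots.
    static ByteWritable readByte(LongColumnVector vector, int row, Object previous) {
        if (vector.isRepeating) {
            row = 0;
        }
        if (vector.noNulls || !vector.isNull[row]) {
            ByteWritable result = (previous instanceof ByteWritable)
                ? (ByteWritable) previous
                : new ByteWritable();
            result.set((byte) vector.vector[row]);
            return result;
        }
        return null;
    }

    public static void main(String[] args) {
        LongColumnVector col = new LongColumnVector(4);
        col.vector[0] = 7;
        col.isRepeating = true; // every row reads slot 0

        ByteWritable reused = new ByteWritable();
        ByteWritable out = readByte(col, 3, reused);
        System.out.println(out.get() + ", reused? " + (out == reused));
    }
}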

Example 14 with ByteWritable

Use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

From the class BatchToRowReader, method nextByte.

public static ByteWritable nextByte(ColumnVector vector, int row, Object previous) {
    if (vector.isRepeating) {
        row = 0;
    }
    if (vector.noNulls || !vector.isNull[row]) {
        ByteWritable result;
        if (previous == null || previous.getClass() != ByteWritable.class) {
            result = new ByteWritable();
        } else {
            result = (ByteWritable) previous;
        }
        result.set((byte) ((LongColumnVector) vector).vector[row]);
        return result;
    } else {
        return null;
    }
}
Also used : ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable)

Example 15 with ByteWritable

Use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

From the class GenericUDFDateAdd, method evaluate.

@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    if (arguments[0].get() == null) {
        return null;
    }
    Object daysWritableObject = daysConverter.convert(arguments[1].get());
    if (daysWritableObject == null) {
        return null;
    }
    int toBeAdded;
    if (daysWritableObject instanceof ByteWritable) {
        toBeAdded = ((ByteWritable) daysWritableObject).get();
    } else if (daysWritableObject instanceof ShortWritable) {
        toBeAdded = ((ShortWritable) daysWritableObject).get();
    } else if (daysWritableObject instanceof IntWritable) {
        toBeAdded = ((IntWritable) daysWritableObject).get();
    } else {
        return null;
    }
    // Convert the first param into a DateWritableV2 value
    switch(inputType1) {
        case STRING:
            String dateString = dateConverter.convert(arguments[0].get()).toString();
            if (DateParser.parseDate(dateString, dateVal)) {
                output.set(dateVal);
            } else {
                return null;
            }
            break;
        case TIMESTAMP:
            Timestamp ts = ((TimestampWritableV2) dateConverter.convert(arguments[0].get())).getTimestamp();
            output.set(DateWritableV2.millisToDays(ts.toEpochMilli()));
            break;
        case DATE:
            DateWritableV2 dw = (DateWritableV2) dateConverter.convert(arguments[0].get());
            output.set(dw.getDays());
            break;
        default:
            throw new UDFArgumentException("DATE_ADD() only takes STRING/TIMESTAMP/DATEWRITABLE types, got " + inputType1);
    }
    int newDays = output.getDays() + (signModifier * toBeAdded);
    output.set(newDays);
    return output;
}
Also used : UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException) DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) IntWritable(org.apache.hadoop.io.IntWritable)
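
Driving the UDF end to end shows why the ByteWritable branch exists: a TINYINT day count reaches evaluate() as a ByteWritable after conversion. Below is a rough, hedged sketch of calling GenericUDFDateAdd directly; exact behaviour, in particular the returned writable type, can differ between Hive versions, and DateAddSketch is our own name.

import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateAdd;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;

public class DateAddSketch {
    public static void main(String[] args) throws Exception {
        GenericUDFDateAdd udf = new GenericUDFDateAdd();

        // date_add(string, tinyint): the second argument is what later reaches evaluate()
        // as a ByteWritable and takes the first instanceof branch above.
        ObjectInspector[] argOIs = {
            PrimitiveObjectInspectorFactory.writableStringObjectInspector,
            PrimitiveObjectInspectorFactory.writableByteObjectInspector
        };
        udf.initialize(argOIs);

        DeferredObject[] callArgs = {
            new DeferredJavaObject(new Text("2009-07-30")),
            new DeferredJavaObject(new ByteWritable((byte) 1))
        };
        System.out.println(udf.evaluate(callArgs)); // expected: 2009-07-31
        udf.close();
    }
}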

Aggregations

ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 81 usages
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 56 usages
IntWritable (org.apache.hadoop.io.IntWritable): 56 usages
LongWritable (org.apache.hadoop.io.LongWritable): 52 usages
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 47 usages
Text (org.apache.hadoop.io.Text): 47 usages
Test (org.junit.Test): 44 usages
FloatWritable (org.apache.hadoop.io.FloatWritable): 40 usages
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 38 usages
BytesWritable (org.apache.hadoop.io.BytesWritable): 30 usages
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 29 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 27 usages
ArrayList (java.util.ArrayList): 22 usages
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 22 usages
DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject): 21 usages
DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject): 21 usages
HiveCharWritable (org.apache.hadoop.hive.serde2.io.HiveCharWritable): 18 usages
HiveChar (org.apache.hadoop.hive.common.type.HiveChar): 17 usages
HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal): 17 usages
HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar): 17 usages