Example 81 with DoubleWritable

Use of org.apache.hadoop.hive.serde2.io.DoubleWritable in project hive by apache.

The class VectorColumnAssignFactory, method buildObjectAssign.

public static VectorColumnAssign buildObjectAssign(VectorizedRowBatch outputBatch, int outColIndex, PrimitiveCategory category) throws HiveException {
    VectorColumnAssign outVCA = null;
    ColumnVector destCol = outputBatch.cols[outColIndex];
    if (destCol == null) {
        switch(category) {
            case VOID:
                outVCA = new VectorLongColumnAssign() {

                    // This is a dummy assigner
                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        // This is a no-op; there is no column to assign to, and val is expected to be null
                        assert (val == null);
                    }
                };
                break;
            default:
                throw new HiveException("Incompatible (null) vector column and primitive category " + category);
        }
    } else if (destCol instanceof LongColumnVector) {
        switch(category) {
            case BOOLEAN:
                outVCA = new VectorLongColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            BooleanWritable bw = (BooleanWritable) val;
                            assignLong(bw.get() ? 1 : 0, destIndex);
                        }
                    }
                }.init(outputBatch, (LongColumnVector) destCol);
                break;
            case BYTE:
                outVCA = new VectorLongColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            ByteWritable bw = (ByteWritable) val;
                            assignLong(bw.get(), destIndex);
                        }
                    }
                }.init(outputBatch, (LongColumnVector) destCol);
                break;
            case SHORT:
                outVCA = new VectorLongColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            ShortWritable bw = (ShortWritable) val;
                            assignLong(bw.get(), destIndex);
                        }
                    }
                }.init(outputBatch, (LongColumnVector) destCol);
                break;
            case INT:
                outVCA = new VectorLongColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            IntWritable bw = (IntWritable) val;
                            assignLong(bw.get(), destIndex);
                        }
                    }
                }.init(outputBatch, (LongColumnVector) destCol);
                break;
            case LONG:
                outVCA = new VectorLongColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            LongWritable bw = (LongWritable) val;
                            assignLong(bw.get(), destIndex);
                        }
                    }
                }.init(outputBatch, (LongColumnVector) destCol);
                break;
            case TIMESTAMP:
                outVCA = new VectorTimestampColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            assignTimestamp((TimestampWritable) val, destIndex);
                        }
                    }
                }.init(outputBatch, (TimestampColumnVector) destCol);
                break;
            case DATE:
                outVCA = new VectorLongColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            DateWritable bw = (DateWritable) val;
                            assignLong(bw.getDays(), destIndex);
                        }
                    }
                }.init(outputBatch, (LongColumnVector) destCol);
                break;
            case INTERVAL_YEAR_MONTH:
                outVCA = new VectorLongColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            HiveIntervalYearMonthWritable bw = (HiveIntervalYearMonthWritable) val;
                            assignLong(bw.getHiveIntervalYearMonth().getTotalMonths(), destIndex);
                        }
                    }
                }.init(outputBatch, (LongColumnVector) destCol);
                break;
            case INTERVAL_DAY_TIME:
                outVCA = new VectorIntervalDayTimeColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            HiveIntervalDayTimeWritable bw = (HiveIntervalDayTimeWritable) val;
                            assignIntervalDayTime(bw.getHiveIntervalDayTime(), destIndex);
                        }
                    }
                }.init(outputBatch, (IntervalDayTimeColumnVector) destCol);
                break;
            default:
                throw new HiveException("Incompatible Long vector column and primitive category " + category);
        }
    } else if (destCol instanceof DoubleColumnVector) {
        switch(category) {
            case DOUBLE:
                outVCA = new VectorDoubleColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            DoubleWritable bw = (DoubleWritable) val;
                            assignDouble(bw.get(), destIndex);
                        }
                    }
                }.init(outputBatch, (DoubleColumnVector) destCol);
                break;
            case FLOAT:
                outVCA = new VectorDoubleColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            FloatWritable bw = (FloatWritable) val;
                            assignDouble(bw.get(), destIndex);
                        }
                    }
                }.init(outputBatch, (DoubleColumnVector) destCol);
                break;
            default:
                throw new HiveException("Incompatible Double vector column and primitive category " + category);
        }
    } else if (destCol instanceof BytesColumnVector) {
        switch(category) {
            case BINARY:
                outVCA = new VectorBytesColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            BytesWritable bw = (BytesWritable) val;
                            byte[] bytes = bw.getBytes();
                            assignBytes(bytes, 0, bw.getLength(), destIndex);
                        }
                    }
                }.init(outputBatch, (BytesColumnVector) destCol);
                break;
            case STRING:
                outVCA = new VectorBytesColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            Text bw = (Text) val;
                            byte[] bytes = bw.getBytes();
                            assignBytes(bytes, 0, bw.getLength(), destIndex);
                        }
                    }
                }.init(outputBatch, (BytesColumnVector) destCol);
                break;
            case VARCHAR:
                outVCA = new VectorBytesColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            // We store VARCHAR type stripped of pads.
                            HiveVarchar hiveVarchar;
                            if (val instanceof HiveVarchar) {
                                hiveVarchar = (HiveVarchar) val;
                            } else {
                                hiveVarchar = ((HiveVarcharWritable) val).getHiveVarchar();
                            }
                            byte[] bytes = hiveVarchar.getValue().getBytes();
                            assignBytes(bytes, 0, bytes.length, destIndex);
                        }
                    }
                }.init(outputBatch, (BytesColumnVector) destCol);
                break;
            case CHAR:
                outVCA = new VectorBytesColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            // We store CHAR type stripped of pads.
                            HiveChar hiveChar;
                            if (val instanceof HiveChar) {
                                hiveChar = (HiveChar) val;
                            } else {
                                hiveChar = ((HiveCharWritable) val).getHiveChar();
                            }
                            byte[] bytes = hiveChar.getStrippedValue().getBytes();
                            assignBytes(bytes, 0, bytes.length, destIndex);
                        }
                    }
                }.init(outputBatch, (BytesColumnVector) destCol);
                break;
            default:
                throw new HiveException("Incompatible Bytes vector column and primitive category " + category);
        }
    } else if (destCol instanceof DecimalColumnVector) {
        switch(category) {
            case DECIMAL:
                outVCA = new VectorDecimalColumnAssign() {

                    @Override
                    public void assignObjectValue(Object val, int destIndex) throws HiveException {
                        if (val == null) {
                            assignNull(destIndex);
                        } else {
                            if (val instanceof HiveDecimal) {
                                assignDecimal((HiveDecimal) val, destIndex);
                            } else {
                                assignDecimal((HiveDecimalWritable) val, destIndex);
                            }
                        }
                    }
                }.init(outputBatch, (DecimalColumnVector) destCol);
                break;
            default:
                throw new HiveException("Incompatible Decimal vector column and primitive category " + category);
        }
    } else {
        throw new HiveException("Unknown vector column type " + destCol.getClass().getName());
    }
    return outVCA;
}
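
A minimal usage sketch (not from the Hive source; the one-column batch setup and values are illustrative assumptions) showing how a built assigner copies writables into a vectorized batch:

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.io.IntWritable;

// Build a one-column batch backed by a LongColumnVector (INT values are stored as longs).
VectorizedRowBatch batch = new VectorizedRowBatch(1);
batch.cols[0] = new LongColumnVector();

// The category selects the matching branch above (here: the INT case).
VectorColumnAssign assigner =
        VectorColumnAssignFactory.buildObjectAssign(batch, 0, PrimitiveCategory.INT);

assigner.assignObjectValue(new IntWritable(42), 0); // row 0 gets 42
assigner.assignObjectValue(null, 1);                // row 1 becomes NULL
batch.size = 2;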
Also used : HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), HiveChar (org.apache.hadoop.hive.common.type.HiveChar), TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable), DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable), ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable), HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal), LongWritable (org.apache.hadoop.io.LongWritable), ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable), IntWritable (org.apache.hadoop.io.IntWritable), DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable), HiveVarcharWritable (org.apache.hadoop.hive.serde2.io.HiveVarcharWritable), HiveCharWritable (org.apache.hadoop.hive.serde2.io.HiveCharWritable), HiveIntervalDayTimeWritable (org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable), BytesWritable (org.apache.hadoop.io.BytesWritable), Text (org.apache.hadoop.io.Text), HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar), HiveIntervalYearMonthWritable (org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable), FloatWritable (org.apache.hadoop.io.FloatWritable), BooleanWritable (org.apache.hadoop.io.BooleanWritable)

Example 82 with DoubleWritable

Use of org.apache.hadoop.hive.serde2.io.DoubleWritable in project hive by apache.

The class VectorDeserializeRow, method convertRowColumn.

/**
   * Convert one row column value that is the current value in deserializeRead.
   *
   * We deserialize into a writable and then pass that writable to an instance of VectorAssignRow
   * to convert the writable to the target data type and assign it into the VectorizedRowBatch.
   *
   * @param batch
   * @param batchIndex
   * @param logicalColumnIndex
   * @throws IOException
   */
private void convertRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex) throws IOException {
    final int projectionColumnNum = projectionColumnNums[logicalColumnIndex];
    Writable convertSourceWritable = convertSourceWritables[logicalColumnIndex];
    switch(sourceCategories[logicalColumnIndex]) {
        case PRIMITIVE:
            {
                switch(sourcePrimitiveCategories[logicalColumnIndex]) {
                    case VOID:
                        convertSourceWritable = null;
                        break;
                    case BOOLEAN:
                        ((BooleanWritable) convertSourceWritable).set(deserializeRead.currentBoolean);
                        break;
                    case BYTE:
                        ((ByteWritable) convertSourceWritable).set(deserializeRead.currentByte);
                        break;
                    case SHORT:
                        ((ShortWritable) convertSourceWritable).set(deserializeRead.currentShort);
                        break;
                    case INT:
                        ((IntWritable) convertSourceWritable).set(deserializeRead.currentInt);
                        break;
                    case LONG:
                        ((LongWritable) convertSourceWritable).set(deserializeRead.currentLong);
                        break;
                    case TIMESTAMP:
                        ((TimestampWritable) convertSourceWritable).set(deserializeRead.currentTimestampWritable);
                        break;
                    case DATE:
                        ((DateWritable) convertSourceWritable).set(deserializeRead.currentDateWritable);
                        break;
                    case FLOAT:
                        ((FloatWritable) convertSourceWritable).set(deserializeRead.currentFloat);
                        break;
                    case DOUBLE:
                        ((DoubleWritable) convertSourceWritable).set(deserializeRead.currentDouble);
                        break;
                    case BINARY:
                        if (deserializeRead.currentBytes == null) {
                            LOG.info("null binary entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
                        }
                        ((BytesWritable) convertSourceWritable).set(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesLength);
                        break;
                    case STRING:
                        if (deserializeRead.currentBytes == null) {
                            throw new RuntimeException("null string entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
                        }
                        // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String.
                        ((Text) convertSourceWritable).set(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesLength);
                        break;
                    case VARCHAR:
                        {
                            // Read the raw STRING bytes, then truncate to the VARCHAR's max length
                            // with a byte-based method that does not use Java String objects.
                            if (deserializeRead.currentBytes == null) {
                                throw new RuntimeException("null varchar entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
                            }
                            int adjustedLength = StringExpr.truncate(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesLength, maxLengths[logicalColumnIndex]);
                            ((HiveVarcharWritable) convertSourceWritable).set(new String(deserializeRead.currentBytes, deserializeRead.currentBytesStart, adjustedLength, Charsets.UTF_8), -1);
                        }
                        break;
                    case CHAR:
                        {
                            // Read the raw STRING bytes, then right-trim and truncate to the CHAR's max
                            // length with a byte-based method that does not use Java String objects.
                            if (deserializeRead.currentBytes == null) {
                                throw new RuntimeException("null char entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
                            }
                            int adjustedLength = StringExpr.rightTrimAndTruncate(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesLength, maxLengths[logicalColumnIndex]);
                            ((HiveCharWritable) convertSourceWritable).set(new String(deserializeRead.currentBytes, deserializeRead.currentBytesStart, adjustedLength, Charsets.UTF_8), -1);
                        }
                        break;
                    case DECIMAL:
                        ((HiveDecimalWritable) convertSourceWritable).set(deserializeRead.currentHiveDecimalWritable);
                        break;
                    case INTERVAL_YEAR_MONTH:
                        ((HiveIntervalYearMonthWritable) convertSourceWritable).set(deserializeRead.currentHiveIntervalYearMonthWritable);
                        break;
                    case INTERVAL_DAY_TIME:
                        ((HiveIntervalDayTimeWritable) convertSourceWritable).set(deserializeRead.currentHiveIntervalDayTimeWritable);
                        break;
                    default:
                        throw new RuntimeException("Primitive category " + sourcePrimitiveCategories[logicalColumnIndex] + " not supported");
                }
            }
            break;
        default:
            throw new RuntimeException("Category " + sourceCategories[logicalColumnIndex] + " not supported");
    }
    /*
     * Convert our source object we just read into the target object and store that in the
     * VectorizedRowBatch.
     */
    convertVectorAssignRow.assignConvertRowColumn(batch, batchIndex, logicalColumnIndex, convertSourceWritable);
}
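
For VARCHAR and CHAR, the maximum length is enforced in characters directly on the UTF-8 bytes before any Java String is built. A standalone sketch of that truncation step (the call shape follows the code above; the sample values are illustrative):

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;

byte[] utf8 = "héllo".getBytes(StandardCharsets.UTF_8); // 6 bytes, 5 characters
// Keep at most 3 characters without splitting a multi-byte UTF-8 sequence;
// the return value is the adjusted byte length.
int adjustedLength = StringExpr.truncate(utf8, 0, utf8.length, 3);
String first3 = new String(utf8, 0, adjustedLength, StandardCharsets.UTF_8); // "hél"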
Also used : ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable), DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable), Writable (org.apache.hadoop.io.Writable), LongWritable (org.apache.hadoop.io.LongWritable), HiveCharWritable (org.apache.hadoop.hive.serde2.io.HiveCharWritable), HiveIntervalYearMonthWritable (org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable), HiveIntervalDayTimeWritable (org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable), BytesWritable (org.apache.hadoop.io.BytesWritable), TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable), DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable), ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable), IntWritable (org.apache.hadoop.io.IntWritable), HiveVarcharWritable (org.apache.hadoop.hive.serde2.io.HiveVarcharWritable), BooleanWritable (org.apache.hadoop.io.BooleanWritable), HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable), FloatWritable (org.apache.hadoop.io.FloatWritable)

Example 83 with DoubleWritable

Use of org.apache.hadoop.hive.serde2.io.DoubleWritable in project presto by prestodb.

The class OrcFileRewriter, method uncompressedSize.

private static int uncompressedSize(Object object) throws IOException {
    if (object instanceof OrcStruct) {
        OrcStruct struct = (OrcStruct) object;
        int size = 0;
        for (int i = 0; i < struct.getNumFields(); i++) {
            size += uncompressedSize(getFieldValue(struct, i));
        }
        return size;
    }
    if ((object == null) || (object instanceof BooleanWritable)) {
        return SIZE_OF_BYTE;
    }
    if (object instanceof LongWritable) {
        return SIZE_OF_LONG;
    }
    if (object instanceof DoubleWritable) {
        return SIZE_OF_DOUBLE;
    }
    if (object instanceof Text) {
        return ((Text) object).getLength();
    }
    if (object instanceof BytesWritable) {
        return ((BytesWritable) object).getLength();
    }
    if (object instanceof List<?>) {
        int size = 0;
        for (Object element : (Iterable<?>) object) {
            size += uncompressedSize(element);
        }
        return size;
    }
    if (object instanceof Map<?, ?>) {
        int size = 0;
        for (Map.Entry<?, ?> entry : ((Map<?, ?>) object).entrySet()) {
            size += uncompressedSize(entry.getKey());
            size += uncompressedSize(entry.getValue());
        }
        return size;
    }
    throw new IOException("Unhandled ORC object: " + object.getClass().getName());
}
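
As a worked example of the accounting (a sketch, assuming the SIZE_OF_* constants carry the usual 1-, 8-, and 8-byte values for byte, long, and double):

import org.apache.hadoop.io.Text;

// A hypothetical row {active: BooleanWritable, id: LongWritable,
// score: DoubleWritable, name: Text("abc")} recurses to:
int size = 1                           // BooleanWritable -> SIZE_OF_BYTE
        + 8                            // LongWritable    -> SIZE_OF_LONG
        + 8                            // DoubleWritable  -> SIZE_OF_DOUBLE
        + new Text("abc").getLength(); // Text -> UTF-8 byte length, 3
// size == 20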
Also used : DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable), Text (org.apache.hadoop.io.Text), BytesWritable (org.apache.hadoop.io.BytesWritable), InterruptedIOException (java.io.InterruptedIOException), IOException (java.io.IOException), OrcStruct (org.apache.hadoop.hive.ql.io.orc.OrcStruct), BooleanWritable (org.apache.hadoop.io.BooleanWritable), List (java.util.List), LongWritable (org.apache.hadoop.io.LongWritable), Map (java.util.Map)

Example 84 with DoubleWritable

Use of org.apache.hadoop.hive.serde2.io.DoubleWritable in project hive by apache.

The class DruidSerDe, method deserialize.

@Override
public Object deserialize(Writable writable) throws SerDeException {
    final DruidWritable input = (DruidWritable) writable;
    final List<Object> output = Lists.newArrayListWithExpectedSize(columns.length);
    for (int i = 0; i < columns.length; i++) {
        final Object value = input.getValue().get(columns[i]);
        if (value == null) {
            output.add(null);
            continue;
        }
        switch(types[i].getPrimitiveCategory()) {
            case TIMESTAMP:
                output.add(new TimestampWritable(Timestamp.valueOf(ZonedDateTime.ofInstant(Instant.ofEpochMilli(((Number) value).longValue()), tsTZTypeInfo.timeZone()).format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")).toString())));
                break;
            case TIMESTAMPLOCALTZ:
                output.add(new TimestampLocalTZWritable(new TimestampTZ(ZonedDateTime.ofInstant(Instant.ofEpochMilli(((Number) value).longValue()), ((TimestampLocalTZTypeInfo) types[i]).timeZone()))));
                break;
            case BYTE:
                output.add(new ByteWritable(((Number) value).byteValue()));
                break;
            case SHORT:
                output.add(new ShortWritable(((Number) value).shortValue()));
                break;
            case INT:
                output.add(new IntWritable(((Number) value).intValue()));
                break;
            case LONG:
                output.add(new LongWritable(((Number) value).longValue()));
                break;
            case FLOAT:
                output.add(new FloatWritable(((Number) value).floatValue()));
                break;
            case DOUBLE:
                output.add(new DoubleWritable(((Number) value).doubleValue()));
                break;
            case DECIMAL:
                output.add(new HiveDecimalWritable(HiveDecimal.create(((Number) value).doubleValue())));
                break;
            case CHAR:
                output.add(new HiveCharWritable(new HiveChar(value.toString(), ((CharTypeInfo) types[i]).getLength())));
                break;
            case VARCHAR:
                output.add(new HiveVarcharWritable(new HiveVarchar(value.toString(), ((VarcharTypeInfo) types[i]).getLength())));
                break;
            case STRING:
                output.add(new Text(value.toString()));
                break;
            case BOOLEAN:
                output.add(new BooleanWritable(Boolean.valueOf(value.toString())));
                break;
            default:
                throw new SerDeException("Unknown type: " + types[i].getPrimitiveCategory());
        }
    }
    return output;
}
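
A hypothetical round trip through deserialize, assuming an initialized DruidSerDe instance named serde and that a DruidWritable can be wrapped around a column-name-to-value map (the map-based constructor below is an assumption suggested by the getValue() call above, not a confirmed API):

import java.util.HashMap;
import java.util.List;
import java.util.Map;

Map<String, Object> row = new HashMap<>();
row.put("__time", 1514764800000L); // epoch millis, hits the TIMESTAMP branch
row.put("added", 57.0);            // hits the DOUBLE branch

DruidWritable writable = new DruidWritable(row); // assumed constructor
List<Object> fields = (List<Object>) serde.deserialize(writable);
// fields.get(i) holds a TimestampWritable, DoubleWritable, ... matching types[i]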
Also used : HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable), HiveChar (org.apache.hadoop.hive.common.type.HiveChar), TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable), HiveCharWritable (org.apache.hadoop.hive.serde2.io.HiveCharWritable), HiveVarcharWritable (org.apache.hadoop.hive.serde2.io.HiveVarcharWritable), TimestampLocalTZTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo), DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable), Text (org.apache.hadoop.io.Text), HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar), ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable), TimestampTZ (org.apache.hadoop.hive.common.type.TimestampTZ), FloatWritable (org.apache.hadoop.io.FloatWritable), BooleanWritable (org.apache.hadoop.io.BooleanWritable), TimestampLocalTZWritable (org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable), LongWritable (org.apache.hadoop.io.LongWritable), ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable), IntWritable (org.apache.hadoop.io.IntWritable), SerDeException (org.apache.hadoop.hive.serde2.SerDeException)

Example 85 with DoubleWritable

Use of org.apache.hadoop.hive.serde2.io.DoubleWritable in project nifi by apache.

The class TestConvertAvroToORC, method test_onTrigger_complex_record.

@Test
public void test_onTrigger_complex_record() throws Exception {
    Map<String, Double> mapData1 = new TreeMap<String, Double>() {

        {
            put("key1", 1.0);
            put("key2", 2.0);
        }
    };
    GenericData.Record record = TestNiFiOrcUtils.buildComplexAvroRecord(10, mapData1, "DEF", 3.0f, Arrays.asList(10, 20));
    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);
    // Put another record in
    Map<String, Double> mapData2 = new TreeMap<String, Double>() {

        {
            put("key1", 3.0);
            put("key2", 4.0);
        }
    };
    record = TestNiFiOrcUtils.buildComplexAvroRecord(null, mapData2, "XYZ", 4L, Arrays.asList(100, 200));
    fileWriter.append(record);
    fileWriter.flush();
    fileWriter.close();
    out.close();
    Map<String, String> attributes = new HashMap<String, String>() {

        {
            put(CoreAttributes.FILENAME.key(), "test");
        }
    };
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();
    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);
    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS complex_record " + "(myInt INT, myMap MAP<STRING, DOUBLE>, myEnum STRING, myLongOrFloat UNIONTYPE<BIGINT, FLOAT>, myIntList ARRAY<INT>)" + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    TypeInfo resultSchema = TestNiFiOrcUtils.buildComplexOrcSchema();
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);
    // Check some fields in the first row
    Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myInt"));
    assertTrue(intFieldObject instanceof IntWritable);
    assertEquals(10, ((IntWritable) intFieldObject).get());
    Object mapFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMap"));
    assertTrue(mapFieldObject instanceof Map);
    Map map = (Map) mapFieldObject;
    Object mapValue = map.get(new Text("key1"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof DoubleWritable);
    assertEquals(1.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);
    mapValue = map.get(new Text("key2"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof DoubleWritable);
    assertEquals(2.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);
}
Also used : Configuration (org.apache.hadoop.conf.Configuration), HashMap (java.util.HashMap), RecordReader (org.apache.hadoop.hive.ql.io.orc.RecordReader), Reader (org.apache.hadoop.hive.ql.io.orc.Reader), DoubleWritable (org.apache.hadoop.io.DoubleWritable), OrcStruct (org.apache.hadoop.hive.ql.io.orc.OrcStruct), FileSystem (org.apache.hadoop.fs.FileSystem), GenericRecord (org.apache.avro.generic.GenericRecord), IntWritable (org.apache.hadoop.io.IntWritable), Path (org.apache.hadoop.fs.Path), DataFileWriter (org.apache.avro.file.DataFileWriter), Text (org.apache.hadoop.io.Text), GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter), ByteArrayOutputStream (java.io.ByteArrayOutputStream), TreeMap (java.util.TreeMap), GenericData (org.apache.avro.generic.GenericData), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), MockFlowFile (org.apache.nifi.util.MockFlowFile), FileOutputStream (java.io.FileOutputStream), Map (java.util.Map), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)

Aggregations

DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 132 uses
Test (org.junit.Test): 85 uses
IntWritable (org.apache.hadoop.io.IntWritable): 71 uses
LongWritable (org.apache.hadoop.io.LongWritable): 70 uses
Text (org.apache.hadoop.io.Text): 57 uses
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 52 uses
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 51 uses
FloatWritable (org.apache.hadoop.io.FloatWritable): 51 uses
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 49 uses
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 46 uses
DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject): 42 uses
DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject): 42 uses
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 42 uses
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 40 uses
BytesWritable (org.apache.hadoop.io.BytesWritable): 36 uses
ArrayList (java.util.ArrayList): 29 uses
HiveVarcharWritable (org.apache.hadoop.hive.serde2.io.HiveVarcharWritable): 27 uses
HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar): 21 uses
HiveCharWritable (org.apache.hadoop.hive.serde2.io.HiveCharWritable): 21 uses
HiveChar (org.apache.hadoop.hive.common.type.HiveChar): 19 uses