
Example 26 with BytesRefWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project presto by prestodb.

Class ColumnarBinaryHiveRecordCursor, method parseLongColumn:

private void parseLongColumn(int column) {
    loaded[column] = true;
    if (hiveColumnIndexes[column] >= value.size()) {
        // this partition may contain fewer fields than what's declared in the schema
        // this happens when additional columns are added to the hive table after a partition has been created
        nulls[column] = true;
    } else {
        BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
        byte[] bytes;
        try {
            bytes = fieldData.getData();
        } catch (IOException e) {
            throw new PrestoException(HIVE_BAD_DATA, e);
        }
        int start = fieldData.getStart();
        int length = fieldData.getLength();
        parseLongColumn(column, bytes, start, length);
    }
}
Also used : PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) BytesRefWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)
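
A minimal sketch of the kind of work the four-argument parseLongColumn(column, bytes, start, length) overload has to do: decode a long directly from a sub-range of the row's backing array. The class name LongFieldDecoder and the assumption that the field uses Hadoop's variable-length long encoding are illustrative only; the real Presto binary cursor relies on Hive's lazy-binary decoding.

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;

import org.apache.hadoop.io.WritableUtils;

// Hypothetical helper: decode a long from a byte range without copying it first.
// Assumes a Hadoop VLong encoding; the actual ColumnarBinaryHiveRecordCursor may differ.
public final class LongFieldDecoder {

    private LongFieldDecoder() {
    }

    public static long decodeVLong(byte[] bytes, int start, int length) throws IOException {
        try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes, start, length))) {
            return WritableUtils.readVLong(in);
        }
    }
}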

Example 27 with BytesRefWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project presto by prestodb.

Class ColumnarTextHiveRecordCursor, method parseObjectColumn:

private void parseObjectColumn(int column) {
    loaded[column] = true;
    if (hiveColumnIndexes[column] >= value.size()) {
        // this partition may contain fewer fields than what's declared in the schema
        // this happens when additional columns are added to the hive table after a partition has been created
        nulls[column] = true;
    } else {
        BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
        byte[] bytes;
        try {
            bytes = fieldData.getData();
        } catch (IOException e) {
            throw Throwables.propagate(e);
        }
        int start = fieldData.getStart();
        int length = fieldData.getLength();
        parseObjectColumn(column, bytes, start, length);
    }
}
Also used : IOException(java.io.IOException) BytesRefWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)
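
The object variant ultimately has to turn the field's byte range into a Java value. Below is a minimal sketch of that step, assuming the field is UTF-8 text and that Hive's default null sequence "\N" marks SQL NULL; the real ColumnarTextHiveRecordCursor parses complex types lazily, so this only illustrates the byte-range handling, and TextFieldDecoder is a made-up name.

import java.nio.charset.StandardCharsets;

// Hypothetical helper: materialize a text field from its byte range,
// mapping Hive's default null sequence "\N" to null.
public final class TextFieldDecoder {

    private static final byte[] HIVE_NULL = { '\\', 'N' };

    private TextFieldDecoder() {
    }

    public static String decodeString(byte[] bytes, int start, int length) {
        if (isNullSequence(bytes, start, length)) {
            return null;
        }
        return new String(bytes, start, length, StandardCharsets.UTF_8);
    }

    private static boolean isNullSequence(byte[] bytes, int start, int length) {
        return length == HIVE_NULL.length && bytes[start] == HIVE_NULL[0] && bytes[start + 1] == HIVE_NULL[1];
    }
}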

Example 28 with BytesRefWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project presto by prestodb.

Class ColumnarTextHiveRecordCursor, method parseDoubleColumn:

private void parseDoubleColumn(int column) {
    loaded[column] = true;
    if (hiveColumnIndexes[column] >= value.size()) {
        // this partition may contain fewer fields than what's declared in the schema
        // this happens when additional columns are added to the hive table after a partition has been created
        nulls[column] = true;
    } else {
        BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
        byte[] bytes;
        try {
            bytes = fieldData.getData();
        } catch (IOException e) {
            throw Throwables.propagate(e);
        }
        int start = fieldData.getStart();
        int length = fieldData.getLength();
        parseDoubleColumn(column, bytes, start, length);
    }
}
Also used : IOException(java.io.IOException) BytesRefWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)
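
A minimal sketch of the four-argument parseDoubleColumn overload the snippet delegates to, assuming the text SerDe stores doubles as plain ASCII digits. DoubleFieldDecoder is a made-up name, null handling (the "\N" sequence) is left out, and the actual Presto code avoids the intermediate String allocation; this version favors clarity.

import java.nio.charset.StandardCharsets;

// Hypothetical helper: parse an ASCII-encoded double from a byte range.
public final class DoubleFieldDecoder {

    private DoubleFieldDecoder() {
    }

    public static double decodeDouble(byte[] bytes, int start, int length) {
        String text = new String(bytes, start, length, StandardCharsets.US_ASCII);
        return Double.parseDouble(text);
    }
}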

Example 29 with BytesRefWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project DataX by alibaba.

Class DFSUtil, method rcFileStartRead:

public void rcFileStartRead(String sourceRcFilePath, Configuration readerSliceConfig, RecordSender recordSender, TaskPluginCollector taskPluginCollector) {
    LOG.info(String.format("Start Read rcfile [%s].", sourceRcFilePath));
    List<ColumnEntry> column = UnstructuredStorageReaderUtil.getListColumnEntry(readerSliceConfig, com.alibaba.datax.plugin.unstructuredstorage.reader.Key.COLUMN);
    // warn: no default value '\N'
    String nullFormat = readerSliceConfig.getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.NULL_FORMAT);
    Path rcFilePath = new Path(sourceRcFilePath);
    FileSystem fs = null;
    RCFileRecordReader recordReader = null;
    try {
        fs = FileSystem.get(rcFilePath.toUri(), hadoopConf);
        long fileLen = fs.getFileStatus(rcFilePath).getLen();
        FileSplit split = new FileSplit(rcFilePath, 0, fileLen, (String[]) null);
        recordReader = new RCFileRecordReader(hadoopConf, split);
        LongWritable key = new LongWritable();
        BytesRefArrayWritable value = new BytesRefArrayWritable();
        Text txt = new Text();
        while (recordReader.next(key, value)) {
            String[] sourceLine = new String[value.size()];
            txt.clear();
            for (int i = 0; i < value.size(); i++) {
                BytesRefWritable v = value.get(i);
                txt.set(v.getData(), v.getStart(), v.getLength());
                sourceLine[i] = txt.toString();
            }
            UnstructuredStorageReaderUtil.transportOneRecord(recordSender, column, sourceLine, nullFormat, taskPluginCollector);
        }
    } catch (IOException e) {
        String message = String.format("Error while reading file [%s]", sourceRcFilePath);
        LOG.error(message);
        throw DataXException.asDataXException(HdfsReaderErrorCode.READ_RCFILE_ERROR, message, e);
    } finally {
        try {
            if (recordReader != null) {
                recordReader.close();
                LOG.info("Finally, Close RCFileRecordReader.");
            }
        } catch (IOException e) {
            LOG.warn(String.format("finally: failed to close RCFileRecordReader, %s", e.getMessage()));
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) RCFileRecordReader(org.apache.hadoop.hive.ql.io.RCFileRecordReader) ColumnEntry(com.alibaba.datax.plugin.unstructuredstorage.reader.ColumnEntry) IOException(java.io.IOException) FileSystem(org.apache.hadoop.fs.FileSystem) BytesRefWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)
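
For context, a minimal sketch of the write side that produces the kind of RCFile the reader above consumes. The output path, column count, and row values are made up for illustration, and error handling is reduced to the bare minimum.

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.RCFile;
import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

public final class RcFileWriteExample {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // hypothetical output location
        Path path = new Path("/tmp/example.rc");
        // the writer needs the column count up front
        RCFileOutputFormat.setColumnNumber(conf, 2);
        FileSystem fs = FileSystem.get(path.toUri(), conf);
        RCFile.Writer writer = new RCFile.Writer(fs, conf, path);
        try {
            String[][] rows = { { "1", "alice" }, { "2", "bob" } };
            for (String[] row : rows) {
                BytesRefArrayWritable record = new BytesRefArrayWritable(row.length);
                for (int i = 0; i < row.length; i++) {
                    byte[] bytes = row[i].getBytes(StandardCharsets.UTF_8);
                    // each cell is wrapped in a BytesRefWritable pointing at the raw bytes
                    record.set(i, new BytesRefWritable(bytes, 0, bytes.length));
                }
                writer.append(record);
            }
        } finally {
            writer.close();
        }
    }
}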

Aggregations

BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable): 28
IOException (java.io.IOException): 14
BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable): 14
Configuration (org.apache.hadoop.conf.Configuration): 6
Path (org.apache.hadoop.fs.Path): 5
DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec): 5
LongWritable (org.apache.hadoop.io.LongWritable): 4
RecordReader (org.apache.hadoop.mapred.RecordReader): 4
Test (org.junit.Test): 4
Random (java.util.Random): 2
RCFile (org.apache.hadoop.hive.ql.io.RCFile): 2
ColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe): 2
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 2
Text (org.apache.hadoop.io.Text): 2
InputSplit (org.apache.hadoop.mapred.InputSplit): 2
JobConf (org.apache.hadoop.mapred.JobConf): 2
ColumnEntry (com.alibaba.datax.plugin.unstructuredstorage.reader.ColumnEntry): 1
PrestoException (com.facebook.presto.spi.PrestoException): 1
FileWriter (java.io.FileWriter): 1
PrintWriter (java.io.PrintWriter): 1