Example 16 with BytesRefWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project presto by prestodb.

From the class ColumnarTextHiveRecordCursor, method parseDecimalColumn.

private void parseDecimalColumn(int column) {
    loaded[column] = true;
    if (hiveColumnIndexes[column] >= value.size()) {
        // this partition may contain fewer fields than what's declared in the schema
        // this happens when additional columns are added to the hive table after a partition has been created
        nulls[column] = true;
    } else {
        BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
        byte[] bytes;
        try {
            bytes = fieldData.getData();
        } catch (IOException e) {
            throw Throwables.propagate(e);
        }
        int start = fieldData.getStart();
        int length = fieldData.getLength();
        parseDecimalColumn(column, bytes, start, length);
    }
}
Also used: IOException (java.io.IOException), BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)

Example 17 with BytesRefWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project presto by prestodb.

From the class ColumnarTextHiveRecordCursor, method parseBooleanColumn.

private void parseBooleanColumn(int column) {
    loaded[column] = true;
    if (hiveColumnIndexes[column] >= value.size()) {
        // this partition may contain fewer fields than what's declared in the schema
        // this happens when additional columns are added to the hive table after a partition has been created
        nulls[column] = true;
    } else {
        BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
        byte[] bytes;
        try {
            bytes = fieldData.getData();
        } catch (IOException e) {
            throw Throwables.propagate(e);
        }
        int start = fieldData.getStart();
        int length = fieldData.getLength();
        parseBooleanColumn(column, bytes, start, length);
    }
}
Also used: IOException (java.io.IOException), BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)
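
Examples 16 and 17 differ only in the final type-specific parse call; both follow the same access pattern: look up the field's BytesRefWritable, fetch its backing byte array, and parse the slice bounded by getStart() and getLength(). Below is a minimal standalone sketch of that pattern, with the cursor's column bookkeeping stripped out and a UTF-8 String decode standing in for the type-specific parsers; the class and method names are illustrative, not part of Presto.

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

public class BytesRefFieldAccess {

    // Returns the given field of a row as text, or null when the row has fewer
    // fields than the requested index (a partition written before the column was
    // added to the table, as the comments in the cursor methods above describe).
    static String fieldAsString(BytesRefArrayWritable row, int fieldIndex) {
        if (fieldIndex >= row.size()) {
            return null;
        }
        BytesRefWritable field = row.unCheckedGet(fieldIndex);
        try {
            byte[] bytes = field.getData();
            int start = field.getStart();
            int length = field.getLength();
            return new String(bytes, start, length, StandardCharsets.UTF_8);
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
}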

Example 18 with BytesRefWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project hive by apache.

From the class ColumnarSerDeBase, method initialize.

protected void initialize(int size) throws SerDeException {
    field = new BytesRefWritable[size];
    for (int i = 0; i < size; i++) {
        field[i] = new BytesRefWritable();
        serializeCache.set(i, field[i]);
    }
    serializedSize = 0;
    stats = new SerDeStats();
    lastOperationSerialize = false;
    lastOperationDeserialize = false;
}
Also used: SerDeStats (org.apache.hadoop.hive.serde2.SerDeStats)
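
The initialize method above wires field[i] and the cache entry at position i to the same BytesRefWritable instance, so serialization can later repoint each writable at a new byte slice without allocating per row. Below is a small sketch of that reuse pattern outside the SerDe; the cache variable stands in for serializeCache, and the sample row bytes are made up for illustration.

import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

public class SerializeCacheSketch {
    public static void main(String[] args) {
        int columns = 3;
        BytesRefWritable[] field = new BytesRefWritable[columns];
        BytesRefArrayWritable cache = new BytesRefArrayWritable(columns);
        for (int i = 0; i < columns; i++) {
            field[i] = new BytesRefWritable();
            // the cache and field[] now share the same objects
            cache.set(i, field[i]);
        }
        // For each row, only repoint the existing writables at the row's byte slices.
        byte[] row = "a,b,c".getBytes();
        field[0].set(row, 0, 1);
        field[1].set(row, 2, 1);
        field[2].set(row, 4, 1);
        System.out.println(cache.size()); // prints 3
    }
}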

Example 19 with BytesRefWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project hive by apache.

From the class TestRCFile, method testSync.

@Test
public void testSync() throws IOException {
    Path testDir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred/testsync");
    Path testFile = new Path(testDir, "test_rcfile");
    fs.delete(testFile, true);
    int intervalRecordCount = 500;
    CompressionCodec codec = null;
    int writeCount = 2500;
    Configuration cloneConf = new Configuration(conf);
    RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
    cloneConf.setInt(HiveConf.ConfVars.HIVE_RCFILE_RECORD_INTERVAL.varname, intervalRecordCount);
    RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec);
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length);
    for (int i = 0; i < bytesArray.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length);
        bytes.set(i, cu);
    }
    for (int i = 0; i < writeCount; i++) {
        writer.append(bytes);
    }
    writer.close();
    long fileLen = fs.getFileStatus(testFile).getLen();
    RCFileInputFormat inputFormat = new RCFileInputFormat();
    JobConf jobconf = new JobConf(cloneConf);
    jobconf.set("mapred.input.dir", testDir.toString());
    HiveConf.setLongVar(jobconf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, fileLen);
    InputSplit[] splits = inputFormat.getSplits(jobconf, 1);
    RCFileRecordReader rr = new RCFileRecordReader(jobconf, (FileSplit) splits[0]);
    long lastSync = 0;
    for (int i = 0; i < 2500; i++) {
        rr.sync(i);
        if (rr.getPos() < lastSync) {
            String reason = String.format("Sync at offset %d skipped sync block at location %d (returned %d instead)", i - 1, rr.getPos(), lastSync);
            System.out.println(reason);
            fail(reason);
        }
        lastSync = rr.getPos();
    }
    rr.close();
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec), JobConf (org.apache.hadoop.mapred.JobConf), InputSplit (org.apache.hadoop.mapred.InputSplit), BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable), Test (org.junit.Test)
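
testSync exercises the sync markers the writer places between row groups (every intervalRecordCount rows in this setup): rr.sync(offset) seeks the reader to the next sync point at or after the requested offset, and rr.getPos() reports where it landed, so the returned positions must never move backwards as the offset grows. A condensed sketch of that monotonicity check, assuming a reader and file length obtained the same way as in the test above:

import java.io.IOException;
import org.apache.hadoop.hive.ql.io.RCFileRecordReader;

public class SyncScanSketch {

    // Walks candidate offsets through the file and verifies that the position
    // reached after each sync never decreases, the property the test asserts.
    static void assertMonotonicSync(RCFileRecordReader<?, ?> reader, long fileLen) throws IOException {
        long lastSync = 0;
        for (long offset = 0; offset < fileLen; offset++) {
            // seek to the next sync marker at or after offset
            reader.sync(offset);
            long pos = reader.getPos();
            if (pos < lastSync) {
                throw new AssertionError("sync at offset " + offset + " moved backwards to " + pos);
            }
            lastSync = pos;
        }
    }
}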

Example 20 with BytesRefWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project hive by apache.

From the class TestRCFile, method writeThenReadByRecordReader.

private void writeThenReadByRecordReader(int intervalRecordCount, int writeCount, int splitNumber, long minSplitSize, CompressionCodec codec) throws IOException {
    Path testDir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred/testsmallfirstsplit");
    Path testFile = new Path(testDir, "test_rcfile");
    fs.delete(testFile, true);
    Configuration cloneConf = new Configuration(conf);
    RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
    cloneConf.setInt(HiveConf.ConfVars.HIVE_RCFILE_RECORD_INTERVAL.varname, intervalRecordCount);
    RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec);
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length);
    for (int i = 0; i < bytesArray.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length);
        bytes.set(i, cu);
    }
    for (int i = 0; i < writeCount; i++) {
        if (i == intervalRecordCount) {
            System.out.println("write position:" + writer.getLength());
        }
        writer.append(bytes);
    }
    writer.close();
    RCFileInputFormat inputFormat = new RCFileInputFormat();
    JobConf jonconf = new JobConf(cloneConf);
    jonconf.set("mapred.input.dir", testDir.toString());
    HiveConf.setLongVar(jonconf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, minSplitSize);
    InputSplit[] splits = inputFormat.getSplits(jonconf, splitNumber);
    assertEquals("splits length should be " + splitNumber, splits.length, splitNumber);
    int readCount = 0;
    for (int i = 0; i < splits.length; i++) {
        int previousReadCount = readCount;
        RecordReader rr = inputFormat.getRecordReader(splits[i], jonconf, Reporter.NULL);
        Object key = rr.createKey();
        Object value = rr.createValue();
        while (rr.next(key, value)) {
            readCount++;
        }
        rr.close();
        System.out.println("The " + i + "th split read " + (readCount - previousReadCount));
    }
    assertEquals("readCount should be equal to writeCount", writeCount, readCount);
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), RecordReader (org.apache.hadoop.mapred.RecordReader), JobConf (org.apache.hadoop.mapred.JobConf), InputSplit (org.apache.hadoop.mapred.InputSplit), BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)
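
writeThenReadByRecordReader only verifies the total record count across the splits. If the contents also needed checking, each BytesRefArrayWritable returned by the reader could be compared column by column against the original bytesArray. A hedged sketch of such a comparison follows; verifyRow is illustrative and not part of TestRCFile.

import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

public class RowComparisonSketch {

    // Compares every column of a row read back from an RCFile against the
    // byte arrays that were originally written for that row.
    static boolean verifyRow(BytesRefArrayWritable row, byte[][] expectedColumns) throws IOException {
        if (row.size() != expectedColumns.length) {
            return false;
        }
        for (int c = 0; c < expectedColumns.length; c++) {
            BytesRefWritable col = row.get(c);
            byte[] actual = Arrays.copyOfRange(col.getData(), col.getStart(), col.getStart() + col.getLength());
            if (!Arrays.equals(actual, expectedColumns[c])) {
                return false;
            }
        }
        return true;
    }
}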

Aggregations

BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable): 28
IOException (java.io.IOException): 14
BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable): 14
Configuration (org.apache.hadoop.conf.Configuration): 6
Path (org.apache.hadoop.fs.Path): 5
DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec): 5
LongWritable (org.apache.hadoop.io.LongWritable): 4
RecordReader (org.apache.hadoop.mapred.RecordReader): 4
Test (org.junit.Test): 4
Random (java.util.Random): 2
RCFile (org.apache.hadoop.hive.ql.io.RCFile): 2
ColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe): 2
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 2
Text (org.apache.hadoop.io.Text): 2
InputSplit (org.apache.hadoop.mapred.InputSplit): 2
JobConf (org.apache.hadoop.mapred.JobConf): 2
ColumnEntry (com.alibaba.datax.plugin.unstructuredstorage.reader.ColumnEntry): 1
PrestoException (com.facebook.presto.spi.PrestoException): 1
FileWriter (java.io.FileWriter): 1
PrintWriter (java.io.PrintWriter): 1