
Example 1 with BytesRefArrayWritable

use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project presto by prestodb.

the class RcFileTester method assertFileContentsOld.

private static <K extends LongWritable, V extends BytesRefArrayWritable> void assertFileContentsOld(Type type, TempFile tempFile, Format format, Iterable<?> expectedValues) throws Exception {
    JobConf configuration = new JobConf(new Configuration(false));
    // Prune the read down to column 0 only
    configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
    configuration.setBoolean(READ_ALL_COLUMNS, false);
    // Single-column table schema with one column named "test"
    Properties schema = new Properties();
    schema.setProperty(META_TABLE_COLUMNS, "test");
    schema.setProperty(META_TABLE_COLUMN_TYPES, getJavaObjectInspector(type).getTypeName());
    // Pick the columnar SerDe matching the file's serialization format
    Deserializer deserializer;
    if (format == Format.BINARY) {
        deserializer = new LazyBinaryColumnarSerDe();
    } else {
        deserializer = new ColumnarSerDe();
    }
    deserializer.initialize(configuration, schema);
    configuration.set(SERIALIZATION_LIB, deserializer.getClass().getName());
    // Read the whole file as a single split; NULL is the no-op Reporter
    InputFormat<K, V> inputFormat = new RCFileInputFormat<>();
    RecordReader<K, V> recordReader = inputFormat.getRecordReader(
            new FileSplit(new Path(tempFile.getFile().getAbsolutePath()), 0, tempFile.getFile().length(), (String[]) null),
            configuration,
            NULL);
    K key = recordReader.createKey();
    V value = recordReader.createValue();
    StructObjectInspector rowInspector = (StructObjectInspector) deserializer.getObjectInspector();
    StructField field = rowInspector.getStructFieldRef("test");
    // Compare each decoded row against the expected values, in order
    Iterator<?> iterator = expectedValues.iterator();
    while (recordReader.next(key, value)) {
        Object expectedValue = iterator.next();
        Object rowData = deserializer.deserialize(value);
        Object actualValue = rowInspector.getStructFieldData(rowData, field);
        actualValue = decodeRecordReaderValue(type, actualValue);
        assertColumnValueEquals(type, actualValue, expectedValue);
    }
    // Every expected value must have been consumed
    assertFalse(iterator.hasNext());
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) Properties(java.util.Properties) FileSplit(org.apache.hadoop.mapred.FileSplit) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) RCFileInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) StructObject(org.apache.hadoop.hive.serde2.StructObject) JobConf(org.apache.hadoop.mapred.JobConf) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
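
Example 1 wires up column pruning by hand via READ_COLUMN_IDS_CONF_STR and READ_ALL_COLUMNS. The same configuration is usually produced through ColumnProjectionUtils; a minimal sketch, assuming the appendReadColumns helper available in recent Hive versions (setReadAllColumns also appears in Example 5 below):

import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

public class ColumnPruningSketch {
    // Restrict RCFile readers to column 0 only, as Example 1 does by hand
    public static Configuration readOnlyColumnZero() {
        Configuration conf = new Configuration(false);
        ColumnProjectionUtils.appendReadColumns(conf, Arrays.asList(0));
        return conf;
    }

    // The opposite setting: read every column (see Example 5 below)
    public static Configuration readAllColumns() {
        Configuration conf = new Configuration(false);
        ColumnProjectionUtils.setReadAllColumns(conf);
        return conf;
    }
}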

Example 2 with BytesRefArrayWritable

use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.

the class RCFileGenerator method genData.

private static void genData(String format, int numRows, String output, String plainOutput) throws Exception {
    // Seed the generator per format so each run is reproducible
    int numFields = 0;
    if (format.equals("student")) {
        rand = new Random(numRows);
        numFields = 3;
    } else if (format.equals("voter")) {
        rand = new Random(1000000000 + numRows);
        numFields = 4;
    } else if (format.equals("alltypes")) {
        rand = new Random(2000000000L + numRows);
        numFields = 10;
    } else {
        throw new IllegalArgumentException("Unknown format: " + format);
    }
    RCFileOutputFormat.setColumnNumber(conf, numFields);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, getFile(output), null, new DefaultCodec());
    // Mirror every row into a plain TAB-separated text file for later comparison
    PrintWriter pw = new PrintWriter(new FileWriter(plainOutput));
    for (int j = 0; j < numRows; j++) {
        BytesRefArrayWritable row = new BytesRefArrayWritable(numFields);
        byte[][] fields = null;
        if (format.equals("student")) {
            byte[][] f = {
                randomName().getBytes("UTF-8"),
                Integer.valueOf(randomAge()).toString().getBytes("UTF-8"),
                Double.valueOf(randomGpa()).toString().getBytes("UTF-8") };
            fields = f;
        } else if (format.equals("voter")) {
            byte[][] f = {
                randomName().getBytes("UTF-8"),
                Integer.valueOf(randomAge()).toString().getBytes("UTF-8"),
                randomRegistration().getBytes("UTF-8"),
                Double.valueOf(randomContribution()).toString().getBytes("UTF-8") };
            fields = f;
        } else if (format.equals("alltypes")) {
            byte[][] f = {
                Integer.valueOf(rand.nextInt(Byte.MAX_VALUE)).toString().getBytes("UTF-8"),
                Integer.valueOf(rand.nextInt(Short.MAX_VALUE)).toString().getBytes("UTF-8"),
                Integer.valueOf(rand.nextInt()).toString().getBytes("UTF-8"),
                Long.valueOf(rand.nextLong()).toString().getBytes("UTF-8"),
                Float.valueOf(rand.nextFloat() * 1000).toString().getBytes("UTF-8"),
                Double.valueOf(rand.nextDouble() * 1000000).toString().getBytes("UTF-8"),
                randomName().getBytes("UTF-8"),
                randomMap(),
                randomArray() };
            fields = f;
        }
        // Wrap each field's bytes and append the row to both outputs
        for (int i = 0; i < fields.length; i++) {
            BytesRefWritable field = new BytesRefWritable(fields[i], 0, fields[i].length);
            row.set(i, field);
            pw.print(new String(fields[i], "UTF-8"));
            // TAB between columns, newline after the last one
            if (i != fields.length - 1) {
                pw.print("\t");
            } else {
                pw.println();
            }
        }
        writer.append(row);
    }
    writer.close();
    pw.close();
}
Also used : RCFile(org.apache.hadoop.hive.ql.io.RCFile) Random(java.util.Random) BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) BytesRefWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefWritable) FileWriter(java.io.FileWriter) PrintWriter(java.io.PrintWriter) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec)
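
The per-row idiom in genData distills to: wrap each field's UTF-8 bytes in a BytesRefWritable and slot it into a BytesRefArrayWritable sized to the column count. A minimal sketch with a hypothetical toRow helper:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

public final class RowBuilder {
    // Hypothetical helper: turn one String per column into an RCFile row
    static BytesRefArrayWritable toRow(String... fields) {
        BytesRefArrayWritable row = new BytesRefArrayWritable(fields.length);
        for (int i = 0; i < fields.length; i++) {
            byte[] bytes = fields[i].getBytes(StandardCharsets.UTF_8);
            row.set(i, new BytesRefWritable(bytes, 0, bytes.length));
        }
        return row;
    }
}

With a helper like this, the write loop in genData reduces to writer.append(toRow("alice", "42", "3.9")).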

Example 3 with BytesRefArrayWritable

use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.

the class TestRCFileMapReduceInputFormat method writeThenReadByRecordReader.

private void writeThenReadByRecordReader(int intervalRecordCount, int writeCount, int splitNumber, long maxSplitSize, CompressionCodec codec) throws IOException, InterruptedException {
    Path testDir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred/testsmallfirstsplit");
    Path testFile = new Path(testDir, "test_rcfile");
    fs.delete(testFile, true);
    // Write 'writeCount' copies of the same row, starting a new record block every 'intervalRecordCount' rows
    Configuration cloneConf = new Configuration(conf);
    RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
    cloneConf.setInt(HiveConf.ConfVars.HIVE_RCFILE_RECORD_INTERVAL.varname, intervalRecordCount);
    RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec);
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length);
    for (int i = 0; i < bytesArray.length; i++) {
        bytes.set(i, new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length));
    }
    for (int i = 0; i < writeCount; i++) {
        writer.append(bytes);
    }
    writer.close();
    // Read the file back through the MapReduce input format, capping the split size
    RCFileMapReduceInputFormat<LongWritable, BytesRefArrayWritable> inputFormat = new RCFileMapReduceInputFormat<>();
    Configuration jobConf = new Configuration(cloneConf);
    jobConf.set("mapred.input.dir", testDir.toString());
    JobContext context = new Job(jobConf);
    HiveConf.setLongVar(context.getConfiguration(), HiveConf.ConfVars.MAPREDMAXSPLITSIZE, maxSplitSize);
    List<InputSplit> splits = inputFormat.getSplits(context);
    assertEquals("splits length should be " + splitNumber, splitNumber, splits.size());
    // Every row written must come back, regardless of how the file was split
    int readCount = 0;
    for (int i = 0; i < splits.size(); i++) {
        TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(jobConf, new TaskAttemptID());
        RecordReader<LongWritable, BytesRefArrayWritable> rr = inputFormat.createRecordReader(splits.get(i), tac);
        rr.initialize(splits.get(i), tac);
        while (rr.nextKeyValue()) {
            readCount++;
        }
    }
    assertEquals("readCount should equal writeCount", writeCount, readCount);
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) RCFile(org.apache.hadoop.hive.ql.io.RCFile) LongWritable(org.apache.hadoop.io.LongWritable) JobContext(org.apache.hadoop.mapreduce.JobContext) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit) BytesRefWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)

Example 4 with BytesRefArrayWritable

use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.

the class RCFileCat method printRecord.

/**
 * Append one record to the output buffer as TAB-separated columns with a trailing newline.
 * @param value the row, one BytesRefWritable per column
 * @param buf the string builder the decoded record is appended to
 * @throws IOException if a column cannot be decoded
 */
private void printRecord(BytesRefArrayWritable value, StringBuilder buf) throws IOException {
    int n = value.size();
    if (n > 0) {
        // First column: no leading TAB
        BytesRefWritable v = value.unCheckedGet(0);
        ByteBuffer bb = ByteBuffer.wrap(v.getData(), v.getStart(), v.getLength());
        buf.append(decoder.decode(bb));
        for (int i = 1; i < n; i++) {
            // Prepend a TAB before each subsequent column, so no trailing TAB is emitted
            buf.append(RCFileCat.TAB);
            v = value.unCheckedGet(i);
            bb = ByteBuffer.wrap(v.getData(), v.getStart(), v.getLength());
            buf.append(decoder.decode(bb));
        }
        buf.append(RCFileCat.NEWLINE);
    }
}
Also used : ByteBuffer(java.nio.ByteBuffer) BytesRefWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)
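
printRecord only formats a row that has already been fetched. A minimal sketch of the read loop that would drive it, assuming an RCFile already on disk and a printRecord like the one above:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.RCFile;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.io.LongWritable;

public class RCFileCatSketch {
    // Stream every row of an RCFile through a printRecord-style formatter
    void catFile(FileSystem fs, Path file, Configuration conf) throws IOException {
        RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
        try {
            LongWritable rowID = new LongWritable();
            BytesRefArrayWritable row = new BytesRefArrayWritable();
            StringBuilder buf = new StringBuilder();
            while (reader.next(rowID)) {
                reader.getCurrentRow(row);  // fill 'row' with the current row's columns
                printRecord(row, buf);      // the formatter from Example 4
            }
            System.out.print(buf);
        } finally {
            reader.close();
        }
    }

    // Placeholder so the sketch compiles; see Example 4 for the real body
    private void printRecord(BytesRefArrayWritable value, StringBuilder buf) throws IOException {
    }
}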

Example 5 with BytesRefArrayWritable

use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.

the class PerformTestRCFileAndSeqFile method performRCFileFullyReadColumnTest.

public int performRCFileFullyReadColumnTest(FileSystem fs, Path file, int allColumnsNumber, boolean checkCorrect) throws IOException {
    byte[][] checkBytes = null;
    BytesRefArrayWritable checkRow = new BytesRefArrayWritable(allColumnsNumber);
    if (checkCorrect) {
        // Re-seed the generators so the rows that were written can be regenerated
        resetRandomGenerators();
        checkBytes = new byte[allColumnsNumber][];
    }
    int actualReadCount = 0;
    ColumnProjectionUtils.setReadAllColumns(conf);
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    while (reader.next(rowID)) {
        reader.getCurrentRow(cols);
        if (checkCorrect) {
            // Regenerate the expected row and compare it with what was read back
            nextRandomRow(checkBytes, checkRow);
            if (!checkRow.equals(cols)) {
                throw new IllegalStateException("Row read back does not match the row written.");
            }
        }
        actualReadCount++;
    }
    return actualReadCount;
}
Also used : BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) LongWritable(org.apache.hadoop.io.LongWritable)

Aggregations

BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable): 28
BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable): 17
Configuration (org.apache.hadoop.conf.Configuration): 13
LongWritable (org.apache.hadoop.io.LongWritable): 12
Path (org.apache.hadoop.fs.Path): 11
Test (org.junit.Test): 11
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 10
Properties (java.util.Properties): 7
RecordReader (org.apache.hadoop.mapred.RecordReader): 7
DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec): 5
FileSystem (org.apache.hadoop.fs.FileSystem): 4
RCFile (org.apache.hadoop.hive.ql.io.RCFile): 4
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 4
JobConf (org.apache.hadoop.mapred.JobConf): 4
IOException (java.io.IOException): 3
ColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe): 3
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 3
SimpleMapEqualComparer (org.apache.hadoop.hive.serde2.objectinspector.SimpleMapEqualComparer): 3
CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 3
Random (java.util.Random): 2
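
Pulled together from the examples above: a self-contained round trip through the set / unCheckedGet API, including the getStart()/getLength() offsets that Example 4 relies on. A minimal sketch; class and variable names are illustrative.

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

public final class BytesRefDemo {
    public static void main(String[] args) throws IOException {
        byte[] name = "alice".getBytes(StandardCharsets.UTF_8);
        byte[] age = "42".getBytes(StandardCharsets.UTF_8);

        // Build a two-column row
        BytesRefArrayWritable row = new BytesRefArrayWritable(2);
        row.set(0, new BytesRefWritable(name, 0, name.length));
        row.set(1, new BytesRefWritable(age, 0, age.length));

        // Read the columns back; each field is a byte range into its backing array, not a copy
        for (int i = 0; i < row.size(); i++) {
            BytesRefWritable field = row.unCheckedGet(i);
            String text = new String(field.getData(), field.getStart(), field.getLength(), StandardCharsets.UTF_8);
            System.out.println("column " + i + " = " + text);
        }
    }
}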