
Example 56 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

From the class OctetLength, the method evaluate:

// Calculate the octet (byte) length of each string in the input vector and place the results in the output vector.
@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum];
    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
    int[] sel = batch.selected;
    int n = batch.size;
    int[] length = inputColVector.length;
    long[] resultLen = outV.vector;
    if (n == 0) {
        // Nothing to do
        return;
    }
    if (inputColVector.noNulls) {
        outV.noNulls = true;
        if (inputColVector.isRepeating) {
            outV.isRepeating = true;
            resultLen[0] = length[0];
        } else if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                resultLen[i] = length[i];
            }
            outV.isRepeating = false;
        } else {
            for (int i = 0; i != n; i++) {
                resultLen[i] = length[i];
            }
            outV.isRepeating = false;
        }
    } else {
        /*
         * Handle case with nulls. Don't do function if the value is null, to save time,
         * because calling the function can be expensive.
         */
        outV.noNulls = false;
        if (inputColVector.isRepeating) {
            outV.isRepeating = true;
            outV.isNull[0] = inputColVector.isNull[0];
            if (!inputColVector.isNull[0]) {
                resultLen[0] = length[0];
            }
        } else if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                if (!inputColVector.isNull[i]) {
                    resultLen[i] = length[i];
                }
                outV.isNull[i] = inputColVector.isNull[i];
            }
            outV.isRepeating = false;
        } else {
            for (int i = 0; i != n; i++) {
                if (!inputColVector.isNull[i]) {
                    resultLen[i] = length[i];
                }
                outV.isNull[i] = inputColVector.isNull[i];
            }
            outV.isRepeating = false;
        }
    }
}
Also used: BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
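
For orientation, here is a minimal driver sketch, not taken from the Hive sources. It assumes OctetLength exposes an (inputColumn, outputColumn) constructor matching the colNum and outputColumn fields used above, and it illustrates why the expression is called OctetLength: the result is a byte count, not a character count.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.OctetLength;

public class OctetLengthSketch {
    public static void main(String[] args) throws Exception {
        // Two-column batch: column 0 holds the strings, column 1 receives the lengths.
        VectorizedRowBatch batch = new VectorizedRowBatch(2, 3);
        BytesColumnVector in = new BytesColumnVector(3);
        LongColumnVector out = new LongColumnVector(3);
        batch.cols[0] = in;
        batch.cols[1] = out;
        batch.size = 3;
        in.initBuffer();
        in.setVal(0, "a".getBytes(StandardCharsets.UTF_8));      // 1 byte
        in.setVal(1, "abc".getBytes(StandardCharsets.UTF_8));    // 3 bytes
        in.setVal(2, "\u00e9".getBytes(StandardCharsets.UTF_8)); // e-acute: 2 bytes in UTF-8
        new OctetLength(0, 1).evaluate(batch); // constructor signature assumed, see note above
        for (int i = 0; i < batch.size; i++) {
            System.out.println(out.vector[i]); // prints 1, 3, 2
        }
    }
}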

Example 57 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

From the class CastStringToDecimal, the method evaluate:

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
    int[] sel = batch.selected;
    int n = batch.size;
    DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumn];
    if (n == 0) {
        // Nothing to do
        return;
    }
    if (inV.noNulls) {
        outV.noNulls = true;
        if (inV.isRepeating) {
            outV.isRepeating = true;
            func(outV, inV, 0);
        } else if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                func(outV, inV, i);
            }
            outV.isRepeating = false;
        } else {
            for (int i = 0; i != n; i++) {
                func(outV, inV, i);
            }
            outV.isRepeating = false;
        }
    } else {
        // Handle case with nulls. Don't do function if the value is null,
        // because the data may be undefined for a null value.
        outV.noNulls = false;
        if (inV.isRepeating) {
            outV.isRepeating = true;
            outV.isNull[0] = inV.isNull[0];
            if (!inV.isNull[0]) {
                func(outV, inV, 0);
            }
        } else if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                outV.isNull[i] = inV.isNull[i];
                if (!inV.isNull[i]) {
                    func(outV, inV, i);
                }
            }
            outV.isRepeating = false;
        } else {
            System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
            for (int i = 0; i != n; i++) {
                if (!inV.isNull[i]) {
                    func(outV, inV, i);
                }
            }
            outV.isRepeating = false;
        }
    }
}
Also used: DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector), BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)
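
The per-element conversion is hidden behind func(outV, inV, i). A hedged sketch of what that step amounts to follows; this is an illustrative stand-in, not the Hive source, which parses the bytes directly and applies the column's precision and scale.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;

public class CastStringToDecimalSketch {
    // Illustrative stand-in for func(outV, inV, i); not the actual Hive implementation.
    static void castOne(DecimalColumnVector outV, BytesColumnVector inV, int i) {
        // Decode the (vector, start, length) slice that holds row i.
        String s = new String(inV.vector[i], inV.start[i], inV.length[i],
                StandardCharsets.UTF_8);
        HiveDecimal d = HiveDecimal.create(s); // returns null for unparseable input
        if (d == null) {
            outV.noNulls = false; // a cast failure introduces a null into the output
            outV.isNull[i] = true;
        } else {
            outV.set(i, d); // set() enforces the output column's precision/scale
        }
    }
}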

Example 58 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

From the class CastStringToIntervalDayTime, the method evaluate:

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
    int[] sel = batch.selected;
    int n = batch.size;
    IntervalDayTimeColumnVector outV = (IntervalDayTimeColumnVector) batch.cols[outputColumn];
    if (n == 0) {
        // Nothing to do
        return;
    }
    if (inV.noNulls) {
        outV.noNulls = true;
        if (inV.isRepeating) {
            outV.isRepeating = true;
            evaluate(outV, inV, 0);
        } else if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                evaluate(outV, inV, i);
            }
            outV.isRepeating = false;
        } else {
            for (int i = 0; i != n; i++) {
                evaluate(outV, inV, i);
            }
            outV.isRepeating = false;
        }
    } else {
        // Handle case with nulls. Don't do function if the value is null,
        // because the data may be undefined for a null value.
        outV.noNulls = false;
        if (inV.isRepeating) {
            outV.isRepeating = true;
            outV.isNull[0] = inV.isNull[0];
            if (!inV.isNull[0]) {
                evaluate(outV, inV, 0);
            }
        } else if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                outV.isNull[i] = inV.isNull[i];
                if (!inV.isNull[i]) {
                    evaluate(outV, inV, i);
                }
            }
            outV.isRepeating = false;
        } else {
            System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
            for (int i = 0; i != n; i++) {
                if (!inV.isNull[i]) {
                    evaluate(outV, inV, i);
                }
            }
            outV.isRepeating = false;
        }
    }
}
Also used: IntervalDayTimeColumnVector (org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector), BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)
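
Note that the null-propagation skeleton is identical to the decimal cast above; only the per-row evaluate(outV, inV, i) overload changes. A hedged sketch of that parse step, with illustrative names and error handling rather than the actual Hive source:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector;

public class CastStringToIntervalSketch {
    // Illustrative stand-in for evaluate(outV, inV, i); not the actual Hive implementation.
    static void castOne(IntervalDayTimeColumnVector outV, BytesColumnVector inV, int i) {
        String s = new String(inV.vector[i], inV.start[i], inV.length[i],
                StandardCharsets.UTF_8);
        try {
            // Parses the "days hh:mm:ss.fffffffff" interval form.
            outV.set(i, HiveIntervalDayTime.valueOf(s));
        } catch (IllegalArgumentException e) {
            outV.noNulls = false; // unparseable text becomes NULL
            outV.isNull[i] = true;
        }
    }
}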

Example 59 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

From the class TestInputOutputFormat, the method testSchemaEvolution:

/**
 * Test schema evolution when using the reader directly.
 */
@Test
public void testSchemaEvolution() throws Exception {
    TypeDescription fileSchema = TypeDescription.fromString("struct<a:int,b:struct<c:int>,d:string>");
    Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).fileSystem(fs).setSchema(fileSchema).compress(org.apache.orc.CompressionKind.NONE));
    VectorizedRowBatch batch = fileSchema.createRowBatch(1000);
    batch.size = 1000;
    LongColumnVector lcv = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[0]);
    for (int r = 0; r < 1000; r++) {
        ((LongColumnVector) batch.cols[0]).vector[r] = r * 42;
        lcv.vector[r] = r * 10001;
        ((BytesColumnVector) batch.cols[2]).setVal(r, Integer.toHexString(r).getBytes(StandardCharsets.UTF_8));
    }
    writer.addRowBatch(batch);
    writer.close();
    TypeDescription readerSchema = TypeDescription.fromString("struct<a:int,b:struct<c:int,future1:int>,d:string,future2:int>");
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rowsOptions(new Reader.Options().schema(readerSchema));
    batch = readerSchema.createRowBatch();
    lcv = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[0]);
    LongColumnVector future1 = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[1]);
    assertEquals(true, rows.nextBatch(batch));
    assertEquals(1000, batch.size);
    assertEquals(true, future1.isRepeating);
    assertEquals(true, future1.isNull[0]);
    assertEquals(true, batch.cols[3].isRepeating);
    assertEquals(true, batch.cols[3].isNull[0]);
    for (int r = 0; r < batch.size; ++r) {
        assertEquals("row " + r, r * 42, ((LongColumnVector) batch.cols[0]).vector[r]);
        assertEquals("row " + r, r * 10001, lcv.vector[r]);
        assertEquals("row " + r, r * 10001, lcv.vector[r]);
        assertEquals("row " + r, Integer.toHexString(r), ((BytesColumnVector) batch.cols[2]).toString(r));
    }
    assertEquals(false, rows.nextBatch(batch));
    rows.close();
    // try it again with an include vector
    rows = reader.rowsOptions(new Reader.Options().schema(readerSchema).include(new boolean[] { false, true, true, true, false, false, true }));
    batch = readerSchema.createRowBatch();
    lcv = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[0]);
    future1 = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[1]);
    assertEquals(true, rows.nextBatch(batch));
    assertEquals(1000, batch.size);
    assertEquals(true, future1.isRepeating);
    assertEquals(true, future1.isNull[0]);
    assertEquals(true, batch.cols[3].isRepeating);
    assertEquals(true, batch.cols[3].isNull[0]);
    assertEquals(true, batch.cols[2].isRepeating);
    assertEquals(true, batch.cols[2].isNull[0]);
    for (int r = 0; r < batch.size; ++r) {
        assertEquals("row " + r, r * 42, ((LongColumnVector) batch.cols[0]).vector[r]);
        assertEquals("row " + r, r * 10001, lcv.vector[r]);
    }
    assertEquals(false, rows.nextBatch(batch));
    rows.close();
}
Also used: VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch), StructColumnVector (org.apache.hadoop.hive.ql.exec.vector.StructColumnVector), BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), TypeDescription (org.apache.orc.TypeDescription), RecordWriter (org.apache.hadoop.mapred.RecordWriter), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector), Test (org.junit.Test)
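
The assertions on future1 and batch.cols[3] rely on how a reader materializes columns that exist in the reader schema but not in the file: as repeating null vectors, so a single isNull[0] check covers every row. A minimal sketch of that representation:

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public class RepeatingNullSketch {
    public static void main(String[] args) {
        // Represent an all-NULL column without touching vector[] at all.
        LongColumnVector missing = new LongColumnVector();
        missing.isRepeating = true;  // entry 0 stands in for every row
        missing.noNulls = false;
        missing.isNull[0] = true;
        // Any consumer honoring isRepeating/isNull now reads NULL for each row,
        // which is exactly what the test asserts for future1 and cols[3].
    }
}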

Example 60 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

From the class TestInputOutputFormat, the method testVectorizationWithAcid:

// Test ACID with vectorization, no combine.
@Test
public void testVectorizationWithAcid() throws Exception {
    StructObjectInspector inspector = new BigRowInspector();
    JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"), "vectorizationAcid", inspector, true, 1);
    // write the orc file to the mock file system
    Path partDir = new Path(conf.get("mapred.input.dir"));
    OrcRecordUpdater writer = new OrcRecordUpdater(partDir, new AcidOutputFormat.Options(conf).maximumTransactionId(10).writingBase(true).bucket(0).inspector(inspector).finalDestination(partDir));
    for (int i = 0; i < 100; ++i) {
        BigRow row = new BigRow(i);
        writer.insert(10, row);
    }
    writer.close(false);
    Path path = new Path("mock:/vectorizationAcid/p=0/base_0000010/bucket_00000");
    setBlocks(path, conf, new MockBlock("host0", "host1"));
    // call getsplits
    HiveInputFormat<?, ?> inputFormat = new HiveInputFormat<WritableComparable, Writable>();
    InputSplit[] splits = inputFormat.getSplits(conf, 10);
    assertEquals(1, splits.length);
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, BigRow.getColumnNamesProperty());
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, BigRow.getColumnTypesProperty());
    HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, true);
    org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch> reader = inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
    NullWritable key = reader.createKey();
    VectorizedRowBatch value = reader.createValue();
    assertEquals(true, reader.next(key, value));
    assertEquals(100, value.count());
    LongColumnVector booleanColumn = (LongColumnVector) value.cols[0];
    LongColumnVector byteColumn = (LongColumnVector) value.cols[1];
    LongColumnVector shortColumn = (LongColumnVector) value.cols[2];
    LongColumnVector intColumn = (LongColumnVector) value.cols[3];
    LongColumnVector longColumn = (LongColumnVector) value.cols[4];
    DoubleColumnVector floatColumn = (DoubleColumnVector) value.cols[5];
    DoubleColumnVector doubleColumn = (DoubleColumnVector) value.cols[6];
    BytesColumnVector stringColumn = (BytesColumnVector) value.cols[7];
    DecimalColumnVector decimalColumn = (DecimalColumnVector) value.cols[8];
    LongColumnVector dateColumn = (LongColumnVector) value.cols[9];
    TimestampColumnVector timestampColumn = (TimestampColumnVector) value.cols[10];
    for (int i = 0; i < 100; i++) {
        assertEquals("checking boolean " + i, i % 2 == 0 ? 1 : 0, booleanColumn.vector[i]);
        assertEquals("checking byte " + i, (byte) i, byteColumn.vector[i]);
        assertEquals("checking short " + i, (short) i, shortColumn.vector[i]);
        assertEquals("checking int " + i, i, intColumn.vector[i]);
        assertEquals("checking long " + i, i, longColumn.vector[i]);
        assertEquals("checking float " + i, i, floatColumn.vector[i], 0.0001);
        assertEquals("checking double " + i, i, doubleCoulmn.vector[i], 0.0001);
        Text strValue = new Text();
        strValue.set(stringColumn.vector[i], stringColumn.start[i], stringColumn.length[i]);
        assertEquals("checking string " + i, new Text(Long.toHexString(i)), strValue);
        assertEquals("checking decimal " + i, HiveDecimal.create(i), decimalColumn.vector[i].getHiveDecimal());
        assertEquals("checking date " + i, i, dateColumn.vector[i]);
        long millis = (long) i * MILLIS_IN_DAY;
        millis -= LOCAL_TIMEZONE.getOffset(millis);
        assertEquals("checking timestamp " + i, millis, timestampColumn.getTime(i));
    }
    assertEquals(false, reader.next(key, value));
}
Also used: DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector), DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector), CombineHiveInputFormat (org.apache.hadoop.hive.ql.io.CombineHiveInputFormat), HiveInputFormat (org.apache.hadoop.hive.ql.io.HiveInputFormat), VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch), BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), JobConf (org.apache.hadoop.mapred.JobConf), InputSplit (org.apache.hadoop.mapred.InputSplit), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector), Path (org.apache.hadoop.fs.Path), TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector), Text (org.apache.hadoop.io.Text), NullWritable (org.apache.hadoop.io.NullWritable), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)
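
The Text round-trip above works because BytesColumnVector does not store standalone byte arrays: each value is a (vector[i], start[i], length[i]) slice into a shared buffer. A hedged helper sketch for extracting row i as a Java String, accounting for the repeating and null encodings:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;

public class BytesColumnVectorSketch {
    // Resolve the repeating and null encodings before slicing the backing buffer.
    static String stringAt(BytesColumnVector col, int i) {
        int row = col.isRepeating ? 0 : i;
        if (!col.noNulls && col.isNull[row]) {
            return null;
        }
        return new String(col.vector[row], col.start[row], col.length[row],
                StandardCharsets.UTF_8);
    }
}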

Aggregations

BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector): 124
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 66
Test (org.junit.Test): 50
LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector): 44
TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch): 12
DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector): 10
DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector): 8
TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector): 8
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 8
Text (org.apache.hadoop.io.Text): 8
ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector): 6
IOException (java.io.IOException): 4
ArrayList (java.util.ArrayList): 4
Path (org.apache.hadoop.fs.Path): 4
JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil): 4
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 4
TypeDescription (org.apache.orc.TypeDescription): 4
UnsupportedEncodingException (java.io.UnsupportedEncodingException): 3
ParseException (java.text.ParseException): 3
Random (java.util.Random): 3