Search in sources :

Example 46 with IntVector

use of org.apache.drill.exec.vector.IntVector in project drill by axbaretto.

the class TestFixedWidthWriter method testFillEmpties.

/**
 * Verifies that a required, fixed-width vector is back-filled with 0 at
 * positions the writer never set. Using zero is not strictly SQL compliant,
 * but it is better than failing. (The SQL solution would be to fill with
 * nulls, but a required vector does not support nulls.)
 */
@Test
public void testFillEmpties() {
    try (IntVector intVector = allocVector(1000)) {
        TestIndex rowIndex = new TestIndex();
        IntColumnWriter colWriter = makeWriter(intVector, rowIndex);
        colWriter.startWrite();

        // Write only every fifth row (0, 5, ..., 500), leaving gaps between.
        int row = 0;
        while (row < 501) {
            rowIndex.index = row;
            colWriter.startRow();
            colWriter.setInt(row);
            colWriter.saveRow();
            row += 5;
        }

        // At the end, the vector index is defined to point one past the
        // last row; that is, the vector index gives the row count.
        rowIndex.index = 504;
        colWriter.endWrite();

        // Written positions hold their own index; every gap must read as 0.
        for (int pos = 0; pos < 504; pos++) {
            int expected;
            if (pos % 5 == 0) {
                expected = pos;
            } else {
                expected = 0;
            }
            assertEquals("Mismatch on " + pos, expected, intVector.getAccessor().get(pos));
        }
    }
}
Also used : IntVector(org.apache.drill.exec.vector.IntVector) IntColumnWriter(org.apache.drill.exec.vector.accessor.ColumnAccessors.IntColumnWriter) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 47 with IntVector

use of org.apache.drill.exec.vector.IntVector in project drill by axbaretto.

the class TestFixedWidthWriter method testSkipNulls.

/**
 * Test the case in which a scalar vector is used in conjunction
 * with a nullable bits vector. The nullable vector will call the
 * {@code skipNulls()} method to avoid writing values for null
 * entries. (Without the call, the scalar writer will fill the
 * empty values with zeros.)
 */
@Test
public void testSkipNulls() {
    try (IntVector vector = allocVector(1000)) {
        TestIndex index = new TestIndex();
        IntColumnWriter writer = makeWriter(vector, index);
        writer.startWrite();
        // Write values, skipping four out of five positions,
        // skipping nulls.
        // The loop will cause the vector to double in size.
        // The number of values is odd, forcing the writer to
        // skip nulls at the end as well as between values.
        long origAddr = vector.getBuffer().addr();
        for (int i = 0; i < 3000; i += 5) {
            index.index = i;
            writer.startRow();
            writer.skipNulls();
            writer.setInt(i);
            writer.saveRow();
        }
        // One trailing row past the last written value, with no value set,
        // so that nulls are skipped at the end as well.
        index.index = 3003;
        writer.startRow();
        writer.skipNulls();
        writer.saveRow();
        writer.endWrite();
        // Should have been reallocated.
        assertNotEquals(origAddr, vector.getBuffer().addr());
        // Within the first 1000 positions, skipped slots are expected to
        // still hold 0xdeadbeef.
        // NOTE(review): presumably allocVector() pre-fills the buffer with
        // that pattern -- confirm against the helper.
        for (int i = 0; i < 1000; i++) {
            assertEquals("Mismatch at " + i, (i % 5) == 0 ? i : 0xdeadbeef, vector.getAccessor().get(i));
        }
        // From position 1000 onward only the written slots are checked.
        // The loop starts at 1000 (not 1005) so that the value written at
        // index 1000 is verified too; the loop above stops at 999.
        for (int i = 1000; i < 3000; i += 5) {
            assertEquals(i, vector.getAccessor().get(i));
        }
    }
}
Also used : IntVector(org.apache.drill.exec.vector.IntVector) IntColumnWriter(org.apache.drill.exec.vector.accessor.ColumnAccessors.IntColumnWriter) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 48 with IntVector

use of org.apache.drill.exec.vector.IntVector in project drill by axbaretto.

the class TestFixedWidthWriter method testWrite.

/**
 * Basic round-trip check: writes a contiguous run of values, enough to
 * cause the vector to double in size twice, then reads every value back.
 */
@Test
public void testWrite() {
    try (IntVector intVector = allocVector(1000)) {
        TestIndex rowIndex = new TestIndex();
        IntColumnWriter colWriter = makeWriter(intVector, rowIndex);
        colWriter.startWrite();

        // Remember where the buffer started so a resize can be detected.
        long startAddr = intVector.getBuffer().addr();

        // Write enough integers that the vector is resized.
        int row = 0;
        while (row < 3000) {
            rowIndex.index = row;
            colWriter.setInt(row * 10);
            row++;
        }
        colWriter.endWrite();

        // Should have been reallocated.
        assertNotEquals(startAddr, intVector.getBuffer().addr());

        // Every position must hold ten times its own index.
        int pos = 0;
        while (pos < 3000) {
            assertEquals(pos * 10, intVector.getAccessor().get(pos));
            pos++;
        }
    }
}
Also used : IntVector(org.apache.drill.exec.vector.IntVector) IntColumnWriter(org.apache.drill.exec.vector.accessor.ColumnAccessors.IntColumnWriter) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 49 with IntVector

use of org.apache.drill.exec.vector.IntVector in project drill by axbaretto.

the class HashTableTemplate method resizeAndRehashIfNeeded.

// Resize the hash table if needed by creating a new one with double the number of buckets.
// For each entry in the old hash table, re-hash it to the new table and update the metadata
// in the new table. The metadata consists of the startIndices, links and hashValues.
// Note that the keys stored in the BatchHolders are not moved around.
//
// Returns immediately when numEntries is below the threshold or the table is already at
// MAXIMUM_CAPACITY. Throws OutOfMemoryException when the estimated size of the new
// metadata is not smaller than the allocator's remaining headroom.
private void resizeAndRehashIfNeeded() {
    if (numEntries < threshold) {
        return;
    }
    if (EXTRA_DEBUG) {
        logger.debug("Hash table numEntries = {}, threshold = {}; resizing the table...", numEntries, threshold);
    }
    // If the table is already at MAXIMUM_CAPACITY, do not resize it; instead raise the
    // threshold to Integer.MAX_VALUE so that future attempts to resize will return
    // immediately.
    if (tableSize == MAXIMUM_CAPACITY) {
        threshold = Integer.MAX_VALUE;
        return;
    }
    int newTableSize = 2 * tableSize;
    newTableSize = roundUpToPowerOf2(newTableSize);
    // If there is not enough memory for the new table plus two batch-sized int vectors
    // (the new hash-values that replace the existing ones inside rehash(), and presumably
    // the new links), then OOM.
    // The estimate is computed in long arithmetic: with int arithmetic the product
    // 4 * (newTableSize + ...) can wrap around for large tables, which would let the
    // check pass even though far more memory is required than is available.
    long requiredBytes = 4L /* sizeof(int) */ * ((long) newTableSize + 2L * HashTable.BATCH_SIZE);
    long availableBytes = allocator.getLimit() - allocator.getAllocatedMemory();
    if (requiredBytes >= availableBytes) {
        throw new OutOfMemoryException("Resize Hash Table");
    }
    tableSize = newTableSize;
    if (tableSize > MAXIMUM_CAPACITY) {
        tableSize = MAXIMUM_CAPACITY;
    }
    long t0 = System.currentTimeMillis();
    // set the new threshold based on the new table size and load factor
    threshold = (int) Math.ceil(tableSize * htConfig.getLoadFactor());
    IntVector newStartIndices = allocMetadataVector(tableSize, EMPTY_SLOT);
    // Each batch holder re-hashes its own entries into the new start-index vector;
    // batchStartIdx is the global index of the holder's first entry.
    for (int i = 0; i < batchHolders.size(); i++) {
        BatchHolder bh = batchHolders.get(i);
        int batchStartIdx = i * BATCH_SIZE;
        bh.rehash(tableSize, newStartIndices, batchStartIdx);
    }
    // Release the old start-index vector before switching to the new one.
    startIndices.clear();
    startIndices = newStartIndices;
    if (EXTRA_DEBUG) {
        logger.debug("After resizing and rehashing, dumping the hash table...");
        int bucketCount = startIndices.getAccessor().getValueCount();
        logger.debug("Number of buckets = {}.", bucketCount);
        for (int i = 0; i < bucketCount; i++) {
            int idx = startIndices.getAccessor().get(i);
            logger.debug("Bucket: {}, startIdx[ {} ] = {}.", i, i, idx);
            // A bucket still holding EMPTY_SLOT has no chain to dump, and decoding a
            // batch number from it could index past the end of batchHolders.
            // NOTE(review): assumes allocMetadataVector() fills the vector with the
            // given initial value -- confirm against the helper.
            if (idx == EMPTY_SLOT) {
                continue;
            }
            // The upper 16 bits of the index select the batch holder.
            BatchHolder bh = batchHolders.get((idx >>> 16) & BATCH_MASK);
            bh.dump(idx);
        }
    }
    resizingTime += System.currentTimeMillis() - t0;
    numResizing++;
}
Also used : BigIntVector(org.apache.drill.exec.vector.BigIntVector) IntVector(org.apache.drill.exec.vector.IntVector) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException)

Example 50 with IntVector

use of org.apache.drill.exec.vector.IntVector in project drill by apache.

the class TestToNullable method testFixedWidth.

/**
 * Fills a required INT vector with 100 values, converts it with
 * {@code toNullable()}, then checks that the source vector reports zero
 * values and the nullable vector reports all 100 values as non-null.
 */
@Test
public void testFixedWidth() {
    MaterializedField requiredSchema = SchemaBuilder.columnSchema("a", MinorType.INT, DataMode.REQUIRED);
    @SuppressWarnings("resource") IntVector source = new IntVector(requiredSchema, fixture.allocator());
    IntVector.Mutator sourceMutator = source.getMutator();
    source.allocateNew(100);
    int row = 0;
    while (row < 100) {
        sourceMutator.set(row, row * 10);
        row++;
    }
    sourceMutator.setValueCount(100);

    MaterializedField optionalSchema = SchemaBuilder.columnSchema("a", MinorType.INT, DataMode.OPTIONAL);
    NullableIntVector target = new NullableIntVector(optionalSchema, fixture.allocator());
    source.toNullable(target);

    // After the conversion the source vector must report no values.
    assertEquals(0, source.getAccessor().getValueCount());

    NullableIntVector.Accessor targetAccessor = target.getAccessor();
    assertEquals(100, targetAccessor.getValueCount());
    int pos = 0;
    while (pos < 100) {
        assertFalse(targetAccessor.isNull(pos));
        assertEquals(pos * 10, targetAccessor.get(pos));
        pos++;
    }
    target.clear();
    // Deliberately do not clear the source vector: it should be empty.
    // If it is not, the test will fail with a memory leak error.
}
Also used : NullableIntVector(org.apache.drill.exec.vector.NullableIntVector) NullableIntVector(org.apache.drill.exec.vector.NullableIntVector) IntVector(org.apache.drill.exec.vector.IntVector) MaterializedField(org.apache.drill.exec.record.MaterializedField) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Aggregations

IntVector (org.apache.drill.exec.vector.IntVector)69 Test (org.junit.Test)56 BigIntVector (org.apache.drill.exec.vector.BigIntVector)26 SchemaPath (org.apache.drill.common.expression.SchemaPath)23 ExecTest (org.apache.drill.exec.ExecTest)22 SubOperatorTest (org.apache.drill.test.SubOperatorTest)21 FunctionImplementationRegistry (org.apache.drill.exec.expr.fn.FunctionImplementationRegistry)18 PhysicalPlan (org.apache.drill.exec.physical.PhysicalPlan)18 FragmentRoot (org.apache.drill.exec.physical.base.FragmentRoot)18 SimpleRootExec (org.apache.drill.exec.physical.impl.SimpleRootExec)18 PhysicalPlanReader (org.apache.drill.exec.planner.PhysicalPlanReader)18 OperatorTest (org.apache.drill.categories.OperatorTest)14 IntColumnWriter (org.apache.drill.exec.vector.accessor.ColumnAccessors.IntColumnWriter)14 DrillbitContext (org.apache.drill.exec.server.DrillbitContext)13 FragmentContextImpl (org.apache.drill.exec.ops.FragmentContextImpl)12 UserClientConnection (org.apache.drill.exec.rpc.UserClientConnection)12 BigIntHolder (org.apache.drill.exec.expr.holders.BigIntHolder)6 IntHolder (org.apache.drill.exec.expr.holders.IntHolder)6 FragmentContext (org.apache.drill.exec.ops.FragmentContext)6 MaterializedField (org.apache.drill.exec.record.MaterializedField)6