Search in sources :

Example 6 with SegmentDictionaryCreator

use of com.linkedin.pinot.core.segment.creator.impl.SegmentDictionaryCreator in project pinot by linkedin.

the class DictionariesTest method testPaddedNoConflict.

/**
 * Tests SegmentDictionaryCreator for the case where the input consists of an
 * empty string and the single-character string "%", using '\0' as the
 * padding character.
 *
 * For each input string, the index returned by indexOfSV for the un-padded
 * value must refer to the matching padded value in the sorted array of
 * padded strings.
 *
 * @throws Exception if building or querying the dictionary fails
 */
@Test
public void testPaddedNoConflict() throws Exception {
    File indexDir = new File("/tmp/dict.test");
    FieldSpec fieldSpec = new DimensionFieldSpec("test", DataType.STRING, true);
    char paddingChar = '\0';
    String[] inputStrings = {"", "%"};
    String[] paddedStrings = new String[inputStrings.length];
    // Sorted order: {"", "%"}
    Arrays.sort(inputStrings);
    try {
        SegmentDictionaryCreator dictionaryCreator = new SegmentDictionaryCreator(false, inputStrings, fieldSpec, indexDir, paddingChar);
        try {
            boolean[] isSorted = {true};
            dictionaryCreator.build(isSorted);
            // Get the padded strings, padded to the length the dictionary reports.
            int targetPaddedLength = dictionaryCreator.getStringColumnMaxLength();
            for (int i = 0; i < inputStrings.length; i++) {
                paddedStrings[i] = SegmentDictionaryCreator.getPaddedString(inputStrings[i], targetPaddedLength, paddingChar);
            }
            // Sort the padded values so they can be looked up by dictionary index.
            Arrays.sort(paddedStrings);
            // Assert that indexOfSV for un-padded string returns the index of the corresponding padded string.
            for (int i = 0; i < inputStrings.length; i++) {
                int paddedIndex = dictionaryCreator.indexOfSV(inputStrings[i]);
                String expectedPadded = SegmentDictionaryCreator.getPaddedString(inputStrings[i], targetPaddedLength, paddingChar);
                Assert.assertEquals(paddedStrings[paddedIndex], expectedPadded);
            }
        } finally {
            // Release the creator even when an assertion above fails.
            dictionaryCreator.close();
        }
    } finally {
        // Always remove the on-disk dictionary so later tests sharing this path start clean.
        FileUtils.deleteQuietly(indexDir);
    }
}
Also used : SegmentDictionaryCreator(com.linkedin.pinot.core.segment.creator.impl.SegmentDictionaryCreator) File(java.io.File) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) Test(org.testng.annotations.Test)

Example 7 with SegmentDictionaryCreator

use of com.linkedin.pinot.core.segment.creator.impl.SegmentDictionaryCreator in project pinot by linkedin.

the class DictionariesTest method testSingleEmptyString.

/**
 * Tests SegmentDictionaryCreator for the case where the only input string is
 * the empty string.
 *
 * This test asserts that the padded length reported by the dictionary for
 * the empty string is 1 (and not 0), and that indexOfSV for the un-padded
 * value refers to the matching padded value.
 *
 * @throws Exception if building or querying the dictionary fails
 */
@Test
public void testSingleEmptyString() throws Exception {
    File indexDir = new File("/tmp/dict.test");
    indexDir.deleteOnExit();
    FieldSpec fieldSpec = new DimensionFieldSpec("test", DataType.STRING, true);
    String[] inputStrings = {""};
    String[] paddedStrings = new String[inputStrings.length];
    try {
        // Sorted order: {""}
        Arrays.sort(inputStrings);
        boolean[] isSorted = {true};
        SegmentDictionaryCreator dictionaryCreator = new SegmentDictionaryCreator(false, inputStrings, fieldSpec, indexDir, V1Constants.Str.DEFAULT_STRING_PAD_CHAR);
        try {
            dictionaryCreator.build(isSorted);
            // Get the padded string as stored in the dictionary.
            int targetPaddedLength = dictionaryCreator.getStringColumnMaxLength();
            Assert.assertEquals(targetPaddedLength, 1);
            for (int i = 0; i < inputStrings.length; i++) {
                paddedStrings[i] = SegmentDictionaryCreator.getPaddedString(inputStrings[i], targetPaddedLength, V1Constants.Str.DEFAULT_STRING_PAD_CHAR);
            }
            // Single entry, so sorting does not reorder anything; kept to mirror the multi-value tests.
            Arrays.sort(paddedStrings);
            // Assert that indexOfSV for un-padded string returns the index of the corresponding padded string.
            for (int i = 0; i < inputStrings.length; i++) {
                int paddedIndex = dictionaryCreator.indexOfSV(inputStrings[i]);
                String expectedPadded = SegmentDictionaryCreator.getPaddedString(inputStrings[i], targetPaddedLength, V1Constants.Str.DEFAULT_STRING_PAD_CHAR);
                Assert.assertEquals(paddedStrings[paddedIndex], expectedPadded);
            }
            // Verify that empty string got padded.
            // NOTE(review): this compares two results of the same getPaddedString call, so it
            // cannot fail on its own; the length assertion above is the effective check.
            Assert.assertEquals(paddedStrings[0], SegmentDictionaryCreator.getPaddedString(inputStrings[0], targetPaddedLength, V1Constants.Str.DEFAULT_STRING_PAD_CHAR));
        } finally {
            // Release the creator even when an assertion above fails.
            dictionaryCreator.close();
        }
    } finally {
        FileUtils.deleteQuietly(indexDir);
    }
}
Also used : SegmentDictionaryCreator(com.linkedin.pinot.core.segment.creator.impl.SegmentDictionaryCreator) File(java.io.File) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) Test(org.testng.annotations.Test)

Aggregations

FieldSpec (com.linkedin.pinot.common.data.FieldSpec)7 SegmentDictionaryCreator (com.linkedin.pinot.core.segment.creator.impl.SegmentDictionaryCreator)7 DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec)6 File (java.io.File)6 Test (org.testng.annotations.Test)6 ColumnIndexCreationInfo (com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo)1 MultiValueUnsortedForwardIndexCreator (com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator)1 SingleValueSortedForwardIndexCreator (com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator)1