Search in sources :

Example 1 with Long2IntOpenHashMap

Use of it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap in the pinot project by LinkedIn.

From the class NoDictionarySingleColumnGroupKeyGenerator, method createGroupKeyMap.

/**
 * Builds an empty group-key map specialized for the given key data type.
 * Numeric key types get a primitive-keyed map and STRING gets an object-keyed map;
 * every map is configured to report INVALID_ID as its default return value.
 *
 * @param keyType DataType for the key
 * @return Map suited to the key type, with INVALID_ID as its default return value
 * @throws IllegalArgumentException if keyType is not INT, LONG, FLOAT, DOUBLE or STRING
 */
private Map createGroupKeyMap(FieldSpec.DataType keyType) {
    switch (keyType) {
        case INT: {
            Int2IntMap intKeys = new Int2IntOpenHashMap();
            intKeys.defaultReturnValue(INVALID_ID);
            return intKeys;
        }
        case LONG: {
            Long2IntOpenHashMap longKeys = new Long2IntOpenHashMap();
            longKeys.defaultReturnValue(INVALID_ID);
            return longKeys;
        }
        case FLOAT: {
            Float2IntOpenHashMap floatKeys = new Float2IntOpenHashMap();
            floatKeys.defaultReturnValue(INVALID_ID);
            return floatKeys;
        }
        case DOUBLE: {
            Double2IntOpenHashMap doubleKeys = new Double2IntOpenHashMap();
            doubleKeys.defaultReturnValue(INVALID_ID);
            return doubleKeys;
        }
        case STRING: {
            Object2IntOpenHashMap<String> stringKeys = new Object2IntOpenHashMap<>();
            stringKeys.defaultReturnValue(INVALID_ID);
            return stringKeys;
        }
        default:
            // Any other data type has no map specialization here.
            throw new IllegalArgumentException("Illegal data type for no-dictionary key generator: " + keyType);
    }
}
Also used : Long2IntOpenHashMap(it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap) Float2IntOpenHashMap(it.unimi.dsi.fastutil.floats.Float2IntOpenHashMap) Double2IntOpenHashMap(it.unimi.dsi.fastutil.doubles.Double2IntOpenHashMap) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) Int2IntMap(it.unimi.dsi.fastutil.ints.Int2IntMap) Double2IntOpenHashMap(it.unimi.dsi.fastutil.doubles.Double2IntOpenHashMap) Int2IntOpenHashMap(it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap) Long2IntMap(it.unimi.dsi.fastutil.longs.Long2IntMap) Float2IntMap(it.unimi.dsi.fastutil.floats.Float2IntMap) Double2IntMap(it.unimi.dsi.fastutil.doubles.Double2IntMap) Long2IntOpenHashMap(it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap) Object2IntMap(it.unimi.dsi.fastutil.objects.Object2IntMap) Map(java.util.Map) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) Float2IntOpenHashMap(it.unimi.dsi.fastutil.floats.Float2IntOpenHashMap) Int2IntOpenHashMap(it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap) Int2IntMap(it.unimi.dsi.fastutil.ints.Int2IntMap)

Example 2 with Long2IntOpenHashMap

Use of it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap in the pinot project by LinkedIn.

From the class SegmentDictionaryCreator, method build.

/**
 * Writes the dictionary file for this column and fills the value-to-index map that
 * matches the column's data type.
 *
 * <p>The values come from {@code sortedList}, cast to the array type corresponding to
 * {@code spec.getDataType()}. For numeric types, the value at position {@code i}
 * (for {@code i < rowCount}) is written to row {@code i} of the dictionary file and
 * recorded in the map as {@code value -> i}.
 *
 * <p>For STRING and BOOLEAN, each value is converted with {@code toString()} and padded
 * through {@code getPaddedString} to {@code stringColumnMaxLength}, which is set to the
 * largest UTF-8 byte length among the values (at least 1). The padded values are sorted
 * and written in that order, while the map is keyed by the original (unpadded) string.
 *
 * <p>The dictionary writer is closed in a {@code finally} block, so it is released even
 * when a cast, a write, or the uniqueness check fails.
 *
 * @param isSorted flag array whose element 0 is read and possibly cleared in the
 *                 STRING/BOOLEAN case: it is set to {@code false} when a padded value
 *                 compares lower than the padded value before it. Not accessed for
 *                 numeric types.
 * @throws RuntimeException if two values map to the same padded string, or if the data
 *                          type is not handled
 * @throws Exception if the cast of {@code sortedList} or the underlying writer fails
 */
public void build(boolean[] isSorted) throws Exception {
    switch(spec.getDataType()) {
        case INT:
            final FixedByteSingleValueMultiColWriter intDictionaryWrite = new FixedByteSingleValueMultiColWriter(dictionaryFile, rowCount, 1, V1Constants.Dict.INT_DICTIONARY_COL_SIZE);
            try {
                intValueToIndexMap = new Int2IntOpenHashMap(rowCount);
                int[] sortedInts = (int[]) sortedList;
                for (int i = 0; i < rowCount; i++) {
                    final int entry = sortedInts[i];
                    intDictionaryWrite.setInt(i, 0, entry);
                    intValueToIndexMap.put(entry, i);
                }
            } finally {
                // Release the writer on failure as well as on success.
                intDictionaryWrite.close();
            }
            break;
        case FLOAT:
            final FixedByteSingleValueMultiColWriter floatDictionaryWrite = new FixedByteSingleValueMultiColWriter(dictionaryFile, rowCount, 1, V1Constants.Dict.FLOAT_DICTIONARY_COL_SIZE);
            try {
                floatValueToIndexMap = new Float2IntOpenHashMap(rowCount);
                float[] sortedFloats = (float[]) sortedList;
                for (int i = 0; i < rowCount; i++) {
                    final float entry = sortedFloats[i];
                    floatDictionaryWrite.setFloat(i, 0, entry);
                    floatValueToIndexMap.put(entry, i);
                }
            } finally {
                floatDictionaryWrite.close();
            }
            break;
        case LONG:
            final FixedByteSingleValueMultiColWriter longDictionaryWrite = new FixedByteSingleValueMultiColWriter(dictionaryFile, rowCount, 1, V1Constants.Dict.LONG_DICTIONARY_COL_SIZE);
            try {
                longValueToIndexMap = new Long2IntOpenHashMap(rowCount);
                long[] sortedLongs = (long[]) sortedList;
                for (int i = 0; i < rowCount; i++) {
                    final long entry = sortedLongs[i];
                    longDictionaryWrite.setLong(i, 0, entry);
                    longValueToIndexMap.put(entry, i);
                }
            } finally {
                longDictionaryWrite.close();
            }
            break;
        case DOUBLE:
            final FixedByteSingleValueMultiColWriter doubleDictionaryWrite = new FixedByteSingleValueMultiColWriter(dictionaryFile, rowCount, 1, V1Constants.Dict.DOUBLE_DICTIONARY_COL_SIZE);
            try {
                doubleValueToIndexMap = new Double2IntOpenHashMap(rowCount);
                double[] sortedDoubles = (double[]) sortedList;
                for (int i = 0; i < rowCount; i++) {
                    final double entry = sortedDoubles[i];
                    doubleDictionaryWrite.setDouble(i, 0, entry);
                    doubleValueToIndexMap.put(entry, i);
                }
            } finally {
                doubleDictionaryWrite.close();
            }
            break;
        case STRING:
        case BOOLEAN:
            Object[] sortedObjects = (Object[]) sortedList;
            // make sure that there is non-zero sized dictionary JIRA:PINOT-2947
            stringColumnMaxLength = 1;
            for (final Object e : sortedObjects) {
                String val = e.toString();
                int length = val.getBytes(utf8CharSet).length;
                if (stringColumnMaxLength < length) {
                    stringColumnMaxLength = length;
                }
            }
            final FixedByteSingleValueMultiColWriter stringDictionaryWrite = new FixedByteSingleValueMultiColWriter(dictionaryFile, rowCount, 1, new int[] { stringColumnMaxLength });
            try {
                final String[] revised = new String[rowCount];
                // Maps each padded string back to the original string it was built from.
                Map<String, String> revisedMap = new HashMap<String, String>();
                for (int i = 0; i < rowCount; i++) {
                    final String toWrite = sortedObjects[i].toString();
                    String entry = getPaddedString(toWrite, stringColumnMaxLength, paddingChar);
                    revised[i] = entry;
                    // Padding can change the relative order; clear the flag once an inversion is seen.
                    if (isSorted[0] && i > 0 && (revised[i - 1].compareTo(entry) > 0)) {
                        isSorted[0] = false;
                    }
                    assert (revised[i].getBytes(utf8CharSet).length == stringColumnMaxLength);
                    revisedMap.put(revised[i], toWrite);
                }
                if (revisedMap.size() != sortedObjects.length) {
                    // Two strings map to the same padded string in the current column
                    throw new RuntimeException("Number of entries in dictionary != number of unique values in the data in column " + spec.getName());
                }
                Arrays.sort(revised);
                stringValueToIndexMap = new Object2IntOpenHashMap<>(rowCount);
                for (int i = 0; i < revised.length; i++) {
                    stringDictionaryWrite.setString(i, 0, revised[i]);
                    // No need to store padded value, we can store and lookup by raw value. In certain cases, original sorted order
                    // may be different from revised sorted order [PINOT-2730], so would need to use the original order in value
                    // to index map.
                    String origString = revisedMap.get(revised[i]);
                    stringValueToIndexMap.put(origString, i);
                }
            } finally {
                // Previously the writer stayed open when the uniqueness check above threw.
                stringDictionaryWrite.close();
            }
            break;
        default:
            throw new RuntimeException("Unhandled type " + spec.getDataType());
    }
}
Also used : Long2IntOpenHashMap(it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap) Double2IntOpenHashMap(it.unimi.dsi.fastutil.doubles.Double2IntOpenHashMap) Int2IntOpenHashMap(it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap) HashMap(java.util.HashMap) Long2IntOpenHashMap(it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) Float2IntOpenHashMap(it.unimi.dsi.fastutil.floats.Float2IntOpenHashMap) FixedByteSingleValueMultiColWriter(com.linkedin.pinot.core.io.writer.impl.FixedByteSingleValueMultiColWriter) Int2IntOpenHashMap(it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap) Float2IntOpenHashMap(it.unimi.dsi.fastutil.floats.Float2IntOpenHashMap) Double2IntOpenHashMap(it.unimi.dsi.fastutil.doubles.Double2IntOpenHashMap)

Aggregations

Double2IntOpenHashMap (it.unimi.dsi.fastutil.doubles.Double2IntOpenHashMap)2 Float2IntOpenHashMap (it.unimi.dsi.fastutil.floats.Float2IntOpenHashMap)2 Int2IntOpenHashMap (it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap)2 Long2IntOpenHashMap (it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap)2 Object2IntOpenHashMap (it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap)2 FixedByteSingleValueMultiColWriter (com.linkedin.pinot.core.io.writer.impl.FixedByteSingleValueMultiColWriter)1 Double2IntMap (it.unimi.dsi.fastutil.doubles.Double2IntMap)1 Float2IntMap (it.unimi.dsi.fastutil.floats.Float2IntMap)1 Int2IntMap (it.unimi.dsi.fastutil.ints.Int2IntMap)1 Long2IntMap (it.unimi.dsi.fastutil.longs.Long2IntMap)1 Object2IntMap (it.unimi.dsi.fastutil.objects.Object2IntMap)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1