Use of com.linkedin.pinot.core.io.writer.impl.FixedByteSingleValueMultiColWriter in project pinot by linkedin.
The build method of the class SegmentDictionaryCreator.
/**
 * Writes the values held in {@code sortedList} to {@code dictionaryFile} and populates the
 * value-to-index map that matches the column's data type.
 *
 * <p>For INT, FLOAT, LONG and DOUBLE the value at position {@code i} of the sorted array is
 * written at row {@code i} and mapped to index {@code i}.
 *
 * <p>For STRING and BOOLEAN every value is converted with {@code toString()} and padded to the
 * longest UTF-8 byte length found in the column (at least 1). The padded values are re-sorted
 * before being written, and the index map is keyed by the original, unpadded string.
 *
 * <p>Every dictionary writer is closed in a {@code finally} block so that it is released even
 * when a write, a cast, or the duplicate-value check fails.
 *
 * @param isSorted single-element flag array; only consulted for STRING/BOOLEAN columns, where
 *     {@code isSorted[0]} is reset to {@code false} if the padded values are not in ascending
 *     order in their incoming sequence
 * @throws RuntimeException if two distinct values collapse to the same padded string, or if the
 *     column's data type is not handled
 * @throws Exception declared by the signature; presumably raised by the dictionary writer
 *     (not visible from this method alone)
 */
public void build(boolean[] isSorted) throws Exception {
  switch (spec.getDataType()) {
    case INT: {
      final FixedByteSingleValueMultiColWriter intDictionaryWrite =
          new FixedByteSingleValueMultiColWriter(dictionaryFile, rowCount, 1, V1Constants.Dict.INT_DICTIONARY_COL_SIZE);
      try {
        intValueToIndexMap = new Int2IntOpenHashMap(rowCount);
        int[] sortedInts = (int[]) sortedList;
        for (int i = 0; i < rowCount; i++) {
          final int entry = sortedInts[i];
          intDictionaryWrite.setInt(i, 0, entry);
          intValueToIndexMap.put(entry, i);
        }
      } finally {
        intDictionaryWrite.close();
      }
      break;
    }
    case FLOAT: {
      final FixedByteSingleValueMultiColWriter floatDictionaryWrite =
          new FixedByteSingleValueMultiColWriter(dictionaryFile, rowCount, 1, V1Constants.Dict.FLOAT_DICTIONARY_COL_SIZE);
      try {
        floatValueToIndexMap = new Float2IntOpenHashMap(rowCount);
        float[] sortedFloats = (float[]) sortedList;
        for (int i = 0; i < rowCount; i++) {
          final float entry = sortedFloats[i];
          floatDictionaryWrite.setFloat(i, 0, entry);
          floatValueToIndexMap.put(entry, i);
        }
      } finally {
        floatDictionaryWrite.close();
      }
      break;
    }
    case LONG: {
      final FixedByteSingleValueMultiColWriter longDictionaryWrite =
          new FixedByteSingleValueMultiColWriter(dictionaryFile, rowCount, 1, V1Constants.Dict.LONG_DICTIONARY_COL_SIZE);
      try {
        longValueToIndexMap = new Long2IntOpenHashMap(rowCount);
        long[] sortedLongs = (long[]) sortedList;
        for (int i = 0; i < rowCount; i++) {
          final long entry = sortedLongs[i];
          longDictionaryWrite.setLong(i, 0, entry);
          longValueToIndexMap.put(entry, i);
        }
      } finally {
        longDictionaryWrite.close();
      }
      break;
    }
    case DOUBLE: {
      final FixedByteSingleValueMultiColWriter doubleDictionaryWrite =
          new FixedByteSingleValueMultiColWriter(dictionaryFile, rowCount, 1, V1Constants.Dict.DOUBLE_DICTIONARY_COL_SIZE);
      try {
        doubleValueToIndexMap = new Double2IntOpenHashMap(rowCount);
        double[] sortedDoubles = (double[]) sortedList;
        for (int i = 0; i < rowCount; i++) {
          final double entry = sortedDoubles[i];
          doubleDictionaryWrite.setDouble(i, 0, entry);
          doubleValueToIndexMap.put(entry, i);
        }
      } finally {
        doubleDictionaryWrite.close();
      }
      break;
    }
    case STRING:
    case BOOLEAN: {
      Object[] sortedObjects = (Object[]) sortedList;
      // make sure that there is non-zero sized dictionary JIRA:PINOT-2947
      stringColumnMaxLength = 1;
      for (final Object e : sortedObjects) {
        String val = e.toString();
        int length = val.getBytes(utf8CharSet).length;
        if (stringColumnMaxLength < length) {
          stringColumnMaxLength = length;
        }
      }
      final FixedByteSingleValueMultiColWriter stringDictionaryWrite =
          new FixedByteSingleValueMultiColWriter(dictionaryFile, rowCount, 1, new int[] { stringColumnMaxLength });
      try {
        final String[] revised = new String[rowCount];
        // Padded value -> original value; also used to detect two values padding to the same string.
        Map<String, String> revisedMap = new HashMap<>();
        for (int i = 0; i < rowCount; i++) {
          final String toWrite = sortedObjects[i].toString();
          String entry = getPaddedString(toWrite, stringColumnMaxLength, paddingChar);
          revised[i] = entry;
          // Padding can change relative order; report that through the caller's flag.
          if (isSorted[0] && i > 0 && (revised[i - 1].compareTo(entry) > 0)) {
            isSorted[0] = false;
          }
          assert (revised[i].getBytes(utf8CharSet).length == stringColumnMaxLength);
          revisedMap.put(revised[i], toWrite);
        }
        if (revisedMap.size() != sortedObjects.length) {
          // Two strings map to the same padded string in the current column
          throw new RuntimeException("Number of entries in dictionary != number of unique values in the data in column " + spec.getName());
        }
        Arrays.sort(revised);
        stringValueToIndexMap = new Object2IntOpenHashMap<>(rowCount);
        for (int i = 0; i < revised.length; i++) {
          stringDictionaryWrite.setString(i, 0, revised[i]);
          // No need to store padded value, we can store and lookup by raw value. In certain cases, original sorted order
          // may be different from revised sorted order [PINOT-2730], so would need to use the original order in value
          // to index map.
          String origString = revisedMap.get(revised[i]);
          stringValueToIndexMap.put(origString, i);
        }
      } finally {
        // Also reached when the duplicate check above throws, which previously leaked the writer.
        stringDictionaryWrite.close();
      }
      break;
    }
    default:
      throw new RuntimeException("Unhandled type " + spec.getDataType());
  }
}
Aggregations