Use of it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap in project druid by druid-io.
Class ListFilteredDimensionSpec, method filterBlackList.
/**
 * Wraps {@code selector} so that every value contained in {@code values} is hidden
 * (black-list mode).
 *
 * <p>If the selector reports a negative cardinality or cannot look names up in advance, values
 * are tested one by one through a predicate. Otherwise the ids {@code 0..cardinality-1} are
 * scanned once and two lookup tables are precomputed: a forward map (original id to filtered id,
 * {@code -1} when the id is filtered out) and a reverse array (filtered id to original id).
 *
 * @param selector the selector to filter
 * @return a selector exposing only the values that are not in {@code values}
 */
private DimensionSelector filterBlackList(DimensionSelector selector) {
  final int selectorCardinality = selector.getValueCardinality();
  if (selectorCardinality < 0 || !selector.nameLookupPossibleInAdvance()) {
    return new PredicateFilteredDimensionSelector(selector, new Predicate<String>() {
      @Override
      public boolean apply(@Nullable String input) {
        // Normalize null to "" exactly like the dictionary-scan path below, so both paths agree
        // on whether a null value is black-listed and contains() is never called with null.
        return !values.contains(Strings.nullToEmpty(input));
      }
    });
  }
  // In the worst case nothing is black-listed, so every original id survives.
  final int maxPossibleFilteredCardinality = selectorCardinality;
  int count = 0;
  final Int2IntMap forwardMapping = new Int2IntOpenHashMap(maxPossibleFilteredCardinality);
  // Ids that were filtered out are absent from the map and resolve to -1.
  forwardMapping.defaultReturnValue(-1);
  final int[] reverseMapping = new int[maxPossibleFilteredCardinality];
  for (int i = 0; i < selectorCardinality; i++) {
    if (!values.contains(Strings.nullToEmpty(selector.lookupName(i)))) {
      forwardMapping.put(i, count);
      reverseMapping[count++] = i;
    }
  }
  return new ForwardingFilteredDimensionSelector(selector, forwardMapping, reverseMapping);
}
Use of it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap in project druid by druid-io.
Class RegexFilteredDimensionSpec, method decorate.
/**
 * Wraps {@code selector} so that only values matching {@code compiledRegex} are exposed.
 * A null value is treated as the empty string before matching.
 *
 * <p>When the selector reports a negative cardinality or cannot look names up in advance, the
 * regex is applied per value through a predicate. Otherwise all ids are scanned once and a
 * forward map (original id to filtered id, {@code -1} if absent) plus its inverse array are built.
 *
 * @param selector the selector to filter; may be null
 * @return the filtering selector, or null when {@code selector} is null
 */
@Override
public DimensionSelector decorate(final DimensionSelector selector) {
  if (selector == null) {
    return null;
  }

  final int cardinality = selector.getValueCardinality();
  final boolean canScanDictionary = cardinality >= 0 && selector.nameLookupPossibleInAdvance();
  if (!canScanDictionary) {
    final Predicate<String> matchesRegex = new Predicate<String>() {
      @Override
      public boolean apply(@Nullable String input) {
        final String candidate = Strings.nullToEmpty(input);
        return compiledRegex.matcher(candidate).matches();
      }
    };
    return new PredicateFilteredDimensionSelector(selector, matchesRegex);
  }

  final Int2IntMap idToFilteredId = new Int2IntOpenHashMap();
  idToFilteredId.defaultReturnValue(-1);
  int nextFilteredId = 0;
  for (int id = 0; id < cardinality; id++) {
    final String name = Strings.nullToEmpty(selector.lookupName(id));
    if (!compiledRegex.matcher(name).matches()) {
      continue;
    }
    idToFilteredId.put(id, nextFilteredId);
    nextFilteredId++;
  }

  // Invert the forward map: filtered ids are dense, so they index the array directly.
  final int[] filteredIdToId = new int[idToFilteredId.size()];
  for (Int2IntMap.Entry mapping : idToFilteredId.int2IntEntrySet()) {
    filteredIdToId[mapping.getIntValue()] = mapping.getIntKey();
  }
  return new ForwardingFilteredDimensionSelector(selector, idToFilteredId, filteredIdToId);
}
Use of it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap in project druid by druid-io.
Class ListFilteredDimensionSpec, method filterWhiteList.
/**
 * Wraps {@code selector} so that only values contained in {@code values} are exposed
 * (white-list mode).
 *
 * <p>Falls back to a per-value predicate when the cardinality is negative, or when the selector
 * offers neither an {@code IdLookup} nor name lookup in advance. Otherwise a forward map
 * (original id to filtered id, {@code -1} if absent) and a reverse array (filtered id to original
 * id) are precomputed, either by resolving each white-listed value through the {@code IdLookup}
 * or by scanning all ids and checking their names.
 *
 * @param selector the selector to filter
 * @return a selector exposing only the white-listed values
 */
private DimensionSelector filterWhiteList(DimensionSelector selector) {
  final int cardinality = selector.getValueCardinality();
  if (cardinality < 0 || (selector.idLookup() == null && !selector.nameLookupPossibleInAdvance())) {
    return new PredicateFilteredDimensionSelector(selector, Predicates.in(values));
  }

  // Both tables are sized by the number of white-listed values.
  final int capacity = values.size();
  int kept = 0;
  final Int2IntMap idToFilteredId = new Int2IntOpenHashMap(capacity);
  idToFilteredId.defaultReturnValue(-1);
  final int[] filteredIdToId = new int[capacity];

  final IdLookup lookup = selector.idLookup();
  if (lookup == null) {
    // No reverse lookup available: walk every id and test its name against the white list.
    for (int id = 0; id < cardinality; id++) {
      final String name = Strings.nullToEmpty(selector.lookupName(id));
      if (values.contains(name)) {
        idToFilteredId.put(id, kept);
        filteredIdToId[kept] = id;
        kept++;
      }
    }
  } else {
    // Resolve each white-listed value directly; negative ids are skipped.
    for (String wanted : values) {
      final int id = lookup.lookupId(wanted);
      if (id < 0) {
        continue;
      }
      idToFilteredId.put(id, kept);
      filteredIdToId[kept] = id;
      kept++;
    }
  }
  return new ForwardingFilteredDimensionSelector(selector, idToFilteredId, filteredIdToId);
}
Use of it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap in project pinot by linkedin.
Class NoDictionarySingleColumnGroupKeyGenerator, method createGroupKeyMap.
/**
 * Creates the map used to assign group ids to raw key values of the given type.
 * A primitive-keyed fastutil map is used for INT, LONG, FLOAT and DOUBLE, and an object-keyed
 * map for STRING. Every map returns {@code INVALID_ID} for keys it does not contain.
 *
 * @param keyType data type of the group-by key
 * @return an empty map matching {@code keyType}
 * @throws IllegalArgumentException if {@code keyType} is not one of the supported types
 */
private Map createGroupKeyMap(FieldSpec.DataType keyType) {
  switch (keyType) {
    case INT: {
      Int2IntOpenHashMap intKeys = new Int2IntOpenHashMap();
      intKeys.defaultReturnValue(INVALID_ID);
      return intKeys;
    }
    case LONG: {
      Long2IntOpenHashMap longKeys = new Long2IntOpenHashMap();
      longKeys.defaultReturnValue(INVALID_ID);
      return longKeys;
    }
    case FLOAT: {
      Float2IntOpenHashMap floatKeys = new Float2IntOpenHashMap();
      floatKeys.defaultReturnValue(INVALID_ID);
      return floatKeys;
    }
    case DOUBLE: {
      Double2IntOpenHashMap doubleKeys = new Double2IntOpenHashMap();
      doubleKeys.defaultReturnValue(INVALID_ID);
      return doubleKeys;
    }
    case STRING: {
      Object2IntOpenHashMap<String> stringKeys = new Object2IntOpenHashMap<>();
      stringKeys.defaultReturnValue(INVALID_ID);
      return stringKeys;
    }
    default:
      throw new IllegalArgumentException("Illegal data type for no-dictionary key generator: " + keyType);
  }
}
Use of it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap in project pinot by linkedin.
Class SegmentDictionaryCreator, method build.
/**
 * Writes the dictionary for the column to {@code dictionaryFile} and fills the value-to-index
 * map that matches the column's data type.
 *
 * <p>For INT, FLOAT, LONG and DOUBLE columns the entries of {@code sortedList} are written in
 * order and each value is mapped to its position. For STRING and BOOLEAN columns every value is
 * padded to the longest UTF-8 encoded length (at least 1), the padded values are re-sorted and
 * written, and the map is keyed by the original (unpadded) value.
 *
 * <p>Each dictionary writer is closed in a {@code finally} block, so it is released even when
 * writing fails or the padded-value collision check throws.
 *
 * @param isSorted single-element flag; for STRING/BOOLEAN columns element 0 is set to false when
 *     a padded value compares lower than its predecessor. Not modified for numeric columns.
 * @throws RuntimeException if two distinct values pad to the same string, or the data type is
 *     not handled
 * @throws Exception if creating, writing or closing the dictionary writer fails
 */
public void build(boolean[] isSorted) throws Exception {
  switch (spec.getDataType()) {
    case INT:
      final FixedByteSingleValueMultiColWriter intDictionaryWrite = new FixedByteSingleValueMultiColWriter(dictionaryFile, rowCount, 1, V1Constants.Dict.INT_DICTIONARY_COL_SIZE);
      try {
        intValueToIndexMap = new Int2IntOpenHashMap(rowCount);
        int[] sortedInts = (int[]) sortedList;
        for (int i = 0; i < rowCount; i++) {
          final int entry = sortedInts[i];
          intDictionaryWrite.setInt(i, 0, entry);
          intValueToIndexMap.put(entry, i);
        }
      } finally {
        intDictionaryWrite.close();
      }
      break;
    case FLOAT:
      final FixedByteSingleValueMultiColWriter floatDictionaryWrite = new FixedByteSingleValueMultiColWriter(dictionaryFile, rowCount, 1, V1Constants.Dict.FLOAT_DICTIONARY_COL_SIZE);
      try {
        floatValueToIndexMap = new Float2IntOpenHashMap(rowCount);
        float[] sortedFloats = (float[]) sortedList;
        for (int i = 0; i < rowCount; i++) {
          final float entry = sortedFloats[i];
          floatDictionaryWrite.setFloat(i, 0, entry);
          floatValueToIndexMap.put(entry, i);
        }
      } finally {
        floatDictionaryWrite.close();
      }
      break;
    case LONG:
      final FixedByteSingleValueMultiColWriter longDictionaryWrite = new FixedByteSingleValueMultiColWriter(dictionaryFile, rowCount, 1, V1Constants.Dict.LONG_DICTIONARY_COL_SIZE);
      try {
        longValueToIndexMap = new Long2IntOpenHashMap(rowCount);
        long[] sortedLongs = (long[]) sortedList;
        for (int i = 0; i < rowCount; i++) {
          final long entry = sortedLongs[i];
          longDictionaryWrite.setLong(i, 0, entry);
          longValueToIndexMap.put(entry, i);
        }
      } finally {
        longDictionaryWrite.close();
      }
      break;
    case DOUBLE:
      final FixedByteSingleValueMultiColWriter doubleDictionaryWrite = new FixedByteSingleValueMultiColWriter(dictionaryFile, rowCount, 1, V1Constants.Dict.DOUBLE_DICTIONARY_COL_SIZE);
      try {
        doubleValueToIndexMap = new Double2IntOpenHashMap(rowCount);
        double[] sortedDoubles = (double[]) sortedList;
        for (int i = 0; i < rowCount; i++) {
          final double entry = sortedDoubles[i];
          doubleDictionaryWrite.setDouble(i, 0, entry);
          doubleValueToIndexMap.put(entry, i);
        }
      } finally {
        doubleDictionaryWrite.close();
      }
      break;
    case STRING:
    case BOOLEAN:
      Object[] sortedObjects = (Object[]) sortedList;
      // make sure that there is non-zero sized dictionary JIRA:PINOT-2947
      stringColumnMaxLength = 1;
      for (final Object e : sortedObjects) {
        String val = e.toString();
        int length = val.getBytes(utf8CharSet).length;
        if (stringColumnMaxLength < length) {
          stringColumnMaxLength = length;
        }
      }
      final FixedByteSingleValueMultiColWriter stringDictionaryWrite = new FixedByteSingleValueMultiColWriter(dictionaryFile, rowCount, 1, new int[] { stringColumnMaxLength });
      try {
        final String[] revised = new String[rowCount];
        // Maps each padded value back to the original value it was derived from.
        Map<String, String> revisedMap = new HashMap<String, String>();
        for (int i = 0; i < rowCount; i++) {
          final String toWrite = sortedObjects[i].toString();
          String entry = getPaddedString(toWrite, stringColumnMaxLength, paddingChar);
          revised[i] = entry;
          // Padding can change the relative order; record that the padded sequence is unsorted.
          if (isSorted[0] && i > 0 && (revised[i - 1].compareTo(entry) > 0)) {
            isSorted[0] = false;
          }
          assert (revised[i].getBytes(utf8CharSet).length == stringColumnMaxLength);
          revisedMap.put(revised[i], toWrite);
        }
        if (revisedMap.size() != sortedObjects.length) {
          // Two strings map to the same padded string in the current column
          throw new RuntimeException("Number of entries in dictionary != number of unique values in the data in column " + spec.getName());
        }
        Arrays.sort(revised);
        stringValueToIndexMap = new Object2IntOpenHashMap<>(rowCount);
        for (int i = 0; i < revised.length; i++) {
          stringDictionaryWrite.setString(i, 0, revised[i]);
          // No need to store padded value, we can store and lookup by raw value. In certain cases, original sorted order
          // may be different from revised sorted order [PINOT-2730], so would need to use the original order in value
          // to index map.
          String origString = revisedMap.get(revised[i]);
          stringValueToIndexMap.put(origString, i);
        }
      } finally {
        stringDictionaryWrite.close();
      }
      break;
    default:
      throw new RuntimeException("Unhandled type " + spec.getDataType());
  }
}
Aggregations