use of com.bakdata.conquery.models.events.stores.root.StringStore in project conquery by bakdata.
the class AbstractSelectFilter method addImport.
/**
 * Adds every string yielded by the import's column store to {@code values}.
 *
 * <p>{@code values} is created on first use. The store returned for this filter's column is
 * cast to {@link StringStore}; a store of any other type fails with a {@link ClassCastException}.
 *
 * @param imp the import whose column store is read
 */
@Override
public void addImport(Import imp) {
    // Lazily create the accumulator before touching the import.
    if (values == null) {
        values = new HashSet<>();
    }

    final StringStore stringStore = (StringStore) getColumn().getTypeFor(imp);
    values.addAll(Sets.newHashSet(stringStore.iterator()));
}
use of com.bakdata.conquery.models.events.stores.root.StringStore in project conquery by bakdata.
the class ImportJob method applyDictionaryMappings.
/**
 * Remaps every column that has a {@link DictionaryMapping} onto the mapping's target values.
 *
 * <p>For each mapped column a new index store is chosen, the mapping is applied to the column's
 * {@link StringStore} together with that index store, and the index store is then installed on
 * the column. One unit of progress is reported per processed column.
 *
 * @param mappings dictionary mappings keyed by column name
 * @param values   column stores keyed by column name; the store of every mapped column is cast
 *                 to {@link StringStore}
 */
private void applyDictionaryMappings(Map<String, DictionaryMapping> mappings, Map<String, ColumnStore> values) {
    final ProgressReporter progress = getProgressReporter().subJob(mappings.size());

    for (Map.Entry<String, DictionaryMapping> mapped : mappings.entrySet()) {
        final String column = mapped.getKey();
        final DictionaryMapping dictionaryMapping = mapped.getValue();
        final StringStore stringColumn = (StringStore) values.get(column);

        log.debug("Remapping Column[{}] = {} with {}", column, stringColumn, dictionaryMapping);

        final IntegerStore remappedIndex = chooseIndexStore(stringColumn, dictionaryMapping);
        log.trace("Decided for {}", remappedIndex);

        dictionaryMapping.applyToStore(stringColumn, remappedIndex);
        stringColumn.setIndexStore(remappedIndex);

        progress.report(1);
    }
}

/**
 * Chooses an index store for a column that is about to be remapped.
 *
 * <p>The index column needs a new type because the remapped values may need a different size
 * than before. The parser is fed the column's line count and the min/max of the mapping's
 * target values.
 *
 * <p>NOTE(review): for an empty target, {@link IntSummaryStatistics} reports
 * {@code Integer.MAX_VALUE} as min and {@code Integer.MIN_VALUE} as max - confirm the parser
 * copes with that.
 */
private IntegerStore chooseIndexStore(StringStore stringStore, DictionaryMapping mapping) {
    final IntegerParser parser = new IntegerParser(config);
    final IntSummaryStatistics targetRange = mapping.target().intStream().summaryStatistics();

    parser.setLines(stringStore.getLines());
    parser.setMinValue(targetRange.getMin());
    parser.setMaxValue(targetRange.getMax());

    return parser.findBestType();
}
use of com.bakdata.conquery.models.events.stores.root.StringStore in project conquery by bakdata.
the class StringParser method decideType.
/**
 * Chooses the {@link StringStore} used for the strings collected in {@code strings}.
 *
 * <p>Returns {@code EmptyStore.INSTANCE} when {@code strings} is empty and a
 * {@link StringTypeSingleton} when it holds exactly one key. Otherwise, if {@code prefix} or
 * {@code suffix} is non-empty, {@code strings} is replaced by a map whose keys have
 * {@code prefix.length()} leading and {@code suffix.length()} trailing characters cut off;
 * then {@code decode()} runs, the minimal guess of the trie, map and number guessers is taken,
 * and its type is wrapped in a {@link StringTypePrefixSuffix} if a prefix or suffix is set.
 *
 * @return the selected store
 * @throws java.util.NoSuchElementException if every guesser produces a {@code null} guess
 */
@Override
protected StringStore decideType() {
// Nothing was collected, so the shared empty store is returned.
if (strings.isEmpty()) {
return EmptyStore.INSTANCE;
}
// Exactly one distinct string: a singleton store over a bit set of getLines() entries is used.
if (strings.size() == 1) {
StringTypeSingleton type = new StringTypeSingleton(strings.keySet().iterator().next(), BitSetStore.create(getLines()));
return type;
}
// remove prefix and suffix
// NOTE(review): substring() below throws if prefix.length() + suffix.length() exceeds a key's
// length, and prefix.length()/suffix.length() throw if only one of the two is null.
// Presumably the code maintaining prefix/suffix rules both out - TODO confirm.
if (!StringUtils.isEmpty(prefix) || !StringUtils.isEmpty(suffix)) {
log.debug("Reduced strings by the '{}' prefix and '{}' suffix", prefix, suffix);
// Rebuild the map with shortened keys; the int value of every entry is carried over unchanged.
Object2IntMap<String> oldStrings = strings;
strings = new Object2IntOpenHashMap<>(oldStrings.size());
for (Object2IntMap.Entry<String> e : oldStrings.object2IntEntrySet()) {
strings.put(e.getKey().substring(prefix.length(), e.getKey().length() - suffix.length()), e.getIntValue());
}
}
// Runs after the keys were shortened, so it sees the reduced strings.
decode();
// Try all guesses and select the least memory intensive one.
// TODO FK: Simplify this, the guessers do a lot of weird lazy computation but implicit.
// Guessers may return null guesses, which are skipped; orElseThrow() fails if none remains.
Guess guess = Stream.of(new TrieTypeGuesser(this), new MapTypeGuesser(this), new NumberTypeGuesser(this, getConfig())).map(StringTypeGuesser::createGuess).filter(Objects::nonNull).min(Comparator.naturalOrder()).orElseThrow();
log.debug("\tUsing {}(est. {})", guess.getGuesser(), BinaryByteUnit.format(guess.estimate()));
StringStore result = guess.getType();
// wrap in prefix suffix
if (!Strings.isNullOrEmpty(prefix) || !Strings.isNullOrEmpty(suffix)) {
result = new StringTypePrefixSuffix(result, prefix, suffix);
}
return result;
}
use of com.bakdata.conquery.models.events.stores.root.StringStore in project conquery by bakdata.
the class Preprocessed method collectDictionaries.
/**
 * Gathers the underlying dictionaries of all string columns.
 *
 * <p>Stores that are not a {@link StringStore}, and string stores whose underlying dictionary
 * is {@code null}, contribute no entry.
 *
 * @param columnStores column stores keyed by column name
 * @return a new map from column name to that column's underlying dictionary
 */
private static Map<String, Dictionary> collectDictionaries(Map<String, ColumnStore> columnStores) {
    final Map<String, Dictionary> dictionariesByColumn = new HashMap<>();

    columnStores.forEach((columnName, store) -> {
        if (store instanceof StringStore) {
            final Dictionary underlying = ((StringStore) store).getUnderlyingDictionary();
            if (underlying != null) {
                dictionariesByColumn.put(columnName, underlying);
            }
        }
    });

    return dictionariesByColumn;
}
use of com.bakdata.conquery.models.events.stores.root.StringStore in project conquery by bakdata.
the class MultiSelectFilterNode method findIds.
/**
 * Looks up the id of each given value in the bucket's store for this node's column.
 *
 * @param bucket the bucket whose column store is queried; the store is cast to {@link StringStore}
 * @param values the strings to look up
 * @return an array of the same length as {@code values}, holding at each position the result of
 *         {@code getId} for the value at that position
 */
private int[] findIds(Bucket bucket, String[] values) {
    final int[] ids = new int[values.length];
    final StringStore store = (StringStore) bucket.getStore(getColumn());

    for (int i = 0; i < ids.length; i++) {
        ids[i] = store.getId(values[i]);
    }
    return ids;
}
Aggregations