Search in sources :

Example 1 with StringStore

use of com.bakdata.conquery.models.events.stores.root.StringStore in project conquery by bakdata.

the class AbstractSelectFilter method addImport.

/**
 * Adds every string held by this filter's column store for the given import to {@code values}.
 * The {@code values} set is created on first use.
 *
 * @param imp the import whose column store is read
 */
@Override
public void addImport(Import imp) {
    // Lazily set up the collecting set the first time an import arrives.
    if (values == null) {
        values = new HashSet<>();
    }
    // The store resolved for this column is expected to be a StringStore; the cast fails otherwise.
    final StringStore strings = (StringStore) getColumn().getTypeFor(imp);
    values.addAll(Sets.newHashSet(strings.iterator()));
}
Also used : ColumnStore(com.bakdata.conquery.models.events.stores.root.ColumnStore) StringStore(com.bakdata.conquery.models.events.stores.root.StringStore)

Example 2 with StringStore

use of com.bakdata.conquery.models.events.stores.root.StringStore in project conquery by bakdata.

the class ImportJob method applyDictionaryMappings.

/**
 * Apply new positions into incoming shared dictionaries.
 *
 * For every mapped column, a new index store is sized from the minimum and maximum
 * of the mapping's target ids, the mapping is applied to the column's string store,
 * and the new index store is installed on it. One unit of progress is reported per column.
 *
 * @param mappings dictionary mappings keyed by column name
 * @param values   column stores keyed by column name; each mapped column is cast to {@link StringStore}
 */
private void applyDictionaryMappings(Map<String, DictionaryMapping> mappings, Map<String, ColumnStore> values) {
    final ProgressReporter progress = getProgressReporter().subJob(mappings.size());
    mappings.forEach((column, dictionaryMapping) -> {
        final StringStore store = (StringStore) values.get(column);
        log.debug("Remapping Column[{}] = {} with {}", column, store, dictionaryMapping);
        // The index column is about to be remapped and may change in size, so its type is chosen anew.
        final IntegerParser sizingParser = new IntegerParser(config);
        final IntSummaryStatistics idRange = dictionaryMapping.target().intStream().summaryStatistics();
        sizingParser.setLines(store.getLines());
        sizingParser.setMinValue(idRange.getMin());
        sizingParser.setMaxValue(idRange.getMax());
        final IntegerStore remappedIndex = sizingParser.findBestType();
        log.trace("Decided for {}", remappedIndex);
        // Write the remapped ids into the new index store first, then swap it in.
        dictionaryMapping.applyToStore(store, remappedIndex);
        store.setIndexStore(remappedIndex);
        progress.report(1);
    });
}
Also used : IntegerStore(com.bakdata.conquery.models.events.stores.root.IntegerStore) ProgressReporter(com.bakdata.conquery.util.progressreporter.ProgressReporter) StringStore(com.bakdata.conquery.models.events.stores.root.StringStore) DictionaryMapping(com.bakdata.conquery.models.dictionary.DictionaryMapping) IntegerParser(com.bakdata.conquery.models.preproc.parser.specific.IntegerParser)

Example 3 with StringStore

use of com.bakdata.conquery.models.events.stores.root.StringStore in project conquery by bakdata.

the class StringParser method decideType.

/**
 * Decides which {@link StringStore} implementation is used for the collected strings.
 *
 * <ul>
 *   <li>No strings: {@code EmptyStore.INSTANCE}.</li>
 *   <li>Exactly one string: a {@link StringTypeSingleton}.</li>
 *   <li>Otherwise: {@code prefix} and {@code suffix} are cut off every key, all guessers are
 *       evaluated, and the one with the smallest estimate wins. If an affix was cut off, the result
 *       is wrapped in a {@link StringTypePrefixSuffix} carrying the affixes.</li>
 * </ul>
 *
 * @return the selected store
 */
@Override
protected StringStore decideType() {
    // check if a singleton type is enough
    if (strings.isEmpty()) {
        return EmptyStore.INSTANCE;
    }
    // Is this a singleton?
    if (strings.size() == 1) {
        return new StringTypeSingleton(strings.keySet().iterator().next(), BitSetStore.create(getLines()));
    }
    // remove prefix and suffix
    if (!Strings.isNullOrEmpty(prefix) || !Strings.isNullOrEmpty(suffix)) {
        // Prefix and suffix may overlap on the shortest key (e.g. "aa" and "aaa" share prefix "aa" and
        // suffix "aa"). Cutting both would make the substring end lie before its start and throw, so the
        // suffix is shortened until both affixes fit into the shortest key. A tail of a common suffix is
        // still a common suffix, hence prefix + stripped + suffix still reproduces every original key.
        // NOTE(review): assumes prefix/suffix are the common affixes of all keys - confirm where they are set.
        int shortestKey = Integer.MAX_VALUE;
        for (String key : strings.keySet()) {
            shortestKey = Math.min(shortestKey, key.length());
        }
        final int roomForSuffix = shortestKey - prefix.length();
        if (roomForSuffix >= 0 && suffix.length() > roomForSuffix) {
            suffix = suffix.substring(suffix.length() - roomForSuffix);
        }
        log.debug("Reduced strings by the '{}' prefix and '{}' suffix", prefix, suffix);
        Object2IntMap<String> oldStrings = strings;
        strings = new Object2IntOpenHashMap<>(oldStrings.size());
        for (Object2IntMap.Entry<String> e : oldStrings.object2IntEntrySet()) {
            final String key = e.getKey();
            strings.put(key.substring(prefix.length(), key.length() - suffix.length()), e.getIntValue());
        }
    }
    decode();
    // Try all guesses and select the least memory intensive one.
    // TODO FK: Simplify this, the guessers do a lot of weird lazy computation but implicit.
    Guess guess = Stream.of(new TrieTypeGuesser(this), new MapTypeGuesser(this), new NumberTypeGuesser(this, getConfig()))
                        .map(StringTypeGuesser::createGuess)
                        // guessers that yield no guess are skipped
                        .filter(Objects::nonNull)
                        .min(Comparator.naturalOrder())
                        .orElseThrow();
    log.debug("\tUsing {}(est. {})", guess.getGuesser(), BinaryByteUnit.format(guess.estimate()));
    StringStore result = guess.getType();
    // wrap in prefix suffix
    if (!Strings.isNullOrEmpty(prefix) || !Strings.isNullOrEmpty(suffix)) {
        result = new StringTypePrefixSuffix(result, prefix, suffix);
    }
    return result;
}
Also used : Object2IntMap(it.unimi.dsi.fastutil.objects.Object2IntMap) Guess(com.bakdata.conquery.models.preproc.parser.specific.string.StringTypeGuesser.Guess) StringStore(com.bakdata.conquery.models.events.stores.root.StringStore) ToString(lombok.ToString) MapTypeGuesser(com.bakdata.conquery.models.preproc.parser.specific.string.MapTypeGuesser) StringTypeSingleton(com.bakdata.conquery.models.events.stores.specific.string.StringTypeSingleton) TrieTypeGuesser(com.bakdata.conquery.models.preproc.parser.specific.string.TrieTypeGuesser) Objects(java.util.Objects) NumberTypeGuesser(com.bakdata.conquery.models.preproc.parser.specific.string.NumberTypeGuesser) StringTypePrefixSuffix(com.bakdata.conquery.models.events.stores.specific.string.StringTypePrefixSuffix)

Example 4 with StringStore

use of com.bakdata.conquery.models.events.stores.root.StringStore in project conquery by bakdata.

the class Preprocessed method collectDictionaries.

/**
 * Gathers the underlying dictionaries of all string columns.
 *
 * Columns whose store is not a {@link StringStore}, or whose store reports no
 * underlying dictionary ({@code null}), are left out of the result.
 *
 * @param columnStores column stores keyed by column name
 * @return a new map from column name to that column's underlying dictionary
 */
private static Map<String, Dictionary> collectDictionaries(Map<String, ColumnStore> columnStores) {
    final Map<String, Dictionary> dictionariesByColumn = new HashMap<>();
    columnStores.forEach((columnName, store) -> {
        if (store instanceof StringStore) {
            final Dictionary underlying = ((StringStore) store).getUnderlyingDictionary();
            if (underlying != null) {
                dictionariesByColumn.put(columnName, underlying);
            }
        }
    });
    return dictionariesByColumn;
}
Also used : Dictionary(com.bakdata.conquery.models.dictionary.Dictionary) ColumnStore(com.bakdata.conquery.models.events.stores.root.ColumnStore) HashMap(java.util.HashMap) Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) StringStore(com.bakdata.conquery.models.events.stores.root.StringStore) Int2IntMap(it.unimi.dsi.fastutil.ints.Int2IntMap) Int2IntAVLTreeMap(it.unimi.dsi.fastutil.ints.Int2IntAVLTreeMap) Map(java.util.Map) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap)

Example 5 with StringStore

use of com.bakdata.conquery.models.events.stores.root.StringStore in project conquery by bakdata.

the class MultiSelectFilterNode method findIds.

/**
 * Translates the given strings into the ids that the bucket's store for this node's column assigns them.
 *
 * @param bucket the bucket whose store for {@code getColumn()} performs the lookup; it is cast to {@link StringStore}
 * @param values the strings to look up
 * @return an array of the same length as {@code values}, holding the result of {@code getId} at each position
 */
private int[] findIds(Bucket bucket, String[] values) {
    final int[] ids = new int[values.length];
    final StringStore store = (StringStore) bucket.getStore(getColumn());
    int position = 0;
    for (String value : values) {
        ids[position++] = store.getId(value);
    }
    return ids;
}
Also used : StringStore(com.bakdata.conquery.models.events.stores.root.StringStore) ToString(lombok.ToString)

Aggregations

StringStore (com.bakdata.conquery.models.events.stores.root.StringStore)7 ColumnStore (com.bakdata.conquery.models.events.stores.root.ColumnStore)3 Map (java.util.Map)2 ToString (lombok.ToString)2 Column (com.bakdata.conquery.models.datasets.Column)1 ConceptTreeCache (com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeCache)1 ConceptTreeChild (com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeChild)1 TreeConcept (com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept)1 Dictionary (com.bakdata.conquery.models.dictionary.Dictionary)1 DictionaryMapping (com.bakdata.conquery.models.dictionary.DictionaryMapping)1 IntegerStore (com.bakdata.conquery.models.events.stores.root.IntegerStore)1 StringTypePrefixSuffix (com.bakdata.conquery.models.events.stores.specific.string.StringTypePrefixSuffix)1 StringTypeSingleton (com.bakdata.conquery.models.events.stores.specific.string.StringTypeSingleton)1 ConceptConfigurationException (com.bakdata.conquery.models.exceptions.ConceptConfigurationException)1 IntegerParser (com.bakdata.conquery.models.preproc.parser.specific.IntegerParser)1 MapTypeGuesser (com.bakdata.conquery.models.preproc.parser.specific.string.MapTypeGuesser)1 NumberTypeGuesser (com.bakdata.conquery.models.preproc.parser.specific.string.NumberTypeGuesser)1 Guess (com.bakdata.conquery.models.preproc.parser.specific.string.StringTypeGuesser.Guess)1 TrieTypeGuesser (com.bakdata.conquery.models.preproc.parser.specific.string.TrieTypeGuesser)1 CalculatedValue (com.bakdata.conquery.util.CalculatedValue)1