Search in sources:

Example 1 with IntegerStore

use of com.bakdata.conquery.models.events.stores.root.IntegerStore in project conquery by bakdata.

The method createGuess of the class MapTypeGuesser.

/**
 * Builds a guess that stores the strings through a {@link MapDictionary}.
 *
 * <p>The size estimate is computed up front from the number of distinct strings and the
 * summed length of their decoded byte arrays. The dictionary itself is only created and
 * filled when {@code getType()} is invoked on the returned guess.
 *
 * @return a guess wrapping a {@link StringTypeEncoded} over a dictionary-backed store
 */
@Override
public Guess createGuess() {
    final IntegerStore indexStore = p.decideIndexType();
    // The dictionary is passed as null here; it is attached later in getType().
    final StringTypeDictionary dictionaryType = new StringTypeDictionary(indexStore, null);

    final int distinctStrings = p.getStrings().size();
    final long decodedBytesTotal = p.getDecoded()
                                    .stream()
                                    .mapToLong(bytes -> bytes.length)
                                    .sum();
    final long dictionaryEstimate = MapDictionary.estimateMemoryConsumption(distinctStrings, decodedBytesTotal);

    final StringTypeEncoded encoded = new StringTypeEncoded(dictionaryType, p.getEncoding());

    return new Guess(encoded, indexStore.estimateMemoryConsumptionBytes(), dictionaryEstimate) {

        /**
         * Creates the dictionary, adds every decoded value to it, attaches it to the
         * dictionary type and then delegates to the parent implementation.
         */
        @Override
        public StringStore getType() {
            final MapDictionary dictionary = new MapDictionary(Dataset.PLACEHOLDER, UUID.randomUUID().toString());
            p.getDecoded().forEach(bytes -> dictionary.add(bytes));
            dictionaryType.setDictionary(dictionary);
            return super.getType();
        }
    };
}
Also used : IntegerStore(com.bakdata.conquery.models.events.stores.root.IntegerStore) StringTypeDictionary(com.bakdata.conquery.models.events.stores.specific.string.StringTypeDictionary) StringTypeEncoded(com.bakdata.conquery.models.events.stores.specific.string.StringTypeEncoded) MapDictionary(com.bakdata.conquery.models.dictionary.MapDictionary)

Example 2 with IntegerStore

use of com.bakdata.conquery.models.events.stores.root.IntegerStore in project conquery by bakdata.

The method createGuess of the class NumberTypeGuesser.

/**
 * Tries to represent the collected strings as integers.
 *
 * <p>The guess is rejected (by returning {@code null}) when
 * <ul>
 *   <li>any string is not parseable by {@link Integer#parseInt(String)},</li>
 *   <li>any string other than {@code "0"} starts with {@code '0'}, or</li>
 *   <li>the covered value range (which always includes 0, because the range starts out as
 *       {@code [0, 0]}) is larger than the number of distinct strings.</li>
 * </ul>
 *
 * @return a guess whose {@code getType()} builds a {@link StringTypeNumber}, or {@code null}
 *         if the strings are not suitable for a number representation
 */
@Override
public Guess createGuess() {
    // check if the remaining strings are all numbers
    try {
        Range<Integer> range = new IntegerRange(0, 0);
        IntegerParser numberParser = new IntegerParser(config);
        for (Entry<String, Integer> e : p.getStrings().entrySet()) {
            final String raw = e.getKey();
            int intValue = Integer.parseInt(raw);
            // check that there are no leading zeroes that we would destroy
            if (raw.startsWith("0") && !raw.equals("0")) {
                return null;
            }
            range = range.span(new IntegerRange(intValue, intValue));
            numberParser.addLine((long) intValue);
        }
        numberParser.setLines(p.getLines());
        // do not use a number type if the range is much larger than the number of distinct values
        // e.g. if the column contains only 0 and 5M
        // The span is computed as long: max - min + 1 can exceed Integer.MAX_VALUE
        // (e.g. min < 0 and max == Integer.MAX_VALUE). In int arithmetic it would wrap
        // to a negative number and wrongly pass the check below.
        long span = (long) range.getMax() - range.getMin() + 1;
        if (span > p.getStrings().size()) {
            return null;
        }
        IntegerStore decision = numberParser.findBestType();
        // effectively-final copy so the anonymous class below can capture it
        Range<Integer> finalRange = range;
        return new Guess(null, decision.estimateMemoryConsumptionBytes(), 0) {

            /**
             * Builds the id-to-string lookup from the collected strings and wraps it,
             * together with the range and the chosen integer store, in a StringTypeNumber.
             */
            @Override
            public StringStore getType() {
                Int2ObjectMap<String> inverse = new Int2ObjectOpenHashMap<>(p.getStrings().size());
                // putIfAbsent: if several strings share an id, the first one encountered is kept
                p.getStrings().forEach((key, value) -> inverse.putIfAbsent((int) value, key));
                return new StringTypeNumber(finalRange, decision, inverse);
            }
        };
    } catch (NumberFormatException e) {
        // at least one string is not an int, so this guesser does not apply
        return null;
    }
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) IntegerRange(com.bakdata.conquery.models.common.Range.IntegerRange) StringTypeNumber(com.bakdata.conquery.models.events.stores.specific.string.StringTypeNumber) IntegerStore(com.bakdata.conquery.models.events.stores.root.IntegerStore) IntegerParser(com.bakdata.conquery.models.preproc.parser.specific.IntegerParser)

Example 3 with IntegerStore

use of com.bakdata.conquery.models.events.stores.root.IntegerStore in project conquery by bakdata.

The method decideType of the class MoneyParser.

/**
 * Chooses the store for money values by delegating to an {@link IntegerParser}.
 *
 * <p>The delegate is fed this parser's maximum and minimum value as well as its line and
 * null-line counts; the integer store it picks is wrapped in a {@link MoneyIntStore}.
 *
 * @return a {@link MoneyIntStore} backed by the integer store chosen by the delegate
 */
@Override
protected MoneyStore decideType() {
    final IntegerParser delegate = new IntegerParser(getConfig());

    // Only the extreme values are registered with the delegate.
    delegate.registerValue(maxValue);
    delegate.registerValue(minValue);

    delegate.setLines(getLines());
    delegate.setNullLines(getNullLines());

    return new MoneyIntStore(delegate.findBestType());
}
Also used : IntegerStore(com.bakdata.conquery.models.events.stores.root.IntegerStore) MoneyIntStore(com.bakdata.conquery.models.events.stores.specific.MoneyIntStore)

Example 4 with IntegerStore

use of com.bakdata.conquery.models.events.stores.root.IntegerStore in project conquery by bakdata.

The method applyDictionaryMappings of the class ImportJob.

/**
 * Remaps the string columns that have a dictionary mapping and gives each of them a
 * freshly chosen index store.
 *
 * <p>For every mapped column, a new {@link IntegerStore} is selected from the minimum and
 * maximum of the mapping's target ids and the line count of the column's store. The
 * mapping is then applied into that store, which is installed as the column's index
 * store. Progress is reported once per processed column.
 *
 * @param mappings dictionary mappings keyed by column name
 * @param values   column stores keyed by column name; the entries for mapped columns are
 *                 cast to {@link StringStore}
 */
private void applyDictionaryMappings(Map<String, DictionaryMapping> mappings, Map<String, ColumnStore> values) {
    final ProgressReporter subJob = getProgressReporter().subJob(mappings.size());

    mappings.forEach((columnName, mapping) -> {
        final StringStore stringStore = (StringStore) values.get(columnName);
        log.debug("Remapping Column[{}] = {} with {}", columnName, stringStore, mapping);

        // The index column is about to be remapped and may need a different width,
        // so a new type is determined for it from the target id range.
        final IntegerParser indexParser = new IntegerParser(config);
        final IntSummaryStatistics targetStats = mapping.target()
                                                        .intStream()
                                                        .summaryStatistics();
        indexParser.setLines(stringStore.getLines());
        indexParser.setMinValue(targetStats.getMin());
        indexParser.setMaxValue(targetStats.getMax());

        final IntegerStore remappedIndex = indexParser.findBestType();
        log.trace("Decided for {}", remappedIndex);

        mapping.applyToStore(stringStore, remappedIndex);
        stringStore.setIndexStore(remappedIndex);

        subJob.report(1);
    });
}
Also used : IntegerStore(com.bakdata.conquery.models.events.stores.root.IntegerStore) ProgressReporter(com.bakdata.conquery.util.progressreporter.ProgressReporter) StringStore(com.bakdata.conquery.models.events.stores.root.StringStore) DictionaryMapping(com.bakdata.conquery.models.dictionary.DictionaryMapping) IntegerParser(com.bakdata.conquery.models.preproc.parser.specific.IntegerParser)

Example 5 with IntegerStore

use of com.bakdata.conquery.models.events.stores.root.IntegerStore in project conquery by bakdata.

The method createGuess of the class TrieTypeGuesser.

/**
 * Builds a guess that stores the strings through a {@link SuccinctTrie}.
 *
 * <p>The trie is filled with all decoded values right away, so that its memory estimate
 * can be handed to the guess. It is only compressed when {@code getType()} is invoked on
 * the returned guess.
 *
 * @return a guess wrapping a {@link StringTypeEncoded} over a trie-backed dictionary store
 */
@Override
public Guess createGuess() {
    final IntegerStore indexStore = p.decideIndexType();
    final SuccinctTrie dictionary = new SuccinctTrie(Dataset.PLACEHOLDER, UUID.randomUUID().toString());
    final StringTypeDictionary dictionaryType = new StringTypeDictionary(indexStore, dictionary);

    p.getDecoded().forEach(bytes -> dictionary.add(bytes));

    final StringTypeEncoded encoded = new StringTypeEncoded(dictionaryType, p.getEncoding());

    return new Guess(encoded, indexStore.estimateMemoryConsumptionBytes(), dictionary.estimateMemoryConsumption()) {

        /**
         * Compresses the trie and then delegates to the parent implementation.
         */
        @Override
        public StringStore getType() {
            dictionary.compress();
            return super.getType();
        }
    };
}
Also used : IntegerStore(com.bakdata.conquery.models.events.stores.root.IntegerStore) StringTypeDictionary(com.bakdata.conquery.models.events.stores.specific.string.StringTypeDictionary) StringTypeEncoded(com.bakdata.conquery.models.events.stores.specific.string.StringTypeEncoded) SuccinctTrie(com.bakdata.conquery.util.dict.SuccinctTrie)

Aggregations

IntegerStore (com.bakdata.conquery.models.events.stores.root.IntegerStore)9 Test (org.junit.jupiter.api.Test)3 StringTypeDictionary (com.bakdata.conquery.models.events.stores.specific.string.StringTypeDictionary)2 StringTypeEncoded (com.bakdata.conquery.models.events.stores.specific.string.StringTypeEncoded)2 IntegerParser (com.bakdata.conquery.models.preproc.parser.specific.IntegerParser)2 IntegerRange (com.bakdata.conquery.models.common.Range.IntegerRange)1 DictionaryMapping (com.bakdata.conquery.models.dictionary.DictionaryMapping)1 MapDictionary (com.bakdata.conquery.models.dictionary.MapDictionary)1 StringStore (com.bakdata.conquery.models.events.stores.root.StringStore)1 DecimalTypeScaled (com.bakdata.conquery.models.events.stores.specific.DecimalTypeScaled)1 MoneyIntStore (com.bakdata.conquery.models.events.stores.specific.MoneyIntStore)1 StringTypeNumber (com.bakdata.conquery.models.events.stores.specific.string.StringTypeNumber)1 SuccinctTrie (com.bakdata.conquery.util.dict.SuccinctTrie)1 ProgressReporter (com.bakdata.conquery.util.progressreporter.ProgressReporter)1 Int2ObjectOpenHashMap (it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap)1 BigInteger (java.math.BigInteger)1