use of com.bakdata.conquery.models.events.stores.root.IntegerStore in project conquery by bakdata.
the class MapTypeGuesser method createGuess.
/**
 * Produces a guess that keeps the strings in a {@link MapDictionary}.
 *
 * <p>Only the memory estimate is computed eagerly, from the number of strings and the total
 * number of decoded bytes. The dictionary itself is created and filled when
 * {@code getType()} is called on the returned guess.
 *
 * @return the guess wrapping an encoded string store whose dictionary is attached on demand
 */
@Override
public Guess createGuess() {
    final IntegerStore indexStore = p.decideIndexType();
    // The dictionary is deliberately left unset here; it is attached in getType() below.
    final StringTypeDictionary type = new StringTypeDictionary(indexStore, null);

    final int stringCount = p.getStrings().size();
    long decodedByteCount = 0L;
    for (byte[] decoded : p.getDecoded()) {
        decodedByteCount += decoded.length;
    }
    final long estimatedMapBytes = MapDictionary.estimateMemoryConsumption(stringCount, decodedByteCount);

    final StringTypeEncoded encodedStore = new StringTypeEncoded(type, p.getEncoding());
    final long estimatedIndexBytes = indexStore.estimateMemoryConsumptionBytes();

    return new Guess(encodedStore, estimatedIndexBytes, estimatedMapBytes) {
        @Override
        public StringStore getType() {
            // Build a fresh dictionary under a random name and fill it with every decoded value.
            final MapDictionary dictionary = new MapDictionary(Dataset.PLACEHOLDER, UUID.randomUUID().toString());
            p.getDecoded().forEach(bytes -> dictionary.add(bytes));
            type.setDictionary(dictionary);
            return super.getType();
        }
    };
}
use of com.bakdata.conquery.models.events.stores.root.IntegerStore in project conquery by bakdata.
the class NumberTypeGuesser method createGuess.
/**
 * Tries to represent the strings as plain integers.
 *
 * <p>The guess is rejected (by returning {@code null}) when
 * <ul>
 *   <li>any string cannot be parsed by {@link Integer#parseInt(String)},</li>
 *   <li>any string other than {@code "0"} starts with {@code '0'}, because the leading zeroes
 *       would be lost when the value is stored as a number, or</li>
 *   <li>the covered value range (which starts out as {@code [0, 0]}) holds more values than
 *       there are distinct strings.</li>
 * </ul>
 *
 * @return a guess whose {@code getType()} builds a {@code StringTypeNumber}, or {@code null}
 *         if the column is not suitable for a number representation
 */
@Override
public Guess createGuess() {
    // check if the remaining strings are all numbers
    try {
        Range<Integer> range = new IntegerRange(0, 0);
        IntegerParser numberParser = new IntegerParser(config);
        for (Entry<String, Integer> e : p.getStrings().entrySet()) {
            final String raw = e.getKey();
            // parse first: anything that is not an int ends up in the catch block below
            int intValue = Integer.parseInt(raw);
            // check that there are no leading zeroes that we would destroy
            if (raw.startsWith("0") && !raw.equals("0")) {
                return null;
            }
            range = range.span(new IntegerRange(intValue, intValue));
            numberParser.addLine((long) intValue);
        }
        numberParser.setLines(p.getLines());
        // do not use a number type if the range is much larger than the number of distinct values
        // e.g. if the column contains only 0 and 5M
        // Computed in long: with int arithmetic max - min + 1 wraps around for wide ranges
        // (e.g. 0 and Integer.MAX_VALUE), which would let such a column slip past this check.
        final long span = (long) range.getMax() - (long) range.getMin() + 1L;
        if (span > p.getStrings().size()) {
            return null;
        }
        IntegerStore decision = numberParser.findBestType();
        Range<Integer> finalRange = range;
        // The store is only built on demand; the dictionary memory estimate is passed as 0.
        return new Guess(null, decision.estimateMemoryConsumptionBytes(), 0) {
            @Override
            public StringStore getType() {
                // Invert the string -> id map into id -> string; the first string seen for an id wins.
                Int2ObjectMap<String> inverse = new Int2ObjectOpenHashMap<>(p.getStrings().size());
                p.getStrings().forEach((key, value) -> inverse.putIfAbsent((int) value, key));
                return new StringTypeNumber(finalRange, decision, inverse);
            }
        };
    } catch (NumberFormatException e) {
        // at least one string is not an integer, so this guesser does not apply
        return null;
    }
}
use of com.bakdata.conquery.models.events.stores.root.IntegerStore in project conquery by bakdata.
the class MoneyParser method decideType.
/**
 * Picks the store for this money column by delegating the size decision to an
 * {@link IntegerParser}.
 *
 * <p>The delegate is fed {@code maxValue} and {@code minValue} together with this parser's
 * line and null-line counts; the integer store it selects is wrapped in a {@link MoneyIntStore}.
 *
 * @return a {@link MoneyIntStore} backed by the integer store chosen by the delegate
 */
@Override
protected MoneyStore decideType() {
    final IntegerParser integerDelegate = new IntegerParser(getConfig());

    // Only these two values are registered with the delegate.
    integerDelegate.registerValue(maxValue);
    integerDelegate.registerValue(minValue);

    integerDelegate.setLines(getLines());
    integerDelegate.setNullLines(getNullLines());

    final IntegerStore backingStore = integerDelegate.findBestType();
    return new MoneyIntStore(backingStore);
}
use of com.bakdata.conquery.models.events.stores.root.IntegerStore in project conquery by bakdata.
the class ImportJob method applyDictionaryMappings.
/**
 * Remaps the string columns that have a dictionary mapping.
 *
 * <p>For every mapping, the column of the same name is looked up in {@code values}, a new
 * index store is chosen from the minimum and maximum of the mapping's target ids, the mapping
 * is applied into that store, and the store is installed as the column's index store.
 * One unit of progress is reported per processed mapping.
 *
 * @param mappings dictionary mappings keyed by column name
 * @param values   column stores keyed by column name; entries named in {@code mappings} are
 *                 cast to {@link StringStore}
 */
private void applyDictionaryMappings(Map<String, DictionaryMapping> mappings, Map<String, ColumnStore> values) {
    final ProgressReporter progress = getProgressReporter().subJob(mappings.size());

    mappings.forEach((columnName, mapping) -> {
        final StringStore column = (StringStore) values.get(columnName);

        log.debug("Remapping Column[{}] = {} with {}", columnName, column, mapping);

        // The remapped ids may need a different width, so a new index store is chosen
        // from the smallest and largest target id.
        final IntegerParser indexSizer = new IntegerParser(config);
        final IntSummaryStatistics targetIds = mapping.target().intStream().summaryStatistics();

        indexSizer.setLines(column.getLines());
        indexSizer.setMinValue(targetIds.getMin());
        indexSizer.setMaxValue(targetIds.getMax());

        final IntegerStore remappedIndex = indexSizer.findBestType();

        log.trace("Decided for {}", remappedIndex);

        mapping.applyToStore(column, remappedIndex);
        column.setIndexStore(remappedIndex);

        progress.report(1);
    });
}
use of com.bakdata.conquery.models.events.stores.root.IntegerStore in project conquery by bakdata.
the class TrieTypeGuesser method createGuess.
/**
 * Produces a guess that keeps the strings in a {@link SuccinctTrie}.
 *
 * <p>The trie is filled with every decoded value right away, so its memory estimate comes
 * from the populated trie. It is compressed when {@code getType()} is called on the
 * returned guess.
 *
 * @return the guess wrapping an encoded string store backed by the trie
 */
@Override
public Guess createGuess() {
    final IntegerStore indexStore = p.decideIndexType();
    final SuccinctTrie trie = new SuccinctTrie(Dataset.PLACEHOLDER, UUID.randomUUID().toString());
    final StringTypeDictionary dictionaryStore = new StringTypeDictionary(indexStore, trie);

    p.getDecoded().forEach(bytes -> trie.add(bytes));

    final StringTypeEncoded encodedStore = new StringTypeEncoded(dictionaryStore, p.getEncoding());
    final long estimatedIndexBytes = indexStore.estimateMemoryConsumptionBytes();
    final long estimatedTrieBytes = trie.estimateMemoryConsumption();

    return new Guess(encodedStore, estimatedIndexBytes, estimatedTrieBytes) {
        @Override
        public StringStore getType() {
            // Compress the trie before handing out the store.
            trie.compress();
            return super.getType();
        }
    };
}
Aggregations