Example use of com.bakdata.conquery.models.dictionary.Dictionary in project conquery by bakdata: class ImportJob, method importDictionaries.
/**
 * Imports all dictionaries of an import and distributes them to the corresponding workers.
 * Columns backed by a shared dictionary are merged into the pre-existing shared dictionary,
 * and a mapping (import ids -> shared ids) is created and returned for each of them.
 * Not synchronized, because the method is only invoked from within the job execution.
 */
private static Map<String, DictionaryMapping> importDictionaries(Namespace namespace, Map<String, Dictionary> dicts, Column[] columns, String importName) {
	// Jackson coalesces empty maps to null on deserialization.
	if (dicts == null) {
		return Collections.emptyMap();
	}

	log.trace("Importing Dictionaries");

	final Map<String, DictionaryMapping> mappings = new HashMap<>();

	for (Column column : columns) {
		// Only STRING columns carry dictionaries.
		if (column.getType() != MajorTypeId.STRING) {
			continue;
		}

		// A STRING column might not have an underlying dictionary (e.g. Singleton or
		// direct-Number encoding), but this could also be an error. Most likely the former.
		final Dictionary sourceDictionary = dicts.get(column.getName());

		if (sourceDictionary == null) {
			log.trace("No Dictionary for {}", column);
			continue;
		}

		final String sharedDictionaryName = column.getSharedDictionary();

		if (sharedDictionaryName == null) {
			// Plain dictionary: nothing to merge, just hand it out to the workers.
			distributeDictionary(namespace, sourceDictionary);
			continue;
		}

		// Shared dictionary: merge the incoming values into the existing one.
		log.trace("Column[{}.{}] part of shared Dictionary[{}]", importName, column.getName(), sharedDictionaryName);

		final DictionaryId sharedDictionaryId = new DictionaryId(namespace.getDataset().getId(), sharedDictionaryName);
		final Dictionary sharedDictionary = namespace.getStorage().getDictionary(sharedDictionaryId);

		// Should never fail, because the dictionary is pre-created in the replacement-generation step.
		ResourceUtil.throwNotFoundIfNull(sharedDictionaryId, sharedDictionary);

		log.trace("Merging into shared Dictionary[{}]", sharedDictionary);

		final DictionaryMapping mapping = DictionaryMapping.createAndImport(sourceDictionary, sharedDictionary);

		// Only redistribute the target when the merge actually introduced new ids.
		if (mapping.getNumberOfNewIds() != 0) {
			distributeDictionary(namespace, mapping.getTargetDictionary());
		}

		mappings.put(column.getName(), mapping);
	}

	return mappings;
}
Example use of com.bakdata.conquery.models.dictionary.Dictionary in project conquery by bakdata: class ImportJob, method importPrimaryDictionary.
/**
 * Merges the import's primary dictionary into an uncompressed copy of the namespace's
 * primary dictionary. The updated copy is only stored back when the merge produced new ids,
 * avoiding an unnecessary recompress-and-store round-trip.
 *
 * @param primaryDictionary the incoming import's primary dictionary.
 * @return the mapping from the import's ids to the namespace-wide primary ids.
 */
private DictionaryMapping importPrimaryDictionary(Dictionary primaryDictionary) {
	// Work on an uncompressed copy so the merge does not mutate the stored dictionary in place.
	final Dictionary workingCopy = Dictionary.copyUncompressed(namespace.getStorage().getPrimaryDictionaryRaw());

	final DictionaryMapping primaryMapping = DictionaryMapping.createAndImport(primaryDictionary, workingCopy);

	log.debug("Mapped {} new ids", primaryMapping.getNumberOfNewIds());

	// No new ids: skip recompression and storage entirely.
	if (primaryMapping.getNumberOfNewIds() == 0) {
		log.trace("No new ids");
		return primaryMapping;
	}

	namespace.getStorage().updatePrimaryDictionary(workingCopy);

	return primaryMapping;
}
Example use of com.bakdata.conquery.models.dictionary.Dictionary in project conquery by bakdata: class Column, method createSingleColumnDictionaryReplacement.
/**
 * See {@link Column#createSharedDictionaryReplacement(Map, NamespaceStorage, Map, IdMutex)}
 *
 * Registers this STRING column's import dictionary in {@code out}, keyed under the
 * placeholder dataset and the dictionary's original (pre-rename) name, then rebinds the
 * dictionary to this column's table's dataset under the computed default name.
 */
public void createSingleColumnDictionaryReplacement(Map<String, Dictionary> dicts, String importName, Map<DictionaryId, Dictionary> out) {
// Only applicable to non-shared STRING columns; shared columns go through createSharedDictionaryReplacement.
Preconditions.checkArgument(type.equals(MajorTypeId.STRING), "Not a STRING Column.");
Preconditions.checkArgument(sharedDictionary == null, "Cannot be used for Shared Dictionary based Columns.");
// NOTE(review): assumes dicts always contains an entry for this column's name —
// a missing dictionary would NPE on dict.getName() below. Confirm with callers.
final Dictionary dict = dicts.get(getName());
final String name = computeDefaultDictionaryName(importName);
// The put deliberately precedes the rename, so the map key carries the dictionary's
// original name while the stored dictionary ends up with the computed default name.
// NOTE(review): presumably the original-name key is used for later replacement lookup — verify against callers.
out.put(new DictionaryId(Dataset.PLACEHOLDER.getId(), dict.getName()), dict);
dict.setDataset(table.getDataset());
dict.setName(name);
}
Aggregations