Use of com.bakdata.conquery.models.dictionary.Dictionary in project conquery by bakdata.
In class Column, method createSharedDictionaryReplacement:
/**
 * Creates an id-replacement mapping for shared dictionaries of an {@link Import}.
 * Because Imports are bound to a {@link com.bakdata.conquery.models.worker.Namespace} but the {@link com.bakdata.conquery.models.preproc.Preprocessed} files are not,
 * the files contain dummy {@link NsIdRef}s. These references are mapped to actual objects with valid ids through the
 * generated mapping.
 * <p>
 * For shared dictionaries, this method ensures that the shared dictionary exists in the storage, creating it if
 * necessary.
 *
 * @param dicts                 The mapping of column names in the Import to dictionaries in the Import
 * @param storage               The {@link NamespaceStorage} that backs the dictionaries
 * @param out                   The collection for the generated replacements, which are needed during the deserialization of the next
 *                              part of the {@link com.bakdata.conquery.models.preproc.Preprocessed}-file
 * @param sharedDictionaryLocks A collection of locks used for the synchronized creation of shared dictionaries.
 */
public void createSharedDictionaryReplacement(Map<String, Dictionary> dicts, NamespaceStorage storage, Map<DictionaryId, Dictionary> out, IdMutex<DictionaryId> sharedDictionaryLocks) {
    Preconditions.checkArgument(type.equals(MajorTypeId.STRING), "Not a STRING Column.");
    Preconditions.checkArgument(sharedDictionary != null, "Can only be used for Shared Dictionary based Columns");

    // If the column is based on a shared dictionary, we reference a new empty dictionary or the existing one,
    // but without updated entries. The entries are updated later on, see ImportJob#applyDictionaryMappings.
    Dictionary sharedDict = null;
    final DictionaryId sharedDictId = new DictionaryId(table.getDataset().getId(), getSharedDictionary());

    try (IdMutex.Locked lock = sharedDictionaryLocks.acquire(sharedDictId)) {
        sharedDict = storage.getDictionary(sharedDictId);
        // Create the dictionary if it is not yet present
        if (sharedDict == null) {
            sharedDict = new MapDictionary(table.getDataset(), getSharedDictionary());
            storage.updateDictionary(sharedDict);
        }
    }

    out.put(new DictionaryId(Dataset.PLACEHOLDER.getId(), dicts.get(getName()).getName()), sharedDict);
}
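
At its core this is a lock-guarded get-or-create: acquire a per-id lock, look the dictionary up, and only create and persist it if absent. Below is a minimal, self-contained sketch of that pattern using plain JDK types; the names are hypothetical, and ConcurrentHashMap plus ReentrantLock stand in for conquery's IdMutex and NamespaceStorage:

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReentrantLock;

public class GetOrCreateExample {
    // One lock per dictionary id, mirroring IdMutex<DictionaryId>.
    private static final Map<String, ReentrantLock> LOCKS = new ConcurrentHashMap<>();
    // Stand-in for the backing storage.
    private static final Map<String, String> STORAGE = new ConcurrentHashMap<>();

    static String getOrCreate(String dictId) {
        ReentrantLock lock = LOCKS.computeIfAbsent(dictId, id -> new ReentrantLock());
        lock.lock();
        try {
            // Only one thread at a time may check-and-create for a given id.
            String dict = STORAGE.get(dictId);
            if (dict == null) {
                dict = "dictionary:" + dictId;
                STORAGE.put(dictId, dict);
            }
            return dict;
        }
        finally {
            lock.unlock();
        }
    }

    public static void main(String[] args) {
        System.out.println(getOrCreate("shared")); // created
        System.out.println(getOrCreate("shared")); // reused
    }
}

Per-id locks keep creation of different dictionaries concurrent while serializing the check-and-create for any single id, which is what the IdMutex.Locked block above provides.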
Use of com.bakdata.conquery.models.dictionary.Dictionary in project conquery by bakdata.
In class UIProcessor, method getTableStatistics:
public TableStatistics getTableStatistics(Table table) {
    final NamespaceStorage storage = getDatasetRegistry().get(table.getDataset().getId()).getStorage();
    final List<Import> imports = table.findImports(storage).collect(Collectors.toList());

    final long entries = imports.stream().mapToLong(Import::getNumberOfEntries).sum();

    return new TableStatistics(
            table,
            entries,
            // total size of dictionaries
            imports.stream()
                   .flatMap(imp -> imp.getDictionaries().stream())
                   .filter(Objects::nonNull)
                   .map(storage::getDictionary)
                   .mapToLong(Dictionary::estimateMemoryConsumption)
                   .sum(),
            // total size of entries
            imports.stream().mapToLong(Import::estimateMemoryConsumption).sum(),
            // total size of CBlocks
            imports.stream().mapToLong(imp -> calculateCBlocksSizeBytes(imp, storage.getAllConcepts())).sum(),
            imports
    );
}
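
The statistics are plain stream reductions over the table's imports. A self-contained sketch of the same flatMap/filter/mapToLong aggregation, with hypothetical Imp and Dict records standing in for Import and Dictionary:

import java.util.List;
import java.util.Objects;

public class SizeAggregation {
    record Dict(String name, long bytes) {}
    record Imp(long entries, List<Dict> dicts) {}

    public static void main(String[] args) {
        List<Imp> imports = List.of(
                new Imp(100, List.of(new Dict("a", 2048), new Dict("b", 1024))),
                new Imp(50, List.of(new Dict("a", 2048))));

        long entries = imports.stream().mapToLong(Imp::entries).sum();

        // Like the dictionary-size sum above: flatten all dictionaries of all
        // imports, drop missing ones, and total their estimated sizes.
        long dictBytes = imports.stream()
                .flatMap(imp -> imp.dicts().stream())
                .filter(Objects::nonNull)
                .mapToLong(Dict::bytes)
                .sum();

        System.out.printf("entries=%d dictBytes=%d%n", entries, dictBytes);
    }
}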
Use of com.bakdata.conquery.models.dictionary.Dictionary in project conquery by bakdata.
In class ImportJob, method createOrUpdate:
public static ImportJob createOrUpdate(Namespace namespace, InputStream inputStream, int entityBucketSize, IdMutex<DictionaryId> sharedDictionaryLocks, ConqueryConfig config, boolean update) throws IOException {
    try (PreprocessedReader parser = new PreprocessedReader(inputStream)) {
        final Dataset ds = namespace.getDataset();

        // We parse semi-manually, as the incoming file consists of multiple documents that we read progressively:
        // 1) the header, to check metadata
        // 2) the dictionaries to be imported and transformed
        // 3) the ColumnStores themselves, which contain references to the previously imported dictionaries
        final PreprocessedHeader header = parser.readHeader();

        final TableId tableId = new TableId(ds.getId(), header.getTable());
        Table table = namespace.getStorage().getTable(tableId);

        if (table == null) {
            throw new BadRequestException(String.format("Table[%s] does not exist.", tableId));
        }

        // Ensure that Import and Table have the same schema
        header.assertMatch(table);

        final ImportId importId = new ImportId(table.getId(), header.getName());
        Import processedImport = namespace.getStorage().getImport(importId);

        if (update) {
            if (processedImport == null) {
                throw new WebApplicationException(String.format("Import[%s] is not present.", importId), Response.Status.NOT_FOUND);
            }
            // Before updating the import, make sure that all workers have removed the last import
            namespace.sendToAll(new RemoveImportJob(processedImport));
            namespace.getStorage().removeImport(importId);
        }
        else if (processedImport != null) {
            throw new WebApplicationException(String.format("Import[%s] is already present.", importId), Response.Status.CONFLICT);
        }

        log.trace("Begin reading Dictionaries");
        parser.addReplacement(Dataset.PLACEHOLDER.getId(), ds);
        PreprocessedDictionaries dictionaries = parser.readDictionaries();

        Map<DictionaryId, Dictionary> dictReplacements = createLocalIdReplacements(dictionaries.getDictionaries(), table, header.getName(), namespace.getStorage(), sharedDictionaryLocks);

        // We inject the mappings into the parser so that the incoming placeholder names are replaced with the new
        // names of the dictionaries. This allows us to use NsIdRef in conjunction with shared dictionaries.
        parser.addAllReplacements(dictReplacements);

        log.trace("Begin reading data.");
        PreprocessedData container = parser.readData();

        log.debug("Done reading data. Contains {} Entities.", container.size());
        log.info("Importing {} into {}", header.getName(), tableId);

        return new ImportJob(namespace, table, entityBucketSize, header, dictionaries, container, config);
    }
}
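
The numbered comment in the method describes a progressive, multi-document read: later stages are reached only if earlier ones succeed, so a schema mismatch aborts before any dictionary or column data is deserialized. A toy JDK-only illustration of that staged reading over a single stream (the three-section format here is invented for the example):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class ProgressiveReader {
    public static void main(String[] args) throws IOException {
        // Write a stream with three consecutive sections, like header/dictionaries/data.
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        try (DataOutputStream out = new DataOutputStream(buf)) {
            out.writeUTF("header:table=events"); // 1) metadata
            out.writeUTF("dicts:color,size");    // 2) dictionaries
            out.writeUTF("data:42 rows");        // 3) column stores
        }

        // Read the sections in order; each stage may abort before the
        // expensive later stages are ever touched (e.g. on a schema mismatch).
        try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(buf.toByteArray()))) {
            String header = in.readUTF();
            if (!header.contains("table=events")) {
                throw new IOException("Unexpected table in header: " + header);
            }
            String dicts = in.readUTF();
            String data = in.readUTF();
            System.out.println(header + " | " + dicts + " | " + data);
        }
    }
}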
Use of com.bakdata.conquery.models.dictionary.Dictionary in project conquery by bakdata.
In class NamespaceStorage, method getPrimaryDictionaryRaw:
@NonNull
public Dictionary getPrimaryDictionaryRaw() {
    final Dictionary dictionary = primaryDictionary.get();

    if (dictionary == null) {
        log.trace("No prior PrimaryDictionary, creating one");
        final MapDictionary newPrimary = new MapDictionary(getDataset(), ConqueryConstants.PRIMARY_DICTIONARY);
        primaryDictionary.update(newPrimary);
        return newPrimary;
    }

    return dictionary;
}
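
This is a lazy get-or-init over a cached reference. A minimal JDK-only analogue (hypothetical names; unlike the check-then-update above, the sketch uses compareAndSet so it stays correct even without external synchronization):

import java.util.concurrent.atomic.AtomicReference;

public class LazyPrimary {
    private static final AtomicReference<String> PRIMARY = new AtomicReference<>();

    static String getPrimaryRaw() {
        String existing = PRIMARY.get();
        if (existing == null) {
            // No prior value: create, publish atomically, and return whichever
            // instance won the race.
            String created = "primary_dictionary";
            return PRIMARY.compareAndSet(null, created) ? created : PRIMARY.get();
        }
        return existing;
    }

    public static void main(String[] args) {
        System.out.println(getPrimaryRaw()); // created on first access
        System.out.println(getPrimaryRaw()); // cached afterwards
    }
}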
Use of com.bakdata.conquery.models.dictionary.Dictionary in project conquery by bakdata.
In class XodusStoreFactory, method createDictionaryStore:
@Override
public IdentifiableStore<Dictionary> createDictionaryStore(CentralRegistry centralRegistry, String pathName) {
    final Environment environment = findEnvironment(pathName);

    final BigStore<IId<Dictionary>, Dictionary> bigStore;

    synchronized (openStoresInEnv) {
        bigStore = new BigStore<>(this, validator, environment, DICTIONARIES.storeInfo(), this::closeStore, this::removeStore, centralRegistry.injectIntoNew(objectMapper));
        openStoresInEnv.put(bigStore.getDataXodusStore().getEnvironment(), bigStore.getDataXodusStore());
        openStoresInEnv.put(bigStore.getMetaXodusStore().getEnvironment(), bigStore.getMetaXodusStore());
    }

    if (useWeakDictionaryCaching) {
        return StoreMappings.identifiableCachedStore(new WeakCachedStore<>(bigStore, getWeakCacheDuration()), centralRegistry);
    }
    return StoreMappings.identifiable(StoreMappings.cached(bigStore), centralRegistry);
}
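
The factory wraps the base BigStore in mapping layers and inserts a weak-cache decorator only when configured. A generic sketch of that conditional decorator stacking, with a hypothetical one-method Store interface:

import java.util.HashMap;
import java.util.Map;

public class StoreDecorators {
    interface Store { String get(String key); }

    static Store base() {
        Map<String, String> data = new HashMap<>(Map.of("k", "v"));
        return data::get;
    }

    // Wrap a store in a (here: trivial) caching layer.
    static Store cached(Store inner) {
        Map<String, String> cache = new HashMap<>();
        return key -> cache.computeIfAbsent(key, inner::get);
    }

    static Store build(boolean useCaching) {
        Store store = base();
        // Mirror the factory: only add the cache decorator if configured.
        return useCaching ? cached(store) : store;
    }

    public static void main(String[] args) {
        System.out.println(build(true).get("k"));  // served via cache layer
        System.out.println(build(false).get("k")); // served directly
    }
}

Keeping each layer behind the same interface lets callers stay oblivious to how many decorators are stacked, which is why the method can return either variant under the same IdentifiableStore type.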