use of org.apache.druid.data.input.MapPopulator in project druid by druid-io.
the class UriCacheGenerator method generateCache.
@Override
@Nullable
public CacheScheduler.VersionedCache generateCache(final UriExtractionNamespace extractionNamespace, final CacheScheduler.EntryImpl<UriExtractionNamespace> entryId, @Nullable final String lastVersion, final CacheScheduler scheduler) throws Exception {
final boolean doSearch = extractionNamespace.getUriPrefix() != null;
final URI originalUri = doSearch ? extractionNamespace.getUriPrefix() : extractionNamespace.getUri();
final SearchableVersionedDataFinder<URI> pullerRaw = pullers.get(originalUri.getScheme());
if (pullerRaw == null) {
throw new IAE("Unknown loader type[%s]. Known types are %s", originalUri.getScheme(), pullers.keySet());
}
if (!(pullerRaw instanceof URIDataPuller)) {
throw new IAE("Cannot load data from location [%s]. Data pulling from [%s] not supported", originalUri, originalUri.getScheme());
}
final URIDataPuller puller = (URIDataPuller) pullerRaw;
final URI uri;
if (doSearch) {
final Pattern versionRegex;
if (extractionNamespace.getFileRegex() != null) {
versionRegex = Pattern.compile(extractionNamespace.getFileRegex());
} else {
versionRegex = null;
}
uri = pullerRaw.getLatestVersion(extractionNamespace.getUriPrefix(), versionRegex);
if (uri == null) {
throw new FileNotFoundException(StringUtils.format("Could not find match for pattern `%s` in [%s] for %s", versionRegex, originalUri, extractionNamespace));
}
} else {
uri = extractionNamespace.getUri();
}
return RetryUtils.retry(() -> {
final String version = puller.getVersion(uri);
try {
// Important to call equals() against version because lastVersion could be null
if (version.equals(lastVersion)) {
log.debug("URI [%s] for [%s] has the same last modified time [%s] as the last cached. " + "Skipping ", uri.toString(), entryId, version);
return null;
}
} catch (NumberFormatException ex) {
log.debug(ex, "Failed to get last modified timestamp. Assuming no timestamp");
}
final ByteSource source = new ByteSource() {
@Override
public InputStream openStream() throws IOException {
return CompressionUtils.decompress(puller.getInputStream(uri), uri.getPath());
}
};
final CacheScheduler.VersionedCache versionedCache = scheduler.createVersionedCache(entryId, version);
try {
final long startNs = System.nanoTime();
final MapPopulator.PopulateResult populateResult = new MapPopulator<>(extractionNamespace.getNamespaceParseSpec().getParser()).populateAndWarnAtByteLimit(source, versionedCache.getCache(), (long) (MAX_MEMORY * extractionNamespace.getMaxHeapPercentage() / 100.0), null == entryId ? null : entryId.toString());
final long duration = System.nanoTime() - startNs;
log.info("Finished loading %,d values (%d bytes) from %,d lines for [%s] in %,d ns", populateResult.getEntries(), populateResult.getBytes(), populateResult.getLines(), entryId, duration);
return versionedCache;
} catch (Throwable t) {
try {
versionedCache.close();
} catch (Exception e) {
t.addSuppressed(e);
}
throw t;
}
}, puller.shouldRetryPredicate(), DEFAULT_NUM_RETRIES);
}
Aggregations