Use of org.apache.sling.samples.webloader.WebloaderException in the Apache Sling project.
The run method of the class WebloaderJob.
/**
 * Runs the web loading job.
 *
 * <p>Opens an administrative session, resolves the storage root node at
 * {@code storagePath} (creating it, including missing intermediate nodes,
 * if it does not exist), then for each configured file type fetches a list
 * of document URLs and stores each document, saving the session after every
 * document, until {@code maxDocsToRetrieve} documents have been loaded.</p>
 *
 * <p>A failure on a single URL is logged and that URL is skipped. Any other
 * failure, including an invalid {@code storagePath}, is recorded in
 * {@code error} and {@code statusInfo}. The session is logged out and
 * {@code running} is reset in all cases.</p>
 */
@Override
public void run() {
    log.debug("Job thread starting: {}", this);

    // TODO should use a session provided by client, but can we use it
    // safely for our async job?
    session = null;

    try {
        // Validate and normalize the path inside the try block: a bad path
        // must be reported via error/statusInfo and must still reach the
        // finally block so that the running flag is cleared.
        if (storagePath == null || storagePath.length() == 0) {
            throw new WebloaderException("Storage path must not be null or empty");
        }
        if (storagePath.charAt(0) == '/') {
            storagePath = storagePath.substring(1);
        }
        final String absStoragePath = "/" + storagePath;

        session = repository.loginAdministrative(null);
        if (session.itemExists(absStoragePath)) {
            final Item i = session.getItem(absStoragePath);
            if (i.isNode()) {
                storageRoot = (Node) i;
            } else {
                throw new WebloaderException("Item at " + storagePath + " is not a Node");
            }
        } else {
            // Walk the relative path segment by segment, creating whatever
            // is missing: a single addNode(relPath) call fails when an
            // intermediate node does not exist.
            Node current = session.getRootNode();
            for (String segment : storagePath.split("/")) {
                if (segment.length() == 0) {
                    continue;
                }
                current = current.hasNode(segment)
                        ? current.getNode(segment)
                        : current.addNode(segment);
            }
            storageRoot = current;
            session.save();
        }

        int offset = 0;
        for (String type : filetypes) {
            final URL[] urls = getDocumentUrlsFromGoogle(type, offset);
            for (URL url : urls) {
                try {
                    getAndStoreDocument(url);
                    session.save();
                    numDocsLoaded++;
                    if (numDocsLoaded >= maxDocsToRetrieve) {
                        break;
                    }
                } catch (DocTooBigException dtb) {
                    // Oversized documents are expected; skip them quietly.
                    log.info(dtb.getMessage());
                } catch (Exception e) {
                    // One bad URL must not abort the whole job.
                    log.warn("Exception while retrieving url " + url, e);
                } finally {
                    // Discard any pending changes left over from a failed
                    // document so they are not saved with the next one.
                    session.refresh(false);
                }
            }
            offset += 10;
            // The break above only leaves the inner loop; stop the outer
            // loop as well once the limit has been reached.
            if (numDocsLoaded >= maxDocsToRetrieve) {
                break;
            }
        }
        statusInfo = "All done.";
    } catch (Exception e) {
        error = e;
        log.warn("Exception in WebloaderJob.run()", e);
        statusInfo = "Exception while running job: " + e;
    } finally {
        if (session != null) {
            session.logout();
        }
        statusDetails = "";
        running = false;
    }

    if (numDocsLoaded >= maxDocsToRetrieve) {
        log.info("Stopped after retrieving maximum number of documents ({})", maxDocsToRetrieve);
    }
    log.info("Job thread ends: {}, {} documents loaded", this, numDocsLoaded);
}
Aggregations