use of net.yacy.grid.io.messages.GridQueue in project yacy_grid_crawler by yacy.
the class Crawler method newLoaderAction.
/**
 * Build the json description of a loader action. The loader action carries the
 * complete chain of follow-up actions: a parser action after loading and, after
 * parsing, an indexing action and/or a crawling action for the next depth.
 * @param priority the priority which is handed to the broker when the queue for each service is selected
 * @param id the crawl id
 * @param urls the urls which are part of the same action; the host of the first url is used as hash key for the queue selection
 * @param depth the depth of the crawl step (0 is start depth)
 * @param retry the number of load re-tries (0 is no retry, shows that this is the first attempt); not evaluated inside this method
 * @param timestamp the current time when the crawler created the action; becomes part of the asset names
 * @param partition unique number of the url set partition. This is used to create asset names.
 * @param doCrawling flag: if true, create a follow-up crawling action. set this to false to terminate crawling afterwards
 * @param doIndexing flag: if true, do an indexing after loading. set this to false if the purpose is only a follow-up crawl after parsing
 * @return the loader action json
 * @throws IOException propagated from the calls made in this method
 */
public static JSONObject newLoaderAction(int priority, String id, JSONArray urls, int depth, int retry, long timestamp, int partition, boolean doCrawling, boolean doIndexing) throws IOException {
    // the asset names are derived from crawl id, depth, time and partition.
    // the formatter is created locally for every call to prevent the concurrency bugs of the date formatter
    final SimpleDateFormat timeFormat = new SimpleDateFormat(PATTERN_TIMEF, Locale.US);
    final String assetPrefix = id + "/d" + intf(depth) + "-t" + timeFormat.format(new Date(timestamp)) + "-p" + intf(partition);
    final String warcAsset = assetPrefix + ".warc.gz";
    final String webAsset = assetPrefix + ".web.jsonlist";
    final String graphAsset = assetPrefix + ".graph.jsonlist";

    // the host of the first url is the key which the broker gets for all queue selections below
    final String hostKey = new MultiProtocolURL(urls.getString(0)).getHost();

    // the actions are assembled in reverse order of their execution:
    // the last processing step places actions on the indexing and the crawling queue simultaneously
    final JSONArray afterParsing = new JSONArray();

    // one or both must be true; doing none of that does not make sense
    assert doIndexing || doCrawling;

    // indexing of all urls (see indexing patterns) is done only if requested
    if (doIndexing) {
        final GridQueue indexerQueue = Data.gridBroker.queueName(YaCyServices.indexer, YaCyServices.indexer.getQueues(), ShardingMethod.BALANCE, INDEXER_PRIORITY_DIMENSIONS, priority, hostKey);
        final JSONObject indexerAction = new JSONObject(true);
        indexerAction.put("type", YaCyServices.indexer.name());
        indexerAction.put("queue", indexerQueue.name());
        indexerAction.put("id", id);
        indexerAction.put("sourceasset", webAsset);
        afterParsing.put(indexerAction);
    }

    // crawling of all urls at depth + 1 is done only if requested;
    // the caller switches this off when the crawl has reached its depth limit
    if (doCrawling) {
        final GridQueue crawlerQueue = Data.gridBroker.queueName(YaCyServices.crawler, YaCyServices.crawler.getQueues(), ShardingMethod.BALANCE, CRAWLER_PRIORITY_DIMENSIONS, priority, hostKey);
        final JSONObject crawlerAction = new JSONObject(true);
        crawlerAction.put("type", YaCyServices.crawler.name());
        crawlerAction.put("queue", crawlerQueue.name());
        crawlerAction.put("id", id);
        crawlerAction.put("depth", depth + 1);
        crawlerAction.put("sourcegraph", graphAsset);
        afterParsing.put(crawlerAction);
    }

    // before that and after loading we have a parsing action
    final GridQueue parserQueue = Data.gridBroker.queueName(YaCyServices.parser, YaCyServices.parser.getQueues(), ShardingMethod.BALANCE, PARSER_PRIORITY_DIMENSIONS, priority, hostKey);
    final JSONObject parserAction = new JSONObject(true);
    parserAction.put("type", YaCyServices.parser.name());
    parserAction.put("queue", parserQueue.name());
    parserAction.put("id", id);
    parserAction.put("sourceasset", warcAsset);
    parserAction.put("targetasset", webAsset);
    parserAction.put("targetgraph", graphAsset);
    parserAction.put("actions", afterParsing); // actions after parsing
    final JSONArray afterLoading = new JSONArray();
    afterLoading.put(parserAction);

    // at the beginning of the process, we do a loading
    final GridQueue loaderQueue = Data.gridBroker.queueName(YaCyServices.loader, YaCyServices.loader.getQueues(), ShardingMethod.BALANCE, LOADER_PRIORITY_DIMENSIONS, priority, hostKey);
    final JSONObject loaderAction = new JSONObject(true);
    loaderAction.put("type", YaCyServices.loader.name());
    loaderAction.put("queue", loaderQueue.name());
    loaderAction.put("id", id);
    loaderAction.put("urls", urls);
    loaderAction.put("targetasset", warcAsset);
    loaderAction.put("actions", afterLoading); // actions after loading
    return loaderAction;
}
use of net.yacy.grid.io.messages.GridQueue in project yacy_grid_mcp by yacy.
the class AbstractBrokerListener method loadNextAction.
/**
 * Wrap the given action together with the data into a new process message and
 * send it to the queue which is named inside the action.
 * @param action the action to be executed next; it must carry a non-empty "type" and "queue" attribute
 * @param data the data which is handed over to the next process
 * @throws UnsupportedOperationException if the action has no type or no queue
 * @throws IOException declared by this method; presumably raised by the broker if sending fails
 */
private void loadNextAction(SusiAction action, JSONArray data) throws UnsupportedOperationException, IOException {
    final String type = action.getStringAttr("type");
    if (type == null || type.isEmpty()) {
        throw new UnsupportedOperationException("missing type in action");
    }
    final String queue = action.getStringAttr("queue");
    if (queue == null || queue.isEmpty()) {
        throw new UnsupportedOperationException("missing queue in action");
    }

    // create a new Thought: the data plus a one-element action list holding a clone of the action
    final JSONObject nextProcess = new JSONObject();
    nextProcess.put("data", data);
    final JSONArray nextActions = new JSONArray();
    nextActions.put(action.toJSONClone());
    nextProcess.put("actions", nextActions);

    // push the serialized Thought to the next queue
    final byte[] message = nextProcess.toString(2).getBytes(StandardCharsets.UTF_8);
    final YaCyServices targetService = YaCyServices.valueOf(type);
    final GridQueue targetQueue = new GridQueue(queue);
    Data.gridBroker.send(targetService, targetQueue, message);
}
use of net.yacy.grid.io.messages.GridQueue in project yacy_grid_mcp by yacy.
the class PeekService method serviceImpl.
/**
 * Peek into a queue of a service. The response reports the number of available
 * messages and, if there is at least one, the content of one message. To look at
 * a message it is received from the queue and sent to the same queue again right
 * away, so that it is not lost.
 * @param call the request; must contain a non-empty "serviceName" and "queueName"
 * @param response the servlet response (not used here)
 * @return a json response; the success flag is false if a parameter is missing,
 *         the service name is unknown, the receive timed out or an IOException occurred
 */
@Override
public ServiceResponse serviceImpl(Query call, HttpServletResponse response) {
    String serviceName = call.get("serviceName", "");
    String queueName = call.get("queueName", "");
    JSONObject json = new JSONObject(true);
    if (serviceName.length() == 0 || queueName.length() == 0) {
        json.put(ObjectAPIHandler.SUCCESS_KEY, false);
        json.put(ObjectAPIHandler.COMMENT_KEY, "the request must contain a serviceName and a queueName");
        return new ServiceResponse(json);
    }

    // the service name comes from the request: valueOf throws an IllegalArgumentException
    // for an unknown name, which must become an error response and not an uncaught exception
    YaCyServices service;
    try {
        service = YaCyServices.valueOf(serviceName);
    } catch (IllegalArgumentException e) {
        json.put(ObjectAPIHandler.SUCCESS_KEY, false);
        json.put(ObjectAPIHandler.COMMENT_KEY, "unknown serviceName: " + serviceName);
        return new ServiceResponse(json);
    }

    try {
        GridQueue queue = new GridQueue(queueName);
        AvailableContainer available = Data.gridBroker.available(service, queue);
        int ac = available.getAvailable();
        String url = available.getFactory().getConnectionURL();
        if (url != null) {
            json.put(ObjectAPIHandler.SERVICE_KEY, url);
        }
        if (ac > 0) {
            // load one message and send it right again to prevent that it is lost
            MessageContainer<byte[]> message = Data.gridBroker.receive(service, queue, 3000);
            // message can be null if a timeout occurred
            if (message == null) {
                json.put(ObjectAPIHandler.SUCCESS_KEY, false);
                json.put(ObjectAPIHandler.COMMENT_KEY, "timeout");
            } else {
                byte[] payloadBytes = message.getPayload();
                // send it again asap!
                Data.gridBroker.send(service, queue, payloadBytes);
                // evaluate whats inside; a missing payload is reported as empty json object
                JSONObject payloadjson = payloadBytes == null
                        ? new JSONObject()
                        : new JSONObject(new JSONTokener(new String(payloadBytes, StandardCharsets.UTF_8)));
                json.put(ObjectAPIHandler.AVAILABLE_KEY, ac);
                json.put(ObjectAPIHandler.MESSAGE_KEY, payloadjson);
                json.put(ObjectAPIHandler.SUCCESS_KEY, true);
            }
        } else {
            json.put(ObjectAPIHandler.AVAILABLE_KEY, 0);
            json.put(ObjectAPIHandler.SUCCESS_KEY, true);
        }
    } catch (IOException e) {
        json.put(ObjectAPIHandler.SUCCESS_KEY, false);
        json.put(ObjectAPIHandler.COMMENT_KEY, e.getMessage());
    }
    return new ServiceResponse(json);
}
use of net.yacy.grid.io.messages.GridQueue in project yacy_grid_mcp by yacy.
the class SendService method serviceImpl.
/**
 * Send a message to a queue of a service.
 * @param call the request; must contain a non-empty "serviceName", "queueName" and "message"
 * @param response the servlet response (not used here)
 * @return a json response; the success flag is false if a parameter is missing,
 *         the service name is unknown or an IOException occurred while sending
 */
@Override
public ServiceResponse serviceImpl(Query call, HttpServletResponse response) {
    String serviceName = call.get("serviceName", "");
    String queueName = call.get("queueName", "");
    String message = call.get("message", "");
    JSONObject json = new JSONObject(true);
    if (serviceName.length() == 0 || queueName.length() == 0 || message.length() == 0) {
        json.put(ObjectAPIHandler.SUCCESS_KEY, false);
        json.put(ObjectAPIHandler.COMMENT_KEY, "the request must contain a serviceName, a queueName and a message");
        return new ServiceResponse(json);
    }

    // the service name comes from the request: valueOf throws an IllegalArgumentException
    // for an unknown name, which must become an error response and not an uncaught exception
    YaCyServices service;
    try {
        service = YaCyServices.valueOf(serviceName);
    } catch (IllegalArgumentException e) {
        json.put(ObjectAPIHandler.SUCCESS_KEY, false);
        json.put(ObjectAPIHandler.COMMENT_KEY, "unknown serviceName: " + serviceName);
        return new ServiceResponse(json);
    }

    try {
        QueueFactory<byte[]> factory = Data.gridBroker.send(service, new GridQueue(queueName), message.getBytes(StandardCharsets.UTF_8));
        String url = factory.getConnectionURL();
        json.put(ObjectAPIHandler.SUCCESS_KEY, true);
        if (url != null) {
            json.put(ObjectAPIHandler.SERVICE_KEY, url);
        }
    } catch (IOException e) {
        json.put(ObjectAPIHandler.SUCCESS_KEY, false);
        json.put(ObjectAPIHandler.COMMENT_KEY, e.getMessage());
    }
    return new ServiceResponse(json);
}
use of net.yacy.grid.io.messages.GridQueue in project yacy_grid_mcp by yacy.
the class AbstractBrokerListener method run.
/**
 * Start the listener threads for every queue of this service and wait until
 * each of them has terminated. The configured thread count is divided evenly
 * among the queues, with at least one thread per queue.
 */
@Override
public void run() {
    final List<QueueListener> listeners = new ArrayList<>();
    final int queueCount = this.queueNames.length;
    final int threadsPerQueue = Math.max(1, this.threads / queueCount);
    Data.logger.info("Broker Listener: starting " + threadsPerQueue + " threads for each of the " + queueCount + " queues");

    // start all listeners first ...
    for (GridQueue queue : this.queueNames) {
        int qc = 0;
        while (qc < threadsPerQueue) {
            final QueueListener listener = new QueueListener(queue, qc);
            listener.start();
            listeners.add(listener);
            Data.logger.info("Broker Listener for service " + this.service.name() + ", queue " + queue + " started thread " + qc);
            qc++;
        }
    }

    // ... then wait for each of them; an interruption is logged and the waiting continues with the next listener
    for (QueueListener listener : listeners) {
        try {
            listener.join();
            Data.logger.info("Broker Listener for service " + this.service.name() + ", queue " + listener.queueName + " terminated");
        } catch (InterruptedException e) {
            Data.logger.info("Broker Listener for service " + this.service.name() + ", queue " + listener.queueName + " interrupted", e);
        }
    }
}
Aggregations