use of com.virjar.vscrawler.core.selector.combine.AbstractSelectable in project vscrawler by virjar.
the class ModelExtractor method process.
/**
 * Populates one model per node matched by {@code rootSelector} and records the
 * outcome on {@code crawlResult}.
 *
 * <p>For every matched node the current model receives the raw content, the node,
 * the seed and the base URL. If {@code model.hasGrabSuccess()} is false the node is
 * skipped and the seed is asked to retry (at most once per call). Otherwise the
 * fields are injected through {@code fetchTaskProcessor}, the seeds produced by the
 * injection and by {@code model.newSeeds()} are added to {@code crawlResult}, and
 * the model itself is added as a result when {@code save} is true.
 *
 * @param seed               the seed being processed; its data is used as base URL
 *                           when the model has none and the data parses as a URI
 * @param content            raw text of the fetched page
 * @param crawlResult        collector for new seeds and extracted models
 * @param model              model instance used for the first matched node; further
 *                           nodes get instances created from {@code aClass}
 * @param abstractSelectable pre-built selectable to select from, or {@code null} to
 *                           build one from {@code url} and {@code content}
 * @param save               whether populated models are added to {@code crawlResult}
 */
@SuppressWarnings("unchecked")
public void process(Seed seed, String content, GrabResult crawlResult, AbstractAutoProcessModel model, AbstractSelectable abstractSelectable, boolean save) {
    String url = model.getBaseUrl();
    if (StringUtils.isBlank(url)) {
        try {
            // Constructed only to check that the seed data parses as a URI.
            new URI(seed.getData());
            url = seed.getData();
            model.setBaseUrl(url);
        } catch (Exception ignored) {
            // Best effort: the seed data is not usable as a URI, so url keeps its blank value.
        }
    }
    AbstractSelectable baseSelectable = abstractSelectable;
    if (baseSelectable == null) {
        baseSelectable = AbstractSelectable.createModel(url, content);
    }
    Iterator<AbstractSelectable> iterator = rootSelector.select(baseSelectable).toMultiSelectable().iterator();
    boolean hasRetry = false;
    // True once the current model instance has been fully processed, so the next
    // node must be given a fresh instance.
    boolean modelConsumed = false;
    // Support extracting multiple models from a single page.
    // Looping on hasNext() (instead of an unconditional next()) keeps a failed grab
    // check on the last node from running past the end of the iterator.
    while (iterator.hasNext()) {
        if (modelConsumed) {
            model = ObjectFactory.newInstance(aClass);
            modelConsumed = false;
        }
        AbstractSelectable next = iterator.next();
        model.setRawText(content);
        model.setOriginSelectable(next);
        model.setSeed(seed);
        model.setBaseUrl(url);
        if (!model.hasGrabSuccess()) {
            // Request a retry only once per call, however many nodes fail.
            if (!hasRetry) {
                seed.retry();
                hasRetry = true;
            }
            // The model was not consumed, so it is reused for the next node.
            continue;
        }
        model.beforeAutoFetch();
        List<Seed> newSeeds = fetchTaskProcessor.injectField(model, next, crawlResult);
        model.afterAutoFetch();
        newSeeds.addAll(model.newSeeds());
        crawlResult.addSeeds(newSeeds);
        if (save) {
            crawlResult.addResult(model);
        }
        modelConsumed = true;
    }
}
use of com.virjar.vscrawler.core.selector.combine.AbstractSelectable in project vscrawler by virjar.
the class XpathNode method toMultiSelectable.
/**
 * Splits this node into one {@code XpathNode} per {@code SIPNode} of its model.
 *
 * <p>A text node is wrapped using its text value directly; any other node gets a
 * {@code RawTextStringFactory} whose raw text is the node's {@code toString()}.
 * Each child's model is set to a {@code SipNodes} built from that single node.
 *
 * @return a linked list with one child selectable per node, in iteration order
 */
@Override
public List<AbstractSelectable> toMultiSelectable() {
    SipNodes model = createOrGetModel();
    List<AbstractSelectable> children = Lists.newLinkedList();
    for (final SIPNode current : model) {
        XpathNode child;
        if (!current.isText()) {
            // Raw text is produced on demand from the node itself.
            child = new XpathNode(getBaseUrl(), new RawTextStringFactory() {
                @Override
                public String rawText() {
                    return current.toString();
                }
            });
        } else {
            child = new XpathNode(getBaseUrl(), current.getTextVal());
        }
        child.setModel(new SipNodes(current));
        children.add(child);
    }
    return children;
}
use of com.virjar.vscrawler.core.selector.combine.AbstractSelectable in project vscrawler by virjar.
the class JsonNode method toMultiSelectable.
/**
 * Splits this node into one {@code JsonNode} per {@code JSON} element of its model.
 *
 * <p>Each child is given a {@code RawTextStringFactory} whose raw text is the
 * element's {@code toJSONString()}, and a model holding only that element.
 *
 * @return a linked list with one child selectable per element, in iteration order
 */
@Override
public List<AbstractSelectable> toMultiSelectable() {
    List<JSON> elements = createOrGetModel();
    List<AbstractSelectable> children = Lists.newLinkedList();
    for (final JSON element : elements) {
        // Raw text is produced on demand from the element itself.
        RawTextStringFactory rawTextFactory = new RawTextStringFactory() {
            @Override
            public String rawText() {
                return element.toJSONString();
            }
        };
        JsonNode child = new JsonNode(getBaseUrl(), rawTextFactory);
        child.setModel(Lists.newArrayList(element));
        children.add(child);
    }
    return children;
}
Aggregations