use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.
the class XmlSitemapReader method parseSitemapUrlNode.
/**
 * Builds one pipeline {@link Document} from the element nodes in {@code url}
 * and passes it to the executer.
 *
 * <p>For every element node, a field is added whose name is the node name and
 * whose value is the node value of the element's first child. Non-element
 * nodes are ignored.
 *
 * @param url             nodes to convert; presumably the children of a
 *                        sitemap {@code <url>} entry (verify against caller)
 * @param sitemapIndexUrl stored in the field {@code sitemapIndexUrl} when it
 *                        is neither {@code null} nor empty
 */
void parseSitemapUrlNode(NodeList url, String sitemapIndexUrl) {
    Document document = new Document();
    if (!Strings.isNullOrEmpty(sitemapIndexUrl)) {
        document.addField("sitemapIndexUrl", sitemapIndexUrl);
    }
    for (int k = 0; k < url.getLength(); k++) {
        Node noder = url.item(k);
        if (Node.ELEMENT_NODE != noder.getNodeType()) {
            continue;
        }
        // An empty element (e.g. <lastmod/>) has no child node at all;
        // getFirstChild() returns null for it, so skip it instead of
        // failing with a NullPointerException.
        Node firstChild = noder.getFirstChild();
        if (firstChild == null) {
            continue;
        }
        document.addField(noder.getNodeName(), firstChild.getNodeValue());
    }
    executer.document(document);
}
use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.
the class TokenizerFilter method document.
/**
 * Splits every value of every field of the incoming document into tokens
 * (using {@code delim} as the delimiter set) and forwards each token as its
 * own new document with a single field named {@code token}.
 *
 * <p>The incoming document itself is not forwarded.
 *
 * @param document the document whose field values are tokenized
 */
@Override
public void document(Document document) {
    for (Field sourceField : document.getFields()) {
        for (String rawValue : sourceField.getValues()) {
            StringTokenizer parts = new StringTokenizer(rawValue, delim);
            while (parts.hasMoreTokens()) {
                Document tokenDocument = new Document();
                tokenDocument.addField("token", parts.nextToken());
                super.document(tokenDocument);
            }
        }
    }
}
use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.
the class TopicAggregationFilter method end.
/**
 * Forwards every document held in {@code docs} to the parent filter and then
 * delegates to the parent's {@code end()}.
 */
@Override
public void end() {
    // Only the stored documents are needed here, not their keys.
    for (Document aggregated : docs.values()) {
        super.document(aggregated);
    }
    super.end();
}
use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.
the class TopicAggregationFilter method mergeDocuments.
/**
 * Merges two documents into a new one containing the union of their fields.
 *
 * <p>Values are collected per field name in a {@link HashSet}, so duplicate
 * values within a field are collapsed. Neither input document is modified by
 * this method.
 *
 * @param doc1 first document; its fields seed the merged result
 * @param doc2 second document; its values are added to the fields of the same name
 * @return a new document holding one field per distinct field name
 */
protected Document mergeDocuments(Document doc1, Document doc2) {
    Map<String, HashSet<String>> valuesByName = new HashMap<String, HashSet<String>>();

    for (Field first : doc1.getFields()) {
        valuesByName.put(first.getName(), new HashSet<String>(first.getValues()));
    }

    for (Field second : doc2.getFields()) {
        String name = second.getName();
        HashSet<String> collected = valuesByName.get(name);
        if (collected == null) {
            // Field only exists in doc2 so far: start a fresh value set.
            collected = new HashSet<String>();
            valuesByName.put(name, collected);
        }
        collected.addAll(second.getValues());
    }

    Document result = new Document();
    for (Map.Entry<String, HashSet<String>> merged : valuesByName.entrySet()) {
        ArrayList<String> values = new ArrayList<String>(merged.getValue());
        result.addField(new Field(merged.getKey(), values));
    }
    return result;
}
use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.
the class TopicMergeFilter method document.
/**
 * Stores the incoming document in {@code topicValues}, keyed first by the
 * value of the {@code fieldTopic} field and then by the value of the
 * {@code fieldValue} field.
 *
 * <p>If a document is already stored under the same topic and value, the two
 * are combined with {@code mergeDocuments} and the result replaces the stored
 * one. This method does not forward the document itself.
 *
 * @param document the document to store or merge
 */
@Override
public void document(Document document) {
    String topic = document.getFieldValue(fieldTopic);
    String value = document.getFieldValue(fieldValue);
    // Guard against a missing value (assumes getFieldValue can return null
    // for an absent field - TODO confirm). Without the guard, lowercase mode
    // throws a NullPointerException, while the non-lowercase mode simply
    // stores the document under a null key.
    // NOTE(review): toLowerCase() uses the JVM default locale; consider
    // Locale.ROOT if keys must be identical across machines.
    if (fieldValueLowercase && value != null) {
        value = value.toLowerCase();
    }
    if (!topicValues.containsKey(topic)) {
        topicValues.put(topic, new HashMap<String, Document>());
    }
    if (!topicValues.get(topic).containsKey(value)) {
        // add: first document seen for this topic/value pair
        topicValues.get(topic).put(value, document);
    } else {
        // merge: combine with the document already stored for this pair
        Document oldDoc = topicValues.get(topic).get(value);
        Document mergedDoc = mergeDocuments(oldDoc, document);
        topicValues.get(topic).put(value, mergedDoc);
    }
}
Aggregations