use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.
the class JavaScriptFilter method document.
/**
 * Evaluates the configured script against the incoming document and forwards
 * every document the script placed in the {@code output} list to the next stage.
 *
 * <p>The script sees three global variables: {@code documentBuilder} (a fresh
 * {@link DocumentBuilder}), {@code input} (the incoming document) and
 * {@code output} (a Java list the script is expected to add result documents to).
 * If the script adds nothing, nothing is forwarded.
 *
 * @param document the document handed to the script as {@code input}
 */
@Override
public void document(Document document) {
    Context cx = Context.enter();
    try {
        Scriptable scope = cx.initStandardObjects();
        List<Document> output = new ArrayList<Document>();
        ScriptableObject.putProperty(scope, "documentBuilder", Context.javaToJS(new DocumentBuilder(), scope));
        ScriptableObject.putProperty(scope, "input", Context.javaToJS(document, scope));
        ScriptableObject.putProperty(scope, "output", Context.javaToJS(output, scope));
        // script and filename are defined outside this method (presumably set during filter init).
        cx.evaluateString(scope, script, filename, 1, null);
        for (Document out : output) {
            super.document(out);
        }
    } finally {
        // Every Context.enter() must be balanced by Context.exit(); otherwise the
        // context stays associated with the current thread, also when the script throws.
        Context.exit();
    }
}
use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.
the class OpenThesaurusReader method line2document.
/**
 * Turns the parts of a single line into one document and hands it to the executer.
 *
 * <p>All parts are stored together, as a list view of the array, under the
 * field {@code tokens}.
 *
 * @param lineParts the already split parts of one input line
 */
void line2document(String[] lineParts) {
    final Document tokenDocument = new Document();
    tokenDocument.setField(
            "tokens",
            Arrays.asList(lineParts));
    executer.document(tokenDocument);
}
use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.
the class ThreadDumpReader method endDocument.
/**
 * Finishes the document currently being built: writes the run-level and
 * file-level metadata fields, emits the document to the executer and then
 * starts a fresh, empty document.
 *
 * <p>The {@code position} field receives the counter value from before the
 * increment, so each emitted document gets the next consecutive number.
 */
void endDocument() {
    // Metadata describing the run and the source being processed.
    field("runId", runId);
    field("description", currentDescription);
    field("date", currentDate);
    field("fileName", currentFileName);
    field("directory", currentDirectory);

    // Post-increment: the current value is written, then the counter advances.
    field("position", String.valueOf(position++));

    // Emit first, then replace the working document with a new one.
    executer.document(currentDocument);
    currentDocument = new Document();
}
use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.
the class ElasticJsonPathReader method read.
/**
 * Reads all hits from an Elasticsearch scroll query and emits one document per hit.
 *
 * <p>Properties read: {@code mapping} (list, defaults to empty), {@code url}
 * (no default) and {@code scroll} (defaults to {@code "1m"}). The first request
 * goes to {@code url + "&scroll=" + scroll}, so the configured url is expected to
 * already carry a query string. Every following request goes to the scroll URL
 * derived by {@code ElasticHelper.getScrollUrl(url)} with the {@code _scroll_id}
 * of the previous response. Paging stops with the first response without hits.
 *
 * <p>For each hit's {@code _source}, every mapping key is evaluated as a JsonPath
 * expression and the value is stored under each mapped target field. Paths that
 * do not exist in a hit are skipped.
 *
 * @throws RuntimeException wrapping any exception raised while fetching or parsing
 */
public void read() {
    SimpleMapping simpleMapping = new SimpleMapping(getPropertyAsList("mapping", new ArrayList<String>()));
    Map<String, List<String>> mapping = simpleMapping.getMapping();
    try {
        url = getProperty("url", null);
        scroll = getProperty("scroll", "1m");
        String scrollBaseUrl = ElasticHelper.getScrollUrl(url);
        String pagedUrl = url + "&scroll=" + scroll;
        boolean hasHits;
        do {
            String response = HTTPHelper.get(pagedUrl);
            DocumentContext context = JsonPath.parse(response);
            String scrollId = context.read("$['_scroll_id']");
            List<Object> elasticHits = context.read("$['hits']['hits'][*]['_source']");
            hasHits = false;
            for (Object obj : elasticHits) {
                hasHits = true;
                Document document = new Document();
                // Parse the hit once; the same context serves every mapping entry.
                DocumentContext hitContext = JsonPath.parse(obj);
                for (Map.Entry<String, List<String>> mappingEntry : mapping.entrySet()) {
                    try {
                        Object parsedValue = hitContext.read(mappingEntry.getKey());
                        for (String target : mappingEntry.getValue()) {
                            document.setField(target, parsedValue);
                        }
                    } catch (PathNotFoundException ignored) {
                        // The hit has no value at this path: leave the target fields unset.
                    }
                }
                executer.document(document);
            }
            // Continue with the scroll cursor returned by the response just processed.
            pagedUrl = scrollBaseUrl + "?scroll=" + scroll + "&scroll_id=" + scrollId;
        } while (hasHits);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.
the class FilelineReader method read.
/**
 * Reads a text file line by line and emits one document per non-empty,
 * non-comment line.
 *
 * <p>Properties read: {@code charset} (default UTF-8), {@code filename}
 * (resolved against the base dir), {@code addMeta} (default {@code false}),
 * {@code maxRows} (default {@code Long.MAX_VALUE}), {@code delimiter}
 * (default {@code ","}), {@code commentPrefix} (default {@code "#"}) and
 * {@code fieldName} (default {@code "line"}).
 *
 * <p>Each kept line is split on the delimiter; every trimmed item is added as a
 * value of the field {@code fieldName}. The {@code maxRows} limit counts every
 * physical line read, including skipped comment and empty lines.
 *
 * @throws RuntimeException wrapping any exception raised while reading the file
 */
public void read() {
    String charset = getProperty("charset", StandardCharsets.UTF_8.name());
    String filename = getProperty("filename", null);
    String absoluteFilename = IOUtils.getAbsoluteFile(getBaseDir(), filename);
    // NOTE(review): addMeta is read but not used anywhere in this method — confirm whether
    // meta fields were meant to be added to the documents.
    boolean addMeta = getPropertyAsBoolean("addMeta", false);
    long maxRows = getPropertyAsInteger("maxRows", Long.MAX_VALUE);
    String delimiter = getProperty("delimiter", ",");
    String commentPrefix = getProperty("commentPrefix", "#");
    String fieldName = getProperty("fieldName", "line");
    // try-with-resources closes the reader even when reading or a downstream stage throws.
    try (BufferedReader br = Files.newReader(new File(absoluteFilename), Charset.forName(charset))) {
        String line;
        long countLines = 0;
        while ((line = br.readLine()) != null) {
            countLines++;
            if (countLines > maxRows) {
                break;
            }
            if (line.startsWith(commentPrefix)) {
                continue;
            }
            if (Strings.isNullOrEmpty(line)) {
                continue;
            }
            Document document = new Document();
            for (String item : Splitter.on(delimiter).trimResults().split(line)) {
                document.addField(fieldName, item);
            }
            executer.document(document);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Aggregations