use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.
the class SolrFieldCounterTest method testSolrFieldCounterFilter.
/**
 * Feeds two documents through the filter under test and checks the values
 * reported for each field name, the number of emitted documents and fields,
 * and that the init/end lifecycle calls were delegated.
 */
@Test
public void testSolrFieldCounterFilter() {
    configure();

    // "field1" is present in both input documents, "field2" only in the second one.
    Document onlyFirstField = DocumentBuilder.document()
            .field("field1", "value1")
            .create();
    Document bothFields = DocumentBuilder.document()
            .field("field1", "value1")
            .field("field2", "value2")
            .create();

    document(onlyFirstField, bothFields);

    assertFiled("field2", "1");
    assertFiled("field1", "2");
    assertNumberOfDocuments(1);
    assertNumberOfFields(2);
    assertInitWasDelegated();
    assertEndWasDelegated();
}
use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.
the class CSVReader method read.
/**
 * Reads the configured CSV file and passes one {@link Document} per record
 * to the executer.
 *
 * <p>Properties read: {@code charset} (default UTF-8), {@code filename},
 * {@code addMeta} (default false), {@code maxRows} (default
 * {@code Long.MAX_VALUE}), {@code delimiter} (default {@code ","}, only the
 * first character is used), {@code arrayDelimiter} (a regular expression,
 * optional) and {@code headers} (optional; when absent the header is taken
 * from the first record of the file).
 *
 * <p>Without {@code arrayDelimiter} every column value is added as a single
 * field value. With {@code arrayDelimiter} every column value is split and
 * the non-empty parts are set as the field's value list.
 *
 * <p>The stream, reader and parser are closed when reading finishes or fails.
 *
 * @throws RuntimeException wrapping any failure that occurs while reading
 */
@Override
public void read() {
    try {
        String charset = getProperty("charset", StandardCharsets.UTF_8.name());
        String filename = getProperty("filename", null);
        String absoluteFilename = IOUtils.getAbsoluteFile(getBaseDir(), filename);
        boolean addMeta = getPropertyAsBoolean("addMeta", false);
        long maxRows = getPropertyAsInteger("maxRows", Long.MAX_VALUE);
        String delimiter = getProperty("delimiter", ",");
        String arrayDelimiter = getProperty("arrayDelimiter", null);
        String[] headers = getPropertyAsArray("headers", null);

        if (StringUtils.isEmpty(delimiter)) {
            throw new IllegalArgumentException("The delimiter property must not be empty.");
        }

        // Compile the array delimiter once, before any row is read, so that an
        // invalid expression fails fast instead of being hidden per field.
        java.util.regex.Pattern arrayPattern = null;
        if (StringUtils.isNotEmpty(arrayDelimiter)) {
            arrayPattern = java.util.regex.Pattern.compile(arrayDelimiter);
        }

        CSVFormat format = CSVFormat.RFC4180;
        if (headers == null) {
            format = format.withHeader();
        } else {
            format = format.withHeader(headers);
        }
        format = format.withDelimiter(delimiter.charAt(0));

        try (InputStream in = IOUtils.getInputStream(absoluteFilename);
                java.io.Reader reader = new InputStreamReader(in, charset);
                CSVParser parser = format.parse(reader)) {
            // The header mapping does not change between records, so fetch it once.
            Map<String, Integer> header = parser.getHeaderMap();
            Iterator<CSVRecord> csvIterator = parser.iterator();
            long rowNumber = 0;
            while (rowNumber < maxRows && csvIterator.hasNext()) {
                rowNumber++;
                CSVRecord record = csvIterator.next();
                Document document = new Document();
                for (String key : header.keySet()) {
                    String value;
                    try {
                        value = record.get(key);
                    } catch (IllegalArgumentException ignored) {
                        // Best effort: the record has no value for this column, skip the field.
                        continue;
                    }
                    if (arrayPattern == null) {
                        document.addField(key, value);
                    } else {
                        List<String> valueList = new ArrayList<String>();
                        for (String val : arrayPattern.split(value)) {
                            if (StringUtils.isNotEmpty(val)) {
                                valueList.add(val);
                            }
                        }
                        document.setField(key, valueList);
                    }
                }
                if (addMeta) {
                    document.addField("rowNumber", String.valueOf(rowNumber));
                    document.addField("fileName", absoluteFilename);
                }
                executer.document(document);
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.
the class ElasticReader method read.
/**
 * Pages through the configured Elasticsearch URL using the scroll parameter
 * and passes one {@link Document} per hit to the executer.
 *
 * <p>The first request goes to {@code url + "&scroll=" + scroll}; every
 * following request goes to the scroll URL with the {@code _scroll_id} of the
 * previous response. Paging stops after the first response without hits.
 * Only primitive {@code _source} values are copied into the document; array
 * values (and any other non-primitive values) are skipped.
 *
 * @throws RuntimeException wrapping any failure during fetching or parsing
 */
@Override
public void read() {
    gson = new GsonBuilder().create();
    try {
        url = getProperty("url", null);
        scroll = getProperty("scroll", "1m");
        final String scrollBaseUrl = ElasticHelper.getScrollUrl(url);

        String pagedUrl = url + "&scroll=" + scroll;
        boolean hasHits;
        do {
            String response = HTTPHelper.get(pagedUrl);
            JsonElement jsonResponse = gson.fromJson(response, JsonElement.class);
            String scrollId = jsonResponse.getAsJsonObject().get("_scroll_id").getAsString();
            Iterator<JsonElement> hitsIterator = jsonResponse.getAsJsonObject()
                    .get("hits").getAsJsonObject()
                    .get("hits").getAsJsonArray()
                    .iterator();

            // A page with at least one hit means another page has to be requested.
            hasHits = hitsIterator.hasNext();
            while (hitsIterator.hasNext()) {
                Document document = new Document();
                JsonElement source = hitsIterator.next().getAsJsonObject().get("_source");
                for (Entry<String, JsonElement> sourceField : source.getAsJsonObject().entrySet()) {
                    JsonElement fieldValue = sourceField.getValue();
                    if (fieldValue.isJsonArray()) {
                        // Array values are not transferred.
                        continue;
                    }
                    if (fieldValue.isJsonPrimitive()) {
                        document.addField(sourceField.getKey(), fieldValue.getAsString());
                    }
                }
                executer.document(document);
            }

            pagedUrl = scrollBaseUrl + "?scroll=" + scroll + "&scroll_id=" + scrollId;
        } while (hasHits);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.
the class GCLogReader method processLine.
/**
 * Prints the line to standard out, matches it against the grok pattern and,
 * when the match produced at least one capture, passes a document with all
 * captures to the executer.
 *
 * <p>Every captured value is stored as its string representation. The
 * document additionally gets a "filename" field and, if {@code keepRaw} is
 * set, a "raw" field holding the unmodified line.
 *
 * @param line the log line to process
 */
void processLine(String line) {
    System.out.println(line);

    Match match = grok.match(line);
    match.captures();
    Map<String, Object> captures = match.toMap();

    Document doc = new Document();
    for (String name : captures.keySet()) {
        doc.addField(name, String.valueOf(captures.get(name)));
    }

    // Lines without any capture are not forwarded.
    if (captures.isEmpty()) {
        return;
    }

    doc.addField("filename", currentFileName);
    if (keepRaw) {
        doc.addField("raw", line);
    }
    executer.document(doc);
}
use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.
the class GrokReader method processLine.
/**
 * Matches the line against the grok pattern and, when the match produced at
 * least one capture, passes a document with all captures to the executer.
 *
 * <p>Every captured value is stored as its string representation. The
 * document additionally gets a "filename" field and, if {@code keepRaw} is
 * set, a "raw" field holding the unmodified line.
 *
 * @param line the input line to process
 */
void processLine(String line) {
    Match grokMatch = grok.match(line);
    grokMatch.captures();
    Map<String, Object> fields = grokMatch.toMap();

    Document result = new Document();
    for (Map.Entry<String, Object> field : fields.entrySet()) {
        String text = String.valueOf(field.getValue());
        result.addField(field.getKey(), text);
    }

    // Nothing captured: the line did not match, so no document is emitted.
    boolean matched = !fields.isEmpty();
    if (!matched) {
        return;
    }

    result.addField("filename", currentFileName);
    if (keepRaw) {
        result.addField("raw", line);
    }
    executer.document(result);
}
Aggregations