use of org.elasticsearch.action.index.IndexRequest in project metron by apache.
the class ElasticsearchDao method update.
@Override
public void update(Document update, Optional<String> index) throws IOException {
    // Derive the target index from the optional override, the sensor type, and the date-based postfix.
    String indexPostfix = ElasticsearchUtils.getIndexFormat(accessConfig.getGlobalConfigSupplier().get()).format(new Date());
    String sensorType = update.getSensorType();
    String indexName = getIndexName(update, index, indexPostfix);
    IndexRequest indexRequest = buildIndexRequest(update, sensorType, indexName);
    try {
        // Index synchronously and fail fast if any shard reports a failure.
        IndexResponse response = client.index(indexRequest).get();
        ShardInfo shardInfo = response.getShardInfo();
        int failed = shardInfo.getFailed();
        if (failed > 0) {
            throw new IOException("ElasticsearchDao index failed: " + Arrays.toString(shardInfo.getFailures()));
        }
    } catch (Exception e) {
        throw new IOException(e.getMessage(), e);
    }
}
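The buildIndexRequest call above is a Metron-internal helper. As a hedged sketch only, such a helper typically copies the document's GUID and source map onto the request; the doc-type suffix below is an illustrative assumption, not Metron's actual naming:

// Hypothetical sketch of a buildIndexRequest-style helper; the real Metron
// implementation may differ. It maps the Document's GUID and source onto the request.
private IndexRequest buildIndexRequest(Document update, String sensorType, String indexName) {
    return new IndexRequest(indexName, sensorType + "_doc", update.getGuid())
            .source(update.getDocument());
}

Because client.index(...).get() blocks until the response arrives, the shard-level failure check runs before update returns, so callers see indexing problems as an IOException rather than a silent partial write.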
use of org.elasticsearch.action.index.IndexRequest in project nutch by apache.
the class ElasticIndexWriter method write.
@Override
public void write(NutchDocument doc) throws IOException {
    String id = (String) doc.getFieldValue("id");
    String type = doc.getDocumentMeta().get("type");
    if (type == null) {
        type = "doc";
    }
    // Add each field of this doc to the index source;
    // single-valued fields are stored as scalars, multi-valued fields as lists.
    Map<String, Object> source = new HashMap<>();
    for (final Map.Entry<String, NutchField> e : doc) {
        final List<Object> values = e.getValue().getValues();
        if (values.size() > 1) {
            source.put(e.getKey(), values);
        } else {
            source.put(e.getKey(), values.get(0));
        }
    }
    IndexRequest request = new IndexRequest(defaultIndex, type, id).source(source);
    bulkProcessor.add(request);
}
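The bulkProcessor field is assumed to have been built once when the writer was opened. A minimal sketch of such a setup with the transport-client BulkProcessor (the thresholds and no-op listener here are illustrative, not Nutch's actual configuration):

BulkProcessor bulkProcessor = BulkProcessor.builder(client, new BulkProcessor.Listener() {
    @Override
    public void beforeBulk(long executionId, BulkRequest request) { }

    @Override
    public void afterBulk(long executionId, BulkRequest request, BulkResponse response) { }

    @Override
    public void afterBulk(long executionId, BulkRequest request, Throwable failure) { }
})
    .setBulkActions(250)                               // flush after 250 queued requests
    .setFlushInterval(TimeValue.timeValueSeconds(5))   // or after 5 seconds
    .setConcurrentRequests(1)                          // allow one in-flight bulk while buffering
    .build();

With this in place, write() stays non-blocking: each IndexRequest is buffered and shipped in batches rather than issued as an individual round trip.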
use of org.elasticsearch.action.index.IndexRequest in project incubator-sdap-mudrod by apache.
the class CrawlerDetection method checkByRate.
private int checkByRate(ESDriver es, String user) {
    int rate = Integer.parseInt(props.getProperty(MudrodConstants.REQUEST_RATE));
    Pattern pattern = Pattern.compile("get (.*?) http/*");
    Matcher matcher;
    // Count this user's requests per minute, ordering buckets so the busiest minute comes first.
    BoolQueryBuilder filterSearch = new BoolQueryBuilder();
    filterSearch.must(QueryBuilders.termQuery("IP", user));
    AggregationBuilder aggregation = AggregationBuilders.dateHistogram("by_minute")
        .field("Time")
        .dateHistogramInterval(DateHistogramInterval.MINUTE)
        .order(Order.COUNT_DESC);
    SearchResponse checkRobot = es.getClient().prepareSearch(logIndex)
        .setTypes(httpType, ftpType)
        .setQuery(filterSearch)
        .setSize(0)
        .addAggregation(aggregation)
        .execute().actionGet();
    Histogram agg = checkRobot.getAggregations().get("by_minute");
    List<? extends Histogram.Bucket> botList = agg.getBuckets();
    long maxCount = botList.get(0).getDocCount();
    if (maxCount >= rate) {
        // The peak per-minute request count reaches the threshold: treat the user as a crawler.
        return 0;
    } else {
        DateTime dt1 = null;
        int toLast = 0;
        // Scroll through all of the user's log entries in batches of 100.
        SearchResponse scrollResp = es.getClient().prepareSearch(logIndex)
            .setTypes(httpType, ftpType)
            .setScroll(new TimeValue(60000))
            .setQuery(filterSearch)
            .setSize(100)
            .execute().actionGet();
        while (true) {
            for (SearchHit hit : scrollResp.getHits().getHits()) {
                Map<String, Object> result = hit.getSource();
                String logtype = (String) result.get("LogType");
                if (logtype.equals(MudrodConstants.HTTP_LOG)) {
                    // Extract the requested path from the raw HTTP request line.
                    String request = (String) result.get("Request");
                    matcher = pattern.matcher(request.trim().toLowerCase());
                    boolean find = false;
                    while (matcher.find()) {
                        request = matcher.group(1);
                        result.put("RequestUrl", props.getProperty(MudrodConstants.BASE_URL) + request);
                        find = true;
                    }
                    if (!find) {
                        result.put("RequestUrl", request);
                    }
                } else {
                    result.put("RequestUrl", result.get("Request"));
                }
                // Record the number of seconds since this user's previous request.
                DateTimeFormatter fmt = ISODateTimeFormat.dateTime();
                DateTime dt2 = fmt.parseDateTime((String) result.get("Time"));
                if (dt1 == null) {
                    toLast = 0;
                } else {
                    toLast = Math.abs(Seconds.secondsBetween(dt1, dt2).getSeconds());
                }
                result.put("ToLast", toLast);
                // Re-index the enriched entry into the cleanup type via the shared bulk processor.
                IndexRequest ir = new IndexRequest(logIndex, cleanupType).source(result);
                es.getBulkProcessor().add(ir);
                dt1 = dt2;
            }
            scrollResp = es.getClient().prepareSearchScroll(scrollResp.getScrollId())
                .setScroll(new TimeValue(600000))
                .execute().actionGet();
            // Break condition: no more hits are returned.
            if (scrollResp.getHits().getHits().length == 0) {
                break;
            }
        }
    }
    return 1;
}
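The pattern "get (.*?) http/*" assumes raw request lines of the form GET <path> HTTP/1.1, lowercased before matching, with group(1) capturing the requested path. A standalone check of that behavior, using the java.util.regex.Pattern and Matcher types already imported by the class:

Pattern pattern = Pattern.compile("get (.*?) http/*");
Matcher matcher = pattern.matcher("GET /datasetlist?search=ocean HTTP/1.1".trim().toLowerCase());
if (matcher.find()) {
    System.out.println(matcher.group(1)); // prints /datasetlist?search=ocean
}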
use of org.elasticsearch.action.index.IndexRequest in project incubator-sdap-mudrod by apache.
the class ImportLogFile method parseSingleLineFTP.
/**
 * Parse a single FTP log entry.
 *
 * @param log   a single log line
 * @param index the index name we wish to persist the log line to
 * @param type  one of the available protocols from which Mudrod logs are obtained
 */
public void parseSingleLineFTP(String log, String index, String type) {
    // Tokenize once on runs of whitespace instead of re-splitting the line per field.
    String[] tokens = log.split(" +");
    String ip = tokens[6];
    String time = tokens[1] + ":" + tokens[2] + ":" + tokens[3] + ":" + tokens[4];
    time = switchtoNum(time);
    SimpleDateFormat formatter = new SimpleDateFormat("MM:dd:HH:mm:ss:yyyy");
    Date date = null;
    try {
        date = formatter.parse(time);
    } catch (ParseException e) {
        LOG.error("Error whilst parsing the date.", e);
    }
    String bytes = tokens[7];
    String request = tokens[8].toLowerCase();
    // Skip housekeeping entries such as README fetches and /misc/ paths.
    if (!request.contains("/misc/") && !request.contains("readme")) {
        IndexRequest ir;
        try {
            ir = new IndexRequest(index, type).source(jsonBuilder().startObject()
                .field("LogType", MudrodConstants.FTP_LOG)
                .field("IP", ip)
                .field("Time", date)
                .field("Request", request)
                .field("Bytes", Long.parseLong(bytes))
                .endObject());
            es.getBulkProcessor().add(ir);
        } catch (NumberFormatException e) {
            LOG.error("Error whilst processing numbers", e);
        } catch (IOException e) {
            LOG.error("IOError whilst adding to the bulk processor.", e);
        }
    }
}
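Here jsonBuilder() is the static import of org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder. A minimal standalone example of the JSON it produces (the field values are made up; builder.string() is the accessor on the 5.x line this project targets, while newer clients use Strings.toString(builder)):

XContentBuilder builder = jsonBuilder().startObject()
    .field("LogType", "ftp")
    .field("IP", "127.0.0.1")
    .endObject();
System.out.println(builder.string()); // {"LogType":"ftp","IP":"127.0.0.1"}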
use of org.elasticsearch.action.index.IndexRequest in project incubator-sdap-mudrod by apache.
the class SessionStatistic method processSession.
public int processSession(ESDriver es, String sessionId) throws IOException, InterruptedException, ExecutionException {
    String inputType = cleanupType;
    String outputType = sessionStats;
    DateTimeFormatter fmt = ISODateTimeFormat.dateTime();
    String min = null;
    String max = null;
    DateTime start = null;
    DateTime end = null;
    int duration = 0;
    float requestRate = 0;
    int sessionCount = 0;
    Pattern pattern = Pattern.compile("get (.*?) http/*");
    // Use a stats aggregation over the Time field to get the session's first and last timestamps.
    StatsAggregationBuilder statsAgg = AggregationBuilders.stats("Stats").field("Time");
    BoolQueryBuilder filterSearch = new BoolQueryBuilder();
    filterSearch.must(QueryBuilders.termQuery("SessionID", sessionId));
    SearchResponse sr = es.getClient().prepareSearch(logIndex)
        .setTypes(inputType)
        .setQuery(filterSearch)
        .addAggregation(statsAgg)
        .execute().actionGet();
    Stats agg = sr.getAggregations().get("Stats");
    min = agg.getMinAsString();
    max = agg.getMaxAsString();
    start = fmt.parseDateTime(min);
    end = fmt.parseDateTime(max);
    duration = Seconds.secondsBetween(start, end).getSeconds();
    int searchDataListRequestCount = 0;
    int searchDataRequestCount = 0;
    int searchDataListRequestByKeywordsCount = 0;
    int ftpRequestCount = 0;
    int keywordsNum = 0;
    String iP = null;
    String keywords = "";
    String views = "";
    String downloads = "";
    // Scroll through every log entry in the session, classifying each request.
    SearchResponse scrollResp = es.getClient().prepareSearch(logIndex)
        .setTypes(inputType)
        .setScroll(new TimeValue(60000))
        .setQuery(filterSearch)
        .setSize(100)
        .execute().actionGet();
    while (true) {
        for (SearchHit hit : scrollResp.getHits().getHits()) {
            Map<String, Object> result = hit.getSource();
            String request = (String) result.get("Request");
            String logType = (String) result.get("LogType");
            iP = (String) result.get("IP");
            Matcher matcher = pattern.matcher(request.trim().toLowerCase());
            while (matcher.find()) {
                request = matcher.group(1);
            }
            String datasetlist = props.getProperty(MudrodConstants.SEARCH_MARKER);
            String dataset = props.getProperty(MudrodConstants.VIEW_MARKER);
            if (request.contains(datasetlist)) {
                // A dataset-list search: accumulate its analyzed keywords, deduplicated.
                searchDataListRequestCount++;
                RequestUrl requestURL = new RequestUrl();
                String infoStr = requestURL.getSearchInfo(request) + ",";
                String info = es.customAnalyzing(props.getProperty(MudrodConstants.ES_INDEX_NAME), infoStr);
                if (!",".equals(info)) {
                    if ("".equals(keywords)) {
                        keywords = keywords + info;
                    } else {
                        String[] items = info.split(",");
                        String[] keywordList = keywords.split(",");
                        for (String item : items) {
                            if (!Arrays.asList(keywordList).contains(item)) {
                                keywords = keywords + item + ",";
                            }
                        }
                    }
                }
            }
            if (request.startsWith(dataset)) {
                // A dataset view: record each distinct dataset viewed.
                searchDataRequestCount++;
                if (findDataset(request) != null) {
                    String view = findDataset(request);
                    if ("".equals(views)) {
                        views = view;
                    } else if (!views.contains(view)) {
                        views = views + "," + view;
                    }
                }
            }
            if (MudrodConstants.FTP_LOG.equals(logType)) {
                // An FTP request: record each distinct non-trivial file downloaded.
                ftpRequestCount++;
                String download = "";
                String requestLowercase = request.toLowerCase();
                if (!requestLowercase.endsWith(".jpg") && !requestLowercase.endsWith(".pdf") && !requestLowercase.endsWith(".txt") && !requestLowercase.endsWith(".gif")) {
                    download = request;
                }
                if ("".equals(downloads)) {
                    downloads = download;
                } else {
                    if (!downloads.contains(download)) {
                        downloads = downloads + "," + download;
                    }
                }
            }
        }
        scrollResp = es.getClient().prepareSearchScroll(scrollResp.getScrollId())
            .setScroll(new TimeValue(600000))
            .execute().actionGet();
        // Break condition: no hits are returned.
        if (scrollResp.getHits().getHits().length == 0) {
            break;
        }
    }
    if (!"".equals(keywords)) {
        keywordsNum = keywords.split(",").length;
    }
    // Persist the session summary only when the request counts fall within the configured bounds.
    if (searchDataListRequestCount != 0
        && searchDataListRequestCount <= Integer.parseInt(props.getProperty(MudrodConstants.SEARCH_F))
        && searchDataRequestCount != 0
        && searchDataRequestCount <= Integer.parseInt(props.getProperty(MudrodConstants.VIEW_F))
        && ftpRequestCount <= Integer.parseInt(props.getProperty(MudrodConstants.DOWNLOAD_F))) {
        String sessionURL = props.getProperty(MudrodConstants.SESSION_PORT) + props.getProperty(MudrodConstants.SESSION_URL)
            + "?sessionid=" + sessionId + "&sessionType=" + outputType + "&requestType=" + inputType;
        sessionCount = 1;
        IndexRequest ir = new IndexRequest(logIndex, outputType).source(jsonBuilder().startObject()
            .field("SessionID", sessionId)
            .field("SessionURL", sessionURL)
            .field("Duration", duration)
            .field("Number of Keywords", keywordsNum)
            .field("Time", min)
            .field("End_time", max)
            .field("searchDataListRequest_count", searchDataListRequestCount)
            .field("searchDataListRequest_byKeywords_count", searchDataListRequestByKeywordsCount)
            .field("searchDataRequest_count", searchDataRequestCount)
            .field("keywords", es.customAnalyzing(logIndex, keywords))
            .field("views", views)
            .field("downloads", downloads)
            .field("request_rate", requestRate)
            .field("Comments", "")
            .field("Validation", 0)
            .field("Produceby", 0)
            .field("Correlation", 0)
            .field("IP", iP)
            .endObject());
        es.getBulkProcessor().add(ir);
    }
    return sessionCount;
}
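processSession is presumably invoked once per distinct session ID. A hedged sketch of such a driver loop using a terms aggregation over SessionID (the aggregation name and size are illustrative assumptions, not Mudrod's actual caller):

SearchResponse sessionsResp = es.getClient().prepareSearch(logIndex)
    .setTypes(cleanupType)
    .setSize(0)
    .addAggregation(AggregationBuilders.terms("sessions").field("SessionID").size(10000))
    .execute().actionGet();
Terms sessions = sessionsResp.getAggregations().get("sessions");
int validSessions = 0;
for (Terms.Bucket bucket : sessions.getBuckets()) {
    validSessions += processSession(es, bucket.getKeyAsString());
}

Only sessions whose request counts pass the threshold check contribute 1 to the total, so validSessions counts the sessions actually persisted to the sessionStats type.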