Use of org.graylog.shaded.elasticsearch7.org.elasticsearch.search.SearchHit in project incubator-sdap-mudrod by apache.
The class CrawlerDetection, method checkByRate.
private int checkByRate(ESDriver es, String user) {
  int rate = Integer.parseInt(props.getProperty(MudrodConstants.REQUEST_RATE));
  Pattern pattern = Pattern.compile("get (.*?) http/*");
  Matcher matcher;

  // Bucket this user's requests per minute, busiest minute first.
  BoolQueryBuilder filterSearch = new BoolQueryBuilder();
  filterSearch.must(QueryBuilders.termQuery("IP", user));
  AggregationBuilder aggregation = AggregationBuilders.dateHistogram("by_minute").field("Time").dateHistogramInterval(DateHistogramInterval.MINUTE).order(Order.COUNT_DESC);
  SearchResponse checkRobot = es.getClient().prepareSearch(logIndex).setTypes(httpType, ftpType).setQuery(filterSearch).setSize(0).addAggregation(aggregation).execute().actionGet();

  Histogram agg = checkRobot.getAggregations().get("by_minute");
  List<? extends Histogram.Bucket> botList = agg.getBuckets();
  // Guard against an empty bucket list before reading the busiest minute.
  long maxCount = botList.isEmpty() ? 0 : botList.get(0).getDocCount();
  if (maxCount >= rate) {
    // Request rate exceeded: flag the user as a crawler.
    return 0;
  }

  // Otherwise scroll through the user's log entries, normalize the request
  // URL, and record the gap in seconds to the previous request ("ToLast").
  DateTime dt1 = null;
  int toLast = 0;
  SearchResponse scrollResp = es.getClient().prepareSearch(logIndex).setTypes(httpType, ftpType).setScroll(new TimeValue(60000)).setQuery(filterSearch).setSize(100).execute().actionGet();
  while (true) {
    for (SearchHit hit : scrollResp.getHits().getHits()) {
      Map<String, Object> result = hit.getSource();
      String logtype = (String) result.get("LogType");
      if (logtype.equals(MudrodConstants.HTTP_LOG)) {
        String request = (String) result.get("Request");
        matcher = pattern.matcher(request.trim().toLowerCase());
        boolean find = false;
        while (matcher.find()) {
          request = matcher.group(1);
          result.put("RequestUrl", props.getProperty(MudrodConstants.BASE_URL) + request);
          find = true;
        }
        if (!find) {
          result.put("RequestUrl", request);
        }
      } else {
        result.put("RequestUrl", result.get("Request"));
      }

      DateTimeFormatter fmt = ISODateTimeFormat.dateTime();
      DateTime dt2 = fmt.parseDateTime((String) result.get("Time"));
      toLast = (dt1 == null) ? 0 : Math.abs(Seconds.secondsBetween(dt1, dt2).getSeconds());
      result.put("ToLast", toLast);

      IndexRequest ir = new IndexRequest(logIndex, cleanupType).source(result);
      es.getBulkProcessor().add(ir);
      dt1 = dt2;
    }
    scrollResp = es.getClient().prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet();
    // Break condition: no hits are returned.
    if (scrollResp.getHits().getHits().length == 0) {
      break;
    }
  }
  return 1;
}
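checkByRate is private to CrawlerDetection, so the sketch below is only illustrative of how a caller might drive it: the user list would normally come from elsewhere (for example, a terms aggregation on the "IP" field), and the IPs shown here are placeholders, not values from the project.

// Hypothetical driver loop: 0 means the rate was exceeded (likely a crawler),
// 1 means the user's entries were kept and re-indexed into cleanupType.
List<String> users = Arrays.asList("192.0.2.1", "192.0.2.2");
int validUsers = 0;
for (String user : users) {
  validUsers += checkByRate(es, user);
}
// Flush queued index requests before reading the cleaned-up type.
es.getBulkProcessor().flush();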
Use of org.graylog.shaded.elasticsearch7.org.elasticsearch.search.SearchHit in project incubator-sdap-mudrod by apache.
The class SessionGenerator, method combineShortSessions.
public void combineShortSessions(ESDriver es, String user, int timeThres) throws ElasticsearchException, IOException {
  BoolQueryBuilder filterSearch = new BoolQueryBuilder();
  filterSearch.must(QueryBuilders.termQuery("IP", user));

  // Users with fewer than three requests are too sparse to form sessions.
  String[] indexArr = new String[] { logIndex };
  String[] typeArr = new String[] { cleanupType };
  int docCount = es.getDocCount(indexArr, typeArr, filterSearch);
  if (docCount < 3) {
    deleteInvalid(es, user);
    return;
  }

  // Drop users for whom 80% or more of the requests carry no referer.
  BoolQueryBuilder filterCheck = new BoolQueryBuilder();
  filterCheck.must(QueryBuilders.termQuery("IP", user)).must(QueryBuilders.termQuery("Referer", "-"));
  SearchResponse checkReferer = es.getClient().prepareSearch(logIndex).setTypes(this.cleanupType).setScroll(new TimeValue(60000)).setQuery(filterCheck).setSize(0).execute().actionGet();
  long numInvalid = checkReferer.getHits().getTotalHits();
  // Cast before dividing; long/int arithmetic would truncate the rate to 0.
  double invalidRate = (double) numInvalid / docCount;
  if (invalidRate >= 0.8) {
    deleteInvalid(es, user);
    return;
  }

  // Fetch each session's time span via a stats sub-aggregation on "Time".
  StatsAggregationBuilder statsAgg = AggregationBuilders.stats("Stats").field("Time");
  SearchResponse srSession = es.getClient().prepareSearch(logIndex).setTypes(this.cleanupType).setScroll(new TimeValue(60000)).setQuery(filterSearch).addAggregation(AggregationBuilders.terms("Sessions").field("SessionID").size(docCount).subAggregation(statsAgg)).execute().actionGet();

  Terms sessions = srSession.getAggregations().get("Sessions");
  List<Session> sessionList = new ArrayList<>();
  for (Terms.Bucket session : sessions.getBuckets()) {
    Stats agg = session.getAggregations().get("Stats");
    Session sess = new Session(props, es, agg.getMinAsString(), agg.getMaxAsString(), session.getKey().toString());
    sessionList.add(sess);
  }
  Collections.sort(sessionList);

  // Walk sessions in time order; when a session starts within timeThres
  // seconds of the previous one's end, merge it by rewriting its SessionID.
  DateTimeFormatter fmt = ISODateTimeFormat.dateTime();
  String last = null;
  String lastnewID = null;
  String lastoldID = null;
  String current = null;
  for (Session s : sessionList) {
    current = s.getEndTime();
    if (last != null && Seconds.secondsBetween(fmt.parseDateTime(last), fmt.parseDateTime(current)).getSeconds() < timeThres) {
      String mergedID = (lastnewID == null) ? lastoldID : lastnewID;
      s.setNewID(mergedID);
      QueryBuilder fs = QueryBuilders.boolQuery().filter(QueryBuilders.termQuery("SessionID", s.getID()));
      SearchResponse scrollResp = es.getClient().prepareSearch(logIndex).setTypes(this.cleanupType).setScroll(new TimeValue(60000)).setQuery(fs).setSize(100).execute().actionGet();
      while (true) {
        for (SearchHit hit : scrollResp.getHits().getHits()) {
          update(es, logIndex, this.cleanupType, hit.getId(), "SessionID", mergedID);
        }
        scrollResp = es.getClient().prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet();
        if (scrollResp.getHits().getHits().length == 0) {
          break;
        }
      }
    }
    lastoldID = s.getID();
    lastnewID = s.getNewID();
    last = current;
  }
}
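The update(...) helper called above is not part of this snippet. A minimal sketch of what such a helper could look like with the same transport-client API, assuming the bulk processor accepts update requests the way it accepts index requests; the actual signature and body in MUDROD may differ.

private void update(ESDriver es, String index, String type, String id, String field, Object value) throws IOException {
  // Partial-document update: overwrite a single field of an existing document.
  UpdateRequest ur = new UpdateRequest(index, type, id).doc(jsonBuilder().startObject().field(field, value).endObject());
  es.getBulkProcessor().add(ur);
}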
Use of org.graylog.shaded.elasticsearch7.org.elasticsearch.search.SearchHit in project incubator-sdap-mudrod by apache.
The class SessionStatistic, method processSession.
public int processSession(ESDriver es, String sessionId) throws IOException, InterruptedException, ExecutionException {
  String inputType = cleanupType;
  String outputType = sessionStats;

  DateTimeFormatter fmt = ISODateTimeFormat.dateTime();
  String min = null;
  String max = null;
  DateTime start = null;
  DateTime end = null;
  int duration = 0;
  float requestRate = 0;
  int sessionCount = 0;
  Pattern pattern = Pattern.compile("get (.*?) http/*");

  // Session duration: min/max of "Time" over this session's entries.
  StatsAggregationBuilder statsAgg = AggregationBuilders.stats("Stats").field("Time");
  BoolQueryBuilder filterSearch = new BoolQueryBuilder();
  filterSearch.must(QueryBuilders.termQuery("SessionID", sessionId));
  SearchResponse sr = es.getClient().prepareSearch(logIndex).setTypes(inputType).setQuery(filterSearch).addAggregation(statsAgg).execute().actionGet();
  Stats agg = sr.getAggregations().get("Stats");
  min = agg.getMinAsString();
  max = agg.getMaxAsString();
  start = fmt.parseDateTime(min);
  end = fmt.parseDateTime(max);
  duration = Seconds.secondsBetween(start, end).getSeconds();

  int searchDataListRequestCount = 0;
  int searchDataRequestCount = 0;
  int searchDataListRequestByKeywordsCount = 0;
  int ftpRequestCount = 0;
  int keywordsNum = 0;
  String iP = null;
  String keywords = "";
  String views = "";
  String downloads = "";

  // Scroll the session's entries, tallying searches, views, and downloads.
  SearchResponse scrollResp = es.getClient().prepareSearch(logIndex).setTypes(inputType).setScroll(new TimeValue(60000)).setQuery(filterSearch).setSize(100).execute().actionGet();
  while (true) {
    for (SearchHit hit : scrollResp.getHits().getHits()) {
      Map<String, Object> result = hit.getSource();
      String request = (String) result.get("Request");
      String logType = (String) result.get("LogType");
      iP = (String) result.get("IP");
      Matcher matcher = pattern.matcher(request.trim().toLowerCase());
      while (matcher.find()) {
        request = matcher.group(1);
      }

      String datasetlist = props.getProperty(MudrodConstants.SEARCH_MARKER);
      String dataset = props.getProperty(MudrodConstants.VIEW_MARKER);

      // Search requests: extract query keywords and de-duplicate them.
      if (request.contains(datasetlist)) {
        searchDataListRequestCount++;
        RequestUrl requestURL = new RequestUrl();
        String infoStr = requestURL.getSearchInfo(request) + ",";
        String info = es.customAnalyzing(props.getProperty(MudrodConstants.ES_INDEX_NAME), infoStr);
        if (!",".equals(info)) {
          if ("".equals(keywords)) {
            keywords = keywords + info;
          } else {
            String[] items = info.split(",");
            String[] keywordList = keywords.split(",");
            for (String item : items) {
              if (!Arrays.asList(keywordList).contains(item)) {
                keywords = keywords + item + ",";
              }
            }
          }
        }
      }

      // Dataset view requests: collect the distinct datasets viewed.
      if (request.startsWith(dataset)) {
        searchDataRequestCount++;
        if (findDataset(request) != null) {
          String view = findDataset(request);
          if ("".equals(views)) {
            views = view;
          } else if (!views.contains(view)) {
            views = views + "," + view;
          }
        }
      }

      // FTP requests: collect downloads, skipping auxiliary file types.
      if (MudrodConstants.FTP_LOG.equals(logType)) {
        ftpRequestCount++;
        String download = "";
        String requestLowercase = request.toLowerCase();
        if (!requestLowercase.endsWith(".jpg") && !requestLowercase.endsWith(".pdf") && !requestLowercase.endsWith(".txt") && !requestLowercase.endsWith(".gif")) {
          download = request;
        }
        if ("".equals(downloads)) {
          downloads = download;
        } else if (!downloads.contains(download)) {
          downloads = downloads + "," + download;
        }
      }
    }
    scrollResp = es.getClient().prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet();
    // Break condition: no hits are returned.
    if (scrollResp.getHits().getHits().length == 0) {
      break;
    }
  }

  if (!"".equals(keywords)) {
    keywordsNum = keywords.split(",").length;
  }

  // Index a session-statistics document only when the counts fall within the
  // configured thresholds (SEARCH_F, VIEW_F, DOWNLOAD_F).
  if (searchDataListRequestCount != 0 && searchDataListRequestCount <= Integer.parseInt(props.getProperty(MudrodConstants.SEARCH_F)) && searchDataRequestCount != 0 && searchDataRequestCount <= Integer.parseInt(props.getProperty(MudrodConstants.VIEW_F)) && ftpRequestCount <= Integer.parseInt(props.getProperty(MudrodConstants.DOWNLOAD_F))) {
    String sessionURL = props.getProperty(MudrodConstants.SESSION_PORT) + props.getProperty(MudrodConstants.SESSION_URL) + "?sessionid=" + sessionId + "&sessionType=" + outputType + "&requestType=" + inputType;
    sessionCount = 1;
    IndexRequest ir = new IndexRequest(logIndex, outputType).source(jsonBuilder().startObject().field("SessionID", sessionId).field("SessionURL", sessionURL).field("Duration", duration).field("Number of Keywords", keywordsNum).field("Time", min).field("End_time", max).field("searchDataListRequest_count", searchDataListRequestCount).field("searchDataListRequest_byKeywords_count", searchDataListRequestByKeywordsCount).field("searchDataRequest_count", searchDataRequestCount).field("keywords", es.customAnalyzing(logIndex, keywords)).field("views", views).field("downloads", downloads).field("request_rate", requestRate).field("Comments", "").field("Validation", 0).field("Produceby", 0).field("Correlation", 0).field("IP", iP).endObject());
    es.getBulkProcessor().add(ir);
  }
  return sessionCount;
}
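A hedged sketch of driving processSession over all session IDs, mirroring the terms aggregation used in combineShortSessions; the size(10000) cap and this driver loop are illustrative, not the project's actual caller.

// Illustrative driver: count sessions that pass the threshold checks.
SearchResponse sr = es.getClient().prepareSearch(logIndex).setTypes(cleanupType).setSize(0)
    .addAggregation(AggregationBuilders.terms("Sessions").field("SessionID").size(10000))
    .execute().actionGet();
Terms sessions = sr.getAggregations().get("Sessions");
int validSessions = 0;
for (Terms.Bucket bucket : sessions.getBuckets()) {
  validSessions += processSession(es, bucket.getKey().toString());
}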
Use of org.graylog.shaded.elasticsearch7.org.elasticsearch.search.SearchHit in project incubator-sdap-mudrod by apache.
The class ESDriver, method deleteAllByQuery.
public void deleteAllByQuery(String index, String type, QueryBuilder query) {
  ImmutableOpenMap<String, MappingMetaData> mappings = getClient().admin().cluster().prepareState().execute().actionGet().getState().metaData().index(index).getMappings();
  // Nothing to delete if the type does not exist in this index.
  if (!mappings.containsKey(type)) {
    return;
  }
  createBulkProcessor();
  // Scroll over every matching document and queue a delete for each one.
  SearchResponse scrollResp = getClient().prepareSearch(index).setSearchType(SearchType.QUERY_AND_FETCH).setTypes(type).setScroll(new TimeValue(60000)).setQuery(query).setSize(10000).execute().actionGet();
  while (true) {
    for (SearchHit hit : scrollResp.getHits().getHits()) {
      DeleteRequest deleteRequest = new DeleteRequest(index, type, hit.getId());
      getBulkProcessor().add(deleteRequest);
    }
    scrollResp = getClient().prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet();
    if (scrollResp.getHits().getHits().length == 0) {
      break;
    }
  }
  destroyBulkProcessor();
}
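Usage is a single call. Given an ESDriver instance named driver, it might look like the following; the index, type, and IP below are placeholders, not values taken from this project.

// Delete every cleaned-up log entry for one IP (placeholder names).
driver.deleteAllByQuery("mudrod", "cleanupLog", QueryBuilders.termQuery("IP", "192.0.2.1"));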
Use of org.graylog.shaded.elasticsearch7.org.elasticsearch.search.SearchHit in project incubator-sdap-mudrod by apache.
The class ESDriver, method searchByQuery.
@SuppressWarnings("unchecked")
public String searchByQuery(String index, String type, String query, Boolean bDetail) throws IOException, InterruptedException, ExecutionException {
  boolean exists = getClient().admin().indices().prepareExists(index).execute().actionGet().isExists();
  if (!exists) {
    return null;
  }

  QueryBuilder qb = QueryBuilders.queryStringQuery(query);
  SearchResponse response = getClient().prepareSearch(index).setTypes(type).setQuery(qb).setSize(500).execute().actionGet();

  // Map of K,V pairs where the key is the field name from the search result and
  // the value is the name that should be returned for that field. Not always the same.
  Map<String, String> fieldsToReturn = new HashMap<>();
  fieldsToReturn.put("Dataset-ShortName", "Short Name");
  fieldsToReturn.put("Dataset-LongName", "Long Name");
  fieldsToReturn.put("DatasetParameter-Topic", "Topic");
  fieldsToReturn.put("Dataset-Description", "Dataset-Description");
  fieldsToReturn.put("DatasetCitation-ReleaseDateLong", "Release Date");
  if (bDetail) {
    fieldsToReturn.put("DatasetPolicy-DataFormat", "DataFormat");
    fieldsToReturn.put("Dataset-Doi", "Dataset-Doi");
    fieldsToReturn.put("Dataset-ProcessingLevel", "Processing Level");
    fieldsToReturn.put("DatasetCitation-Version", "Version");
    fieldsToReturn.put("DatasetSource-Sensor-ShortName", "DatasetSource-Sensor-ShortName");
    fieldsToReturn.put("DatasetProject-Project-ShortName", "DatasetProject-Project-ShortName");
    fieldsToReturn.put("DatasetParameter-Category", "DatasetParameter-Category");
    fieldsToReturn.put("DatasetLocationPolicy-BasePath", "DatasetLocationPolicy-BasePath");
    fieldsToReturn.put("DatasetParameter-Variable-Full", "DatasetParameter-Variable-Full");
    fieldsToReturn.put("DatasetParameter-Term-Full", "DatasetParameter-Term-Full");
    fieldsToReturn.put("DatasetParameter-VariableDetail", "DatasetParameter-VariableDetail");
    fieldsToReturn.put("DatasetRegion-Region", "Region");
    fieldsToReturn.put("DatasetCoverage-NorthLat", "NorthLat");
    fieldsToReturn.put("DatasetCoverage-SouthLat", "SouthLat");
    fieldsToReturn.put("DatasetCoverage-WestLon", "WestLon");
    fieldsToReturn.put("DatasetCoverage-EastLon", "EastLon");
    fieldsToReturn.put("DatasetCoverage-StartTimeLong-Long", "DatasetCoverage-StartTimeLong-Long");
    fieldsToReturn.put("Dataset-DatasetCoverage-StopTimeLong", "Dataset-DatasetCoverage-StopTimeLong");
    fieldsToReturn.put("Dataset-TemporalResolution", "Dataset-TemporalResolution");
    fieldsToReturn.put("Dataset-TemporalRepeat", "Dataset-TemporalRepeat");
    fieldsToReturn.put("Dataset-LatitudeResolution", "Dataset-LatitudeResolution");
    fieldsToReturn.put("Dataset-LongitudeResolution", "Dataset-LongitudeResolution");
    fieldsToReturn.put("Dataset-AcrossTrackResolution", "Dataset-AcrossTrackResolution");
    fieldsToReturn.put("Dataset-AlongTrackResolution", "Dataset-AlongTrackResolution");
  }

  List<Map<String, Object>> searchResults = new ArrayList<>();
  for (SearchHit hit : response.getHits().getHits()) {
    Map<String, Object> source = hit.getSource();
    Map<String, Object> searchResult = source.entrySet().stream().filter(entry -> fieldsToReturn.keySet().contains(entry.getKey())).collect(Collectors.toMap(entry -> fieldsToReturn.get(entry.getKey()), Entry::getValue));
    // searchResult is now a map where the key = value from fieldsToReturn and the value = value from the search result.
    // Some results require special handling/formatting:
    // Release Date formatting
    LocalDate releaseDate = Instant.ofEpochMilli(Long.parseLong(((ArrayList<String>) searchResult.get("Release Date")).get(0))).atZone(ZoneId.of("Z")).toLocalDate();
    searchResult.put("Release Date", releaseDate.format(DateTimeFormatter.ISO_DATE));
    if (bDetail) {
      // DataFormat value: translate RAW to BINARY.
      if ("RAW".equals(searchResult.get("DataFormat"))) {
        searchResult.put("DataFormat", "BINARY");
      }
      // DatasetLocationPolicy-BasePath should only contain ftp, http, or https URLs.
      List<String> urls = ((List<String>) searchResult.get("DatasetLocationPolicy-BasePath")).stream().filter(url -> url.startsWith("ftp") || url.startsWith("http")).collect(Collectors.toList());
      searchResult.put("DatasetLocationPolicy-BasePath", urls);
      // Time span formatting; an empty stop time means the dataset is ongoing.
      LocalDate startDate = Instant.ofEpochMilli((Long) searchResult.get("DatasetCoverage-StartTimeLong-Long")).atZone(ZoneId.of("Z")).toLocalDate();
      LocalDate endDate = "".equals(searchResult.get("Dataset-DatasetCoverage-StopTimeLong")) ? null : Instant.ofEpochMilli(Long.parseLong(searchResult.get("Dataset-DatasetCoverage-StopTimeLong").toString())).atZone(ZoneId.of("Z")).toLocalDate();
      searchResult.put("Time Span", startDate.format(DateTimeFormatter.ISO_DATE) + " to " + (endDate == null ? "Present" : endDate.format(DateTimeFormatter.ISO_DATE)));
      // Temporal resolution can come from one of two fields.
      searchResult.put("TemporalResolution", "".equals(searchResult.get("Dataset-TemporalResolution")) ? searchResult.get("Dataset-TemporalRepeat") : searchResult.get("Dataset-TemporalResolution"));
      // Special formatting for spatial resolution.
      String latResolution = (String) searchResult.get("Dataset-LatitudeResolution");
      String lonResolution = (String) searchResult.get("Dataset-LongitudeResolution");
      if (!latResolution.isEmpty() && !lonResolution.isEmpty()) {
        searchResult.put("SpatialResolution", latResolution + " degrees (latitude) x " + lonResolution + " degrees (longitude)");
      } else {
        // Fall back to along/across-track resolution, converted from meters to km.
        String acrossResolution = (String) searchResult.get("Dataset-AcrossTrackResolution");
        String alonResolution = (String) searchResult.get("Dataset-AlongTrackResolution");
        double dAcrossResolution = Double.parseDouble(acrossResolution) / 1000;
        double dAlonResolution = Double.parseDouble(alonResolution) / 1000;
        searchResult.put("SpatialResolution", dAlonResolution + " km (Along) x " + dAcrossResolution + " km (Across)");
      }
      // Measurements is a list of hierarchies: Topic -> Term -> Variable -> Variable Detail.
      List<List<String>> measurements = buildMeasurementHierarchies((List<String>) searchResult.get("Topic"), (List<String>) searchResult.get("DatasetParameter-Term-Full"), (List<String>) searchResult.get("DatasetParameter-Variable-Full"), (List<String>) searchResult.get("DatasetParameter-VariableDetail"));
      searchResult.put("Measurements", measurements);
    }
    searchResults.add(searchResult);
  }

  Map<String, List<?>> pdResults = new HashMap<>();
  pdResults.put("PDResults", searchResults);
  return new GsonBuilder().create().toJson(pdResults);
}
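Given an ESDriver instance named driver, a call might look like the following; the index and type names are placeholders. The returned JSON wraps the per-dataset result maps under the "PDResults" key.

// Summary search (bDetail = false) over dataset metadata (placeholder names).
String json = driver.searchByQuery("podaacsearch", "dataset", "sea surface temperature", false);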