Search in sources :

Example 61 with TimeValue

use of org.graylog.shaded.elasticsearch7.org.elasticsearch.common.unit.TimeValue in project herd by FINRAOS.

the class IndexFunctionsDaoImpl method getIdsInIndex.

/**
 * The ids in index function will take as arguments the index name and the document type and will return a list of all the ids in the index.
 */
@Override
public final List<String> getIdsInIndex(String indexName, String documentType) {
    // Create an array list for storing the ids
    List<String> idList = new ArrayList<>();
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    searchSourceBuilder.query(QueryBuilders.matchAllQuery());
    // Create a search request and set the scroll time and scroll size
    final SearchRequestBuilder searchRequestBuilder = new SearchRequestBuilder(new ElasticsearchClientImpl(), SearchAction.INSTANCE);
    searchRequestBuilder.setIndices(indexName).setTypes(documentType).setScroll(new TimeValue(ELASTIC_SEARCH_SCROLL_KEEP_ALIVE_TIME)).setSize(ELASTIC_SEARCH_SCROLL_PAGE_SIZE).setSource(searchSourceBuilder);
    // Retrieve the search response
    final Search.Builder searchBuilder = new Search.Builder(searchRequestBuilder.toString()).addIndex(indexName);
    searchBuilder.setParameter(Parameters.SIZE, ELASTIC_SEARCH_SCROLL_PAGE_SIZE);
    searchBuilder.setParameter(Parameters.SCROLL, new TimeValue(ELASTIC_SEARCH_SCROLL_KEEP_ALIVE_TIME).toString());
    JestResult jestResult = jestClientHelper.searchExecute(searchBuilder.build());
    // While there are hits available, page through the results and add them to the id list
    while (jestResult.getSourceAsStringList().size() != 0) {
        for (String jsonString : jestResult.getSourceAsStringList()) {
            JsonElement root = new JsonParser().parse(jsonString);
            idList.add(root.getAsJsonObject().get("id").getAsString());
        }
        String scrollId = jestResult.getJsonObject().get("_scroll_id").getAsString();
        SearchScroll scroll = new SearchScroll.Builder(scrollId, new TimeValue(ELASTIC_SEARCH_SCROLL_KEEP_ALIVE_TIME).toString()).build();
        jestResult = jestClientHelper.searchScrollExecute(scroll);
    }
    return idList;
}
Also used : SearchRequestBuilder(org.elasticsearch.action.search.SearchRequestBuilder) ArrayList(java.util.ArrayList) ElasticsearchClientImpl(org.finra.herd.dao.helper.ElasticsearchClientImpl) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) SearchScroll(io.searchbox.core.SearchScroll) JsonElement(com.google.gson.JsonElement) Search(io.searchbox.core.Search) TimeValue(org.elasticsearch.common.unit.TimeValue) JestResult(io.searchbox.client.JestResult) JsonParser(com.google.gson.JsonParser)

Example 62 with TimeValue

use of org.graylog.shaded.elasticsearch7.org.elasticsearch.common.unit.TimeValue in project incubator-sdap-mudrod by apache.

the class CrawlerDetection method checkByRate.

private int checkByRate(ESDriver es, String user) {
    int rate = Integer.parseInt(props.getProperty(MudrodConstants.REQUEST_RATE));
    Pattern pattern = Pattern.compile("get (.*?) http/*");
    Matcher matcher;
    BoolQueryBuilder filterSearch = new BoolQueryBuilder();
    filterSearch.must(QueryBuilders.termQuery("IP", user));
    AggregationBuilder aggregation = AggregationBuilders.dateHistogram("by_minute").field("Time").dateHistogramInterval(DateHistogramInterval.MINUTE).order(Order.COUNT_DESC);
    SearchResponse checkRobot = es.getClient().prepareSearch(logIndex).setTypes(httpType, ftpType).setQuery(filterSearch).setSize(0).addAggregation(aggregation).execute().actionGet();
    Histogram agg = checkRobot.getAggregations().get("by_minute");
    List<? extends Histogram.Bucket> botList = agg.getBuckets();
    long maxCount = botList.get(0).getDocCount();
    if (maxCount >= rate) {
        return 0;
    } else {
        DateTime dt1 = null;
        int toLast = 0;
        SearchResponse scrollResp = es.getClient().prepareSearch(logIndex).setTypes(httpType, ftpType).setScroll(new TimeValue(60000)).setQuery(filterSearch).setSize(100).execute().actionGet();
        while (true) {
            for (SearchHit hit : scrollResp.getHits().getHits()) {
                Map<String, Object> result = hit.getSource();
                String logtype = (String) result.get("LogType");
                if (logtype.equals(MudrodConstants.HTTP_LOG)) {
                    String request = (String) result.get("Request");
                    matcher = pattern.matcher(request.trim().toLowerCase());
                    boolean find = false;
                    while (matcher.find()) {
                        request = matcher.group(1);
                        result.put("RequestUrl", props.getProperty(MudrodConstants.BASE_URL) + request);
                        find = true;
                    }
                    if (!find) {
                        result.put("RequestUrl", request);
                    }
                } else {
                    result.put("RequestUrl", result.get("Request"));
                }
                DateTimeFormatter fmt = ISODateTimeFormat.dateTime();
                DateTime dt2 = fmt.parseDateTime((String) result.get("Time"));
                if (dt1 == null) {
                    toLast = 0;
                } else {
                    toLast = Math.abs(Seconds.secondsBetween(dt1, dt2).getSeconds());
                }
                result.put("ToLast", toLast);
                IndexRequest ir = new IndexRequest(logIndex, cleanupType).source(result);
                es.getBulkProcessor().add(ir);
                dt1 = dt2;
            }
            scrollResp = es.getClient().prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet();
            if (scrollResp.getHits().getHits().length == 0) {
                break;
            }
        }
    }
    return 1;
}
Also used : Pattern(java.util.regex.Pattern) Histogram(org.elasticsearch.search.aggregations.bucket.histogram.Histogram) AggregationBuilder(org.elasticsearch.search.aggregations.AggregationBuilder) SearchHit(org.elasticsearch.search.SearchHit) Matcher(java.util.regex.Matcher) IndexRequest(org.elasticsearch.action.index.IndexRequest) DateTime(org.joda.time.DateTime) SearchResponse(org.elasticsearch.action.search.SearchResponse) BoolQueryBuilder(org.elasticsearch.index.query.BoolQueryBuilder) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) TimeValue(org.elasticsearch.common.unit.TimeValue)

Example 63 with TimeValue

use of org.graylog.shaded.elasticsearch7.org.elasticsearch.common.unit.TimeValue in project incubator-sdap-mudrod by apache.

the class SessionGenerator method combineShortSessions.

public void combineShortSessions(ESDriver es, String user, int timeThres) throws ElasticsearchException, IOException {
    BoolQueryBuilder filterSearch = new BoolQueryBuilder();
    filterSearch.must(QueryBuilders.termQuery("IP", user));
    String[] indexArr = new String[] { logIndex };
    String[] typeArr = new String[] { cleanupType };
    int docCount = es.getDocCount(indexArr, typeArr, filterSearch);
    if (docCount < 3) {
        deleteInvalid(es, user);
        return;
    }
    BoolQueryBuilder filterCheck = new BoolQueryBuilder();
    filterCheck.must(QueryBuilders.termQuery("IP", user)).must(QueryBuilders.termQuery("Referer", "-"));
    SearchResponse checkReferer = es.getClient().prepareSearch(logIndex).setTypes(this.cleanupType).setScroll(new TimeValue(60000)).setQuery(filterCheck).setSize(0).execute().actionGet();
    long numInvalid = checkReferer.getHits().getTotalHits();
    double invalidRate = numInvalid / docCount;
    if (invalidRate >= 0.8) {
        deleteInvalid(es, user);
        return;
    }
    StatsAggregationBuilder statsAgg = AggregationBuilders.stats("Stats").field("Time");
    SearchResponse srSession = es.getClient().prepareSearch(logIndex).setTypes(this.cleanupType).setScroll(new TimeValue(60000)).setQuery(filterSearch).addAggregation(AggregationBuilders.terms("Sessions").field("SessionID").size(docCount).subAggregation(statsAgg)).execute().actionGet();
    Terms sessions = srSession.getAggregations().get("Sessions");
    List<Session> sessionList = new ArrayList<>();
    for (Terms.Bucket session : sessions.getBuckets()) {
        Stats agg = session.getAggregations().get("Stats");
        Session sess = new Session(props, es, agg.getMinAsString(), agg.getMaxAsString(), session.getKey().toString());
        sessionList.add(sess);
    }
    Collections.sort(sessionList);
    DateTimeFormatter fmt = ISODateTimeFormat.dateTime();
    String last = null;
    String lastnewID = null;
    String lastoldID = null;
    String current = null;
    for (Session s : sessionList) {
        current = s.getEndTime();
        if (last != null) {
            if (Seconds.secondsBetween(fmt.parseDateTime(last), fmt.parseDateTime(current)).getSeconds() < timeThres) {
                if (lastnewID == null) {
                    s.setNewID(lastoldID);
                } else {
                    s.setNewID(lastnewID);
                }
                QueryBuilder fs = QueryBuilders.boolQuery().filter(QueryBuilders.termQuery("SessionID", s.getID()));
                SearchResponse scrollResp = es.getClient().prepareSearch(logIndex).setTypes(this.cleanupType).setScroll(new TimeValue(60000)).setQuery(fs).setSize(100).execute().actionGet();
                while (true) {
                    for (SearchHit hit : scrollResp.getHits().getHits()) {
                        if (lastnewID == null) {
                            update(es, logIndex, this.cleanupType, hit.getId(), "SessionID", lastoldID);
                        } else {
                            update(es, logIndex, this.cleanupType, hit.getId(), "SessionID", lastnewID);
                        }
                    }
                    scrollResp = es.getClient().prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet();
                    if (scrollResp.getHits().getHits().length == 0) {
                        break;
                    }
                }
            }
        }
        lastoldID = s.getID();
        lastnewID = s.getNewID();
        last = current;
    }
}
Also used : StatsAggregationBuilder(org.elasticsearch.search.aggregations.metrics.stats.StatsAggregationBuilder) SearchHit(org.elasticsearch.search.SearchHit) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) BoolQueryBuilder(org.elasticsearch.index.query.BoolQueryBuilder) SearchResponse(org.elasticsearch.action.search.SearchResponse) BoolQueryBuilder(org.elasticsearch.index.query.BoolQueryBuilder) Stats(org.elasticsearch.search.aggregations.metrics.stats.Stats) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) TimeValue(org.elasticsearch.common.unit.TimeValue) Session(org.apache.sdap.mudrod.weblog.structure.Session)

Example 64 with TimeValue

use of org.graylog.shaded.elasticsearch7.org.elasticsearch.common.unit.TimeValue in project incubator-sdap-mudrod by apache.

the class SessionStatistic method processSession.

public int processSession(ESDriver es, String sessionId) throws IOException, InterruptedException, ExecutionException {
    String inputType = cleanupType;
    String outputType = sessionStats;
    DateTimeFormatter fmt = ISODateTimeFormat.dateTime();
    String min = null;
    String max = null;
    DateTime start = null;
    DateTime end = null;
    int duration = 0;
    float requestRate = 0;
    int sessionCount = 0;
    Pattern pattern = Pattern.compile("get (.*?) http/*");
    StatsAggregationBuilder statsAgg = AggregationBuilders.stats("Stats").field("Time");
    BoolQueryBuilder filterSearch = new BoolQueryBuilder();
    filterSearch.must(QueryBuilders.termQuery("SessionID", sessionId));
    SearchResponse sr = es.getClient().prepareSearch(logIndex).setTypes(inputType).setQuery(filterSearch).addAggregation(statsAgg).execute().actionGet();
    Stats agg = sr.getAggregations().get("Stats");
    min = agg.getMinAsString();
    max = agg.getMaxAsString();
    start = fmt.parseDateTime(min);
    end = fmt.parseDateTime(max);
    duration = Seconds.secondsBetween(start, end).getSeconds();
    int searchDataListRequestCount = 0;
    int searchDataRequestCount = 0;
    int searchDataListRequestByKeywordsCount = 0;
    int ftpRequestCount = 0;
    int keywordsNum = 0;
    String iP = null;
    String keywords = "";
    String views = "";
    String downloads = "";
    SearchResponse scrollResp = es.getClient().prepareSearch(logIndex).setTypes(inputType).setScroll(new TimeValue(60000)).setQuery(filterSearch).setSize(100).execute().actionGet();
    while (true) {
        for (SearchHit hit : scrollResp.getHits().getHits()) {
            Map<String, Object> result = hit.getSource();
            String request = (String) result.get("Request");
            String logType = (String) result.get("LogType");
            iP = (String) result.get("IP");
            Matcher matcher = pattern.matcher(request.trim().toLowerCase());
            while (matcher.find()) {
                request = matcher.group(1);
            }
            String datasetlist = props.getProperty(MudrodConstants.SEARCH_MARKER);
            String dataset = props.getProperty(MudrodConstants.VIEW_MARKER);
            if (request.contains(datasetlist)) {
                searchDataListRequestCount++;
                RequestUrl requestURL = new RequestUrl();
                String infoStr = requestURL.getSearchInfo(request) + ",";
                String info = es.customAnalyzing(props.getProperty(MudrodConstants.ES_INDEX_NAME), infoStr);
                if (!",".equals(info)) {
                    if ("".equals(keywords)) {
                        keywords = keywords + info;
                    } else {
                        String[] items = info.split(",");
                        String[] keywordList = keywords.split(",");
                        for (String item : items) {
                            if (!Arrays.asList(keywordList).contains(item)) {
                                keywords = keywords + item + ",";
                            }
                        }
                    }
                }
            }
            if (request.startsWith(dataset)) {
                searchDataRequestCount++;
                if (findDataset(request) != null) {
                    String view = findDataset(request);
                    if ("".equals(views))
                        views = view;
                    else if (!views.contains(view))
                        views = views + "," + view;
                }
            }
            if (MudrodConstants.FTP_LOG.equals(logType)) {
                ftpRequestCount++;
                String download = "";
                String requestLowercase = request.toLowerCase();
                if (!requestLowercase.endsWith(".jpg") && !requestLowercase.endsWith(".pdf") && !requestLowercase.endsWith(".txt") && !requestLowercase.endsWith(".gif")) {
                    download = request;
                }
                if ("".equals(downloads)) {
                    downloads = download;
                } else {
                    if (!downloads.contains(download)) {
                        downloads = downloads + "," + download;
                    }
                }
            }
        }
        scrollResp = es.getClient().prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet();
        // Break condition: No hits are returned
        if (scrollResp.getHits().getHits().length == 0) {
            break;
        }
    }
    if (!"".equals(keywords)) {
        keywordsNum = keywords.split(",").length;
    }
    if (searchDataListRequestCount != 0 && searchDataListRequestCount <= Integer.parseInt(props.getProperty(MudrodConstants.SEARCH_F)) && searchDataRequestCount != 0 && searchDataRequestCount <= Integer.parseInt(props.getProperty(MudrodConstants.VIEW_F)) && ftpRequestCount <= Integer.parseInt(props.getProperty(MudrodConstants.DOWNLOAD_F))) {
        String sessionURL = props.getProperty(MudrodConstants.SESSION_PORT) + props.getProperty(MudrodConstants.SESSION_URL) + "?sessionid=" + sessionId + "&sessionType=" + outputType + "&requestType=" + inputType;
        sessionCount = 1;
        IndexRequest ir = new IndexRequest(logIndex, outputType).source(jsonBuilder().startObject().field("SessionID", sessionId).field("SessionURL", sessionURL).field("Duration", duration).field("Number of Keywords", keywordsNum).field("Time", min).field("End_time", max).field("searchDataListRequest_count", searchDataListRequestCount).field("searchDataListRequest_byKeywords_count", searchDataListRequestByKeywordsCount).field("searchDataRequest_count", searchDataRequestCount).field("keywords", es.customAnalyzing(logIndex, keywords)).field("views", views).field("downloads", downloads).field("request_rate", requestRate).field("Comments", "").field("Validation", 0).field("Produceby", 0).field("Correlation", 0).field("IP", iP).endObject());
        es.getBulkProcessor().add(ir);
    }
    return sessionCount;
}
Also used : Pattern(java.util.regex.Pattern) StatsAggregationBuilder(org.elasticsearch.search.aggregations.metrics.stats.StatsAggregationBuilder) SearchHit(org.elasticsearch.search.SearchHit) Matcher(java.util.regex.Matcher) RequestUrl(org.apache.sdap.mudrod.weblog.structure.RequestUrl) IndexRequest(org.elasticsearch.action.index.IndexRequest) DateTime(org.joda.time.DateTime) SearchResponse(org.elasticsearch.action.search.SearchResponse) BoolQueryBuilder(org.elasticsearch.index.query.BoolQueryBuilder) Stats(org.elasticsearch.search.aggregations.metrics.stats.Stats) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) TimeValue(org.elasticsearch.common.unit.TimeValue)

Example 65 with TimeValue

use of org.graylog.shaded.elasticsearch7.org.elasticsearch.common.unit.TimeValue in project incubator-sdap-mudrod by apache.

the class ESDriver method deleteAllByQuery.

public void deleteAllByQuery(String index, String type, QueryBuilder query) {
    ImmutableOpenMap<String, MappingMetaData> mappings = getClient().admin().cluster().prepareState().execute().actionGet().getState().metaData().index(index).getMappings();
    // check if the type exists
    if (!mappings.containsKey(type))
        return;
    createBulkProcessor();
    SearchResponse scrollResp = getClient().prepareSearch(index).setSearchType(SearchType.QUERY_AND_FETCH).setTypes(type).setScroll(new TimeValue(60000)).setQuery(query).setSize(10000).execute().actionGet();
    while (true) {
        for (SearchHit hit : scrollResp.getHits().getHits()) {
            DeleteRequest deleteRequest = new DeleteRequest(index, type, hit.getId());
            getBulkProcessor().add(deleteRequest);
        }
        scrollResp = getClient().prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet();
        if (scrollResp.getHits().getHits().length == 0) {
            break;
        }
    }
    destroyBulkProcessor();
}
Also used : SearchHit(org.elasticsearch.search.SearchHit) MappingMetaData(org.elasticsearch.cluster.metadata.MappingMetaData) DeleteRequest(org.elasticsearch.action.delete.DeleteRequest) TimeValue(org.elasticsearch.common.unit.TimeValue) SearchResponse(org.elasticsearch.action.search.SearchResponse)

Aggregations

TimeValue (org.elasticsearch.common.unit.TimeValue)169 SearchResponse (org.elasticsearch.action.search.SearchResponse)37 ArrayList (java.util.ArrayList)28 IOException (java.io.IOException)27 ClusterState (org.elasticsearch.cluster.ClusterState)26 SearchHit (org.elasticsearch.search.SearchHit)26 CountDownLatch (java.util.concurrent.CountDownLatch)18 Settings (org.elasticsearch.common.settings.Settings)18 ParameterizedMessage (org.apache.logging.log4j.message.ParameterizedMessage)17 Map (java.util.Map)16 Supplier (org.apache.logging.log4j.util.Supplier)16 DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode)16 List (java.util.List)15 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)15 AbstractRunnable (org.elasticsearch.common.util.concurrent.AbstractRunnable)13 Matchers.containsString (org.hamcrest.Matchers.containsString)13 TimeUnit (java.util.concurrent.TimeUnit)11 ThreadPool (org.elasticsearch.threadpool.ThreadPool)11 HashMap (java.util.HashMap)10 ByteSizeValue (org.elasticsearch.common.unit.ByteSizeValue)10