Search in sources :

Example 1 with SID

use of jcifs.smb.SID in project fess by codelibs.

In the class AbstractFessFileTransformer, method getRoleTypes:

/**
 * Builds the list of role types for a crawled SMB resource.
 *
 * <p>Account ids are only collected when the configuration enables reading
 * roles from the file and the response URL starts with {@code smb://}. In
 * that case every access control entry found in the response metadata is
 * resolved to an account id through the {@code SambaHelper}; entries that
 * resolve to {@code null} are skipped.</p>
 *
 * @param responseData the crawl response whose URL and metadata are inspected
 * @return a new list holding the resolved account ids; empty when the
 *         preconditions are not met or no entries are present
 */
protected List<String> getRoleTypes(final ResponseData responseData) {
    final List<String> roles = new ArrayList<>();

    // Nothing to collect unless SMB role lookup is enabled and this is an SMB URL.
    if (!fessConfig.isSmbRoleFromFile() || !responseData.getUrl().startsWith("smb://")) {
        return roles;
    }

    final SambaHelper helper = ComponentUtil.getSambaHelper();
    final ACE[] entries = (ACE[]) responseData.getMetaDataMap().get(SmbClient.SMB_ACCESS_CONTROL_ENTRIES);
    if (entries == null) {
        return roles;
    }

    for (int i = 0; i < entries.length; i++) {
        final SID entrySid = entries[i].getSID();
        final String resolvedId = helper.getAccountId(entrySid);
        if (resolvedId == null) {
            continue;
        }
        roles.add(resolvedId);
    }

    if (getLogger().isDebugEnabled()) {
        getLogger().debug("smbUrl:" + responseData.getUrl() + " roleType:" + roles);
    }
    return roles;
}
Also used : ACE(jcifs.smb.ACE) SambaHelper(org.codelibs.fess.helper.SambaHelper) ArrayList(java.util.ArrayList) SID(jcifs.smb.SID)

Example 2 with SID

use of jcifs.smb.SID in project fess by codelibs.

In the class FessCrawlerThread, method isContentUpdated:

/**
 * Decides whether the URL at the head of the queue should be treated as updated,
 * by comparing the already indexed document with a HEAD response for the URL.
 *
 * <p>Returns {@code true} when incremental crawling is disabled, when the URL is an
 * SMB directory, when a HEAD request yields no response, when no indexed document
 * (or no usable last-modified value) exists, when the indexed document has expired,
 * or when none of the "unchanged" conditions below match.</p>
 *
 * <p>Returns {@code false} in two cases: the HEAD response status is 404 (the indexed
 * document is deleted), or the response is a 200 whose last-modified time is not newer
 * than the indexed one (the response is processed with the not-modified status and the
 * document's expiry is refreshed).</p>
 *
 * <p>Any response obtained along the way is closed quietly before returning.</p>
 *
 * @param client the crawler client used to issue the HEAD request
 * @param urlQueue the queue entry being checked; its last-modified value is set from
 *        the indexed document when one is found
 * @return {@code true} if the content is considered updated, {@code false} otherwise
 */
@Override
protected boolean isContentUpdated(final CrawlerClient client, final UrlQueue<?> urlQueue) {
    if (ComponentUtil.getFessConfig().isIncrementalCrawling()) {
        // Captured up front so the not-modified branch can report the elapsed time.
        final long startTime = System.currentTimeMillis();
        final FessConfig fessConfig = ComponentUtil.getFessConfig();
        final CrawlingConfigHelper crawlingConfigHelper = ComponentUtil.getCrawlingConfigHelper();
        final CrawlingInfoHelper crawlingInfoHelper = ComponentUtil.getCrawlingInfoHelper();
        final SambaHelper sambaHelper = ComponentUtil.getSambaHelper();
        final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper();
        final FessEsClient fessEsClient = ComponentUtil.getFessEsClient();
        final String url = urlQueue.getUrl();
        // May be populated early by the SMB branch and reused further down; closed in finally.
        ResponseData responseData = null;
        try {
            final CrawlingConfig crawlingConfig = crawlingConfigHelper.get(crawlerContext.getSessionId());
            // The URL and role list collected here are the inputs to the document id generation below.
            final Map<String, Object> dataMap = new HashMap<>();
            dataMap.put(fessConfig.getIndexFieldUrl(), url);
            final List<String> roleTypeList = new ArrayList<>();
            // Start with the permissions declared on the crawling configuration.
            stream(crawlingConfig.getPermissions()).of(stream -> stream.forEach(p -> roleTypeList.add(p)));
            if (url.startsWith("smb://")) {
                if (url.endsWith("/")) {
                    // directory
                    return true;
                }
                if (fessConfig.isSmbRoleFromFile()) {
                    // head method
                    responseData = client.execute(RequestDataBuilder.newRequestData().head().url(url).build());
                    if (responseData == null) {
                        return true;
                    }
                    // Add the account id of each access control entry reported in the response metadata.
                    final ACE[] aces = (ACE[]) responseData.getMetaDataMap().get(SmbClient.SMB_ACCESS_CONTROL_ENTRIES);
                    if (aces != null) {
                        for (final ACE item : aces) {
                            final SID sid = item.getSID();
                            final String accountId = sambaHelper.getAccountId(sid);
                            if (accountId != null) {
                                roleTypeList.add(accountId);
                            }
                        }
                        if (logger.isDebugEnabled()) {
                            logger.debug("smbUrl:" + responseData.getUrl() + " roleType:" + roleTypeList.toString());
                        }
                    }
                }
            }
            dataMap.put(fessConfig.getIndexFieldRole(), roleTypeList);
            final String id = crawlingInfoHelper.generateId(dataMap);
            if (logger.isDebugEnabled()) {
                logger.debug("Searching indexed document: " + id);
            }
            // Fetch only the listed fields of the indexed document with this id.
            final Map<String, Object> document = indexingHelper.getDocument(fessEsClient, id, new String[] { fessConfig.getIndexFieldId(), fessConfig.getIndexFieldLastModified(), fessConfig.getIndexFieldAnchor(), fessConfig.getIndexFieldSegment(), fessConfig.getIndexFieldExpires(), fessConfig.getIndexFieldClickCount(), fessConfig.getIndexFieldFavoriteCount() });
            if (document == null) {
                // No indexed document for this id: queue the child URLs looked up for it and report updated.
                storeChildUrlsToQueue(urlQueue, getChildUrlSet(fessEsClient, id));
                return true;
            }
            final Date expires = DocumentUtil.getValue(document, fessConfig.getIndexFieldExpires(), Date.class);
            if (expires != null && expires.getTime() < System.currentTimeMillis()) {
                // The indexed document has expired: delete it (by its stored id field) and report updated.
                final Object idValue = document.get(fessConfig.getIndexFieldId());
                if (idValue != null && !indexingHelper.deleteDocument(fessEsClient, idValue.toString())) {
                    logger.debug("Failed to delete expired document: " + url);
                }
                return true;
            }
            final Date lastModified = DocumentUtil.getValue(document, fessConfig.getIndexFieldLastModified(), Date.class);
            if (lastModified == null) {
                // Without an indexed last-modified value there is nothing to compare against.
                return true;
            }
            urlQueue.setLastModified(lastModified.getTime());
            log(logHelper, LogType.CHECK_LAST_MODIFIED, crawlerContext, urlQueue);
            // Reuse the HEAD response from the SMB branch if one was already obtained.
            if (responseData == null) {
                // head method
                responseData = client.execute(RequestDataBuilder.newRequestData().head().url(url).build());
                if (responseData == null) {
                    return true;
                }
            }
            final int httpStatusCode = responseData.getHttpStatusCode();
            if (logger.isDebugEnabled()) {
                logger.debug("Accessing document: " + url + ", status: " + httpStatusCode);
            }
            if (httpStatusCode == 404) {
                // Status 404: queue the anchors stored on the indexed document, then delete that document.
                storeChildUrlsToQueue(urlQueue, getAnchorSet(document.get(fessConfig.getIndexFieldAnchor())));
                if (!indexingHelper.deleteDocument(fessEsClient, id)) {
                    logger.debug("Failed to delete 404 document: " + url);
                }
                return false;
            } else if (responseData.getLastModified() == null) {
                // The response carries no last-modified value, so no comparison is possible.
                return true;
            } else if (responseData.getLastModified().getTime() <= lastModified.getTime() && httpStatusCode == 200) {
                // Not newer than the indexed copy: hand the response on with the not-modified status.
                log(logHelper, LogType.NOT_MODIFIED, crawlerContext, urlQueue);
                responseData.setExecutionTime(System.currentTimeMillis() - startTime);
                responseData.setParentUrl(urlQueue.getParentUrl());
                responseData.setSessionId(crawlerContext.getSessionId());
                responseData.setHttpStatusCode(org.codelibs.fess.crawler.Constants.NOT_MODIFIED_STATUS);
                processResponse(urlQueue, responseData);
                // Queue the anchors stored on the indexed document.
                storeChildUrlsToQueue(urlQueue, getAnchorSet(document.get(fessConfig.getIndexFieldAnchor())));
                // Update the indexed document's expires field with the value computed from the crawling configuration.
                final Date documentExpires = crawlingInfoHelper.getDocumentExpires(crawlingConfig);
                if (documentExpires != null && !indexingHelper.updateDocument(fessEsClient, id, fessConfig.getIndexFieldExpires(), documentExpires)) {
                    logger.debug("Failed to update " + fessConfig.getIndexFieldExpires() + " at " + url);
                }
                return false;
            }
            // Any other combination falls through to the final "return true" below.
        } finally {
            // Always release the HEAD response, whichever branch returned.
            if (responseData != null) {
                IOUtils.closeQuietly(responseData);
            }
        }
    }
    // Incremental crawling disabled, or no "unchanged" condition matched.
    return true;
}
Also used : DocumentUtil(org.codelibs.fess.util.DocumentUtil) CrawlingConfigHelper(org.codelibs.fess.helper.CrawlingConfigHelper) IndexingHelper(org.codelibs.fess.helper.IndexingHelper) Date(java.util.Date) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) CrawlingConfig(org.codelibs.fess.es.config.exentity.CrawlingConfig) ArrayList(java.util.ArrayList) CrawlerClient(org.codelibs.fess.crawler.client.CrawlerClient) HashSet(java.util.HashSet) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) Map(java.util.Map) LinkedHashSet(java.util.LinkedHashSet) SambaHelper(org.codelibs.fess.helper.SambaHelper) StreamUtil.stream(org.codelibs.core.stream.StreamUtil.stream) LogType(org.codelibs.fess.crawler.log.LogType) Logger(org.slf4j.Logger) FessEsClient(org.codelibs.fess.es.client.FessEsClient) ContainerNotAvailableException(org.codelibs.fess.exception.ContainerNotAvailableException) StringUtil(org.codelibs.core.lang.StringUtil) SID(jcifs.smb.SID) Set(java.util.Set) ContentNotFoundException(org.codelibs.fess.exception.ContentNotFoundException) Collectors(java.util.stream.Collectors) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) ACE(jcifs.smb.ACE) RequestData(org.codelibs.fess.crawler.entity.RequestData) ComponentUtil(org.codelibs.fess.util.ComponentUtil) CrawlingInfoHelper(org.codelibs.fess.helper.CrawlingInfoHelper) SmbClient(org.codelibs.fess.crawler.client.smb.SmbClient) RequestDataBuilder(org.codelibs.fess.crawler.builder.RequestDataBuilder) UrlQueue(org.codelibs.fess.crawler.entity.UrlQueue) FailureUrlService(org.codelibs.fess.app.service.FailureUrlService) ResponseData(org.codelibs.fess.crawler.entity.ResponseData) ACE(jcifs.smb.ACE) CrawlingConfig(org.codelibs.fess.es.config.exentity.CrawlingConfig) HashMap(java.util.HashMap) SambaHelper(org.codelibs.fess.helper.SambaHelper) FessEsClient(org.codelibs.fess.es.client.FessEsClient) ResponseData(org.codelibs.fess.crawler.entity.ResponseData) ArrayList(java.util.ArrayList) 
FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) Date(java.util.Date) SID(jcifs.smb.SID) CrawlingConfigHelper(org.codelibs.fess.helper.CrawlingConfigHelper) IndexingHelper(org.codelibs.fess.helper.IndexingHelper) CrawlingInfoHelper(org.codelibs.fess.helper.CrawlingInfoHelper)

Aggregations

ArrayList (java.util.ArrayList)2 ACE (jcifs.smb.ACE)2 SID (jcifs.smb.SID)2 SambaHelper (org.codelibs.fess.helper.SambaHelper)2 Date (java.util.Date)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 LinkedHashSet (java.util.LinkedHashSet)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 Collectors (java.util.stream.Collectors)1 IOUtils (org.apache.commons.io.IOUtils)1 StringUtil (org.codelibs.core.lang.StringUtil)1 StreamUtil.stream (org.codelibs.core.stream.StreamUtil.stream)1 FailureUrlService (org.codelibs.fess.app.service.FailureUrlService)1 RequestDataBuilder (org.codelibs.fess.crawler.builder.RequestDataBuilder)1 CrawlerClient (org.codelibs.fess.crawler.client.CrawlerClient)1 SmbClient (org.codelibs.fess.crawler.client.smb.SmbClient)1 RequestData (org.codelibs.fess.crawler.entity.RequestData)1