use of org.apache.commons.lang3.StringUtils.join in project nifi by apache.
the class LdapUserGroupProvider method load.
/**
* Reloads the tenants.
*/
private void load(final ContextSource contextSource) {
// create the ldapTemplate based on the context source. use a single source context to use the same connection
// to support paging when configured
final SingleContextSource singleContextSource = new SingleContextSource(contextSource.getReadOnlyContext());
final LdapTemplate ldapTemplate = new LdapTemplate(singleContextSource);
try {
final List<User> userList = new ArrayList<>();
final List<Group> groupList = new ArrayList<>();
// group dn -> user identifiers lookup
final Map<String, Set<String>> groupToUserIdentifierMappings = new HashMap<>();
// user dn -> user lookup
final Map<String, User> userLookup = new HashMap<>();
if (performUserSearch) {
// search controls
final SearchControls userControls = new SearchControls();
userControls.setSearchScope(userSearchScope.ordinal());
// consider paging support for users
final DirContextProcessor userProcessor;
if (pageSize == null) {
userProcessor = new NullDirContextProcessor();
} else {
userProcessor = new PagedResultsDirContextProcessor(pageSize);
}
// looking for objects matching the user object class
final AndFilter userFilter = new AndFilter();
userFilter.and(new EqualsFilter("objectClass", userObjectClass));
// if a filter has been provided by the user, we add it to the filter
if (StringUtils.isNotBlank(userSearchFilter)) {
userFilter.and(new HardcodedFilter(userSearchFilter));
}
do {
userList.addAll(ldapTemplate.search(userSearchBase, userFilter.encode(), userControls, new AbstractContextMapper<User>() {
@Override
protected User doMapFromContext(DirContextOperations ctx) {
// get the user identity
final String identity = getUserIdentity(ctx);
// build the user
final User user = new User.Builder().identifierGenerateFromSeed(identity).identity(identity).build();
// store the user for group member later
userLookup.put(getReferencedUserValue(ctx), user);
if (StringUtils.isNotBlank(userGroupNameAttribute)) {
final Attribute attributeGroups = ctx.getAttributes().get(userGroupNameAttribute);
if (attributeGroups == null) {
logger.warn("User group name attribute [" + userGroupNameAttribute + "] does not exist. Ignoring group membership.");
} else {
try {
final NamingEnumeration<String> groupValues = (NamingEnumeration<String>) attributeGroups.getAll();
while (groupValues.hasMoreElements()) {
// store the group -> user identifier mapping
groupToUserIdentifierMappings.computeIfAbsent(groupValues.next(), g -> new HashSet<>()).add(user.getIdentifier());
}
} catch (NamingException e) {
throw new AuthorizationAccessException("Error while retrieving user group name attribute [" + userIdentityAttribute + "].");
}
}
}
return user;
}
}, userProcessor));
} while (hasMorePages(userProcessor));
}
if (performGroupSearch) {
final SearchControls groupControls = new SearchControls();
groupControls.setSearchScope(groupSearchScope.ordinal());
// consider paging support for groups
final DirContextProcessor groupProcessor;
if (pageSize == null) {
groupProcessor = new NullDirContextProcessor();
} else {
groupProcessor = new PagedResultsDirContextProcessor(pageSize);
}
// looking for objects matching the group object class
AndFilter groupFilter = new AndFilter();
groupFilter.and(new EqualsFilter("objectClass", groupObjectClass));
// if a filter has been provided by the user, we add it to the filter
if (StringUtils.isNotBlank(groupSearchFilter)) {
groupFilter.and(new HardcodedFilter(groupSearchFilter));
}
do {
groupList.addAll(ldapTemplate.search(groupSearchBase, groupFilter.encode(), groupControls, new AbstractContextMapper<Group>() {
@Override
protected Group doMapFromContext(DirContextOperations ctx) {
final String dn = ctx.getDn().toString();
// get the group identity
final String name = getGroupName(ctx);
// get the value of this group that may associate it to users
final String referencedGroupValue = getReferencedGroupValue(ctx);
if (!StringUtils.isBlank(groupMemberAttribute)) {
Attribute attributeUsers = ctx.getAttributes().get(groupMemberAttribute);
if (attributeUsers == null) {
logger.warn("Group member attribute [" + groupMemberAttribute + "] does not exist. Ignoring group membership.");
} else {
try {
final NamingEnumeration<String> userValues = (NamingEnumeration<String>) attributeUsers.getAll();
while (userValues.hasMoreElements()) {
final String userValue = userValues.next();
if (performUserSearch) {
// find the user by it's referenced attribute and add the identifier to this group
final User user = userLookup.get(userValue);
// ensure the user is known
if (user != null) {
groupToUserIdentifierMappings.computeIfAbsent(referencedGroupValue, g -> new HashSet<>()).add(user.getIdentifier());
} else {
logger.warn(String.format("%s contains member %s but that user was not found while searching users. Ignoring group membership.", name, userValue));
}
} else {
// since performUserSearch is false, then the referenced group attribute must be blank... the user value must be the dn
final String userDn = userValue;
final String userIdentity;
if (useDnForUserIdentity) {
// use the user value to avoid the unnecessary look up
userIdentity = userDn;
} else {
// lookup the user to extract the user identity
userIdentity = getUserIdentity((DirContextAdapter) ldapTemplate.lookup(userDn));
}
// build the user
final User user = new User.Builder().identifierGenerateFromSeed(userIdentity).identity(userIdentity).build();
// add this user
userList.add(user);
groupToUserIdentifierMappings.computeIfAbsent(referencedGroupValue, g -> new HashSet<>()).add(user.getIdentifier());
}
}
} catch (NamingException e) {
throw new AuthorizationAccessException("Error while retrieving group name attribute [" + groupNameAttribute + "].");
}
}
}
// build this group
final Group.Builder groupBuilder = new Group.Builder().identifierGenerateFromSeed(name).name(name);
// add all users that were associated with this referenced group attribute
if (groupToUserIdentifierMappings.containsKey(referencedGroupValue)) {
groupToUserIdentifierMappings.remove(referencedGroupValue).forEach(userIdentifier -> groupBuilder.addUser(userIdentifier));
}
return groupBuilder.build();
}
}, groupProcessor));
} while (hasMorePages(groupProcessor));
// any remaining groupDn's were referenced by a user but not found while searching groups
groupToUserIdentifierMappings.forEach((referencedGroupValue, userIdentifiers) -> {
logger.warn(String.format("[%s] are members of %s but that group was not found while searching users. Ignoring group membership.", StringUtils.join(userIdentifiers, ", "), referencedGroupValue));
});
} else {
// since performGroupSearch is false, then the referenced user attribute must be blank... the group value must be the dn
// groups are not being searched so lookup any groups identified while searching users
groupToUserIdentifierMappings.forEach((groupDn, userIdentifiers) -> {
final String groupName;
if (useDnForGroupName) {
// use the dn to avoid the unnecessary look up
groupName = groupDn;
} else {
groupName = getGroupName((DirContextAdapter) ldapTemplate.lookup(groupDn));
}
// define the group
final Group.Builder groupBuilder = new Group.Builder().identifierGenerateFromSeed(groupName).name(groupName);
// add each user
userIdentifiers.forEach(userIdentifier -> groupBuilder.addUser(userIdentifier));
// build the group
groupList.add(groupBuilder.build());
});
}
// record the updated tenants
tenants.set(new TenantHolder(new HashSet<>(userList), new HashSet<>(groupList)));
} finally {
singleContextSource.destroy();
}
}
use of org.apache.commons.lang3.StringUtils.join in project nifi by apache.
the class FlowResource method scheduleComponents.
/**
* Updates the specified process group.
*
* @param httpServletRequest request
* @param id The id of the process group.
* @param requestScheduleComponentsEntity A scheduleComponentsEntity.
* @return A processGroupEntity.
*/
@PUT
@Consumes(MediaType.APPLICATION_JSON)
@Produces(MediaType.APPLICATION_JSON)
@Path("process-groups/{id}")
@ApiOperation(value = "Schedule or unschedule components in the specified Process Group.", response = ScheduleComponentsEntity.class, authorizations = { @Authorization(value = "Read - /flow"), @Authorization(value = "Write - /{component-type}/{uuid} - For every component being scheduled/unscheduled") })
@ApiResponses(value = { @ApiResponse(code = 400, message = "NiFi was unable to complete the request because it was invalid. The request should not be retried without modification."), @ApiResponse(code = 401, message = "Client could not be authenticated."), @ApiResponse(code = 403, message = "Client is not authorized to make this request."), @ApiResponse(code = 404, message = "The specified resource could not be found."), @ApiResponse(code = 409, message = "The request was valid but NiFi was not in the appropriate state to process it. Retrying the same request later may be successful.") })
public Response scheduleComponents(@Context HttpServletRequest httpServletRequest, @ApiParam(value = "The process group id.", required = true) @PathParam("id") String id, @ApiParam(value = "The request to schedule or unschedule. If the comopnents in the request are not specified, all authorized components will be considered.", required = true) final ScheduleComponentsEntity requestScheduleComponentsEntity) {
// ensure the same id is being used
if (!id.equals(requestScheduleComponentsEntity.getId())) {
throw new IllegalArgumentException(String.format("The process group id (%s) in the request body does " + "not equal the process group id of the requested resource (%s).", requestScheduleComponentsEntity.getId(), id));
}
final ScheduledState state;
if (requestScheduleComponentsEntity.getState() == null) {
throw new IllegalArgumentException("The scheduled state must be specified.");
} else {
try {
state = ScheduledState.valueOf(requestScheduleComponentsEntity.getState());
} catch (final IllegalArgumentException iae) {
throw new IllegalArgumentException(String.format("The scheduled must be one of [%s].", StringUtils.join(EnumSet.of(ScheduledState.RUNNING, ScheduledState.STOPPED), ", ")));
}
}
// ensure its a supported scheduled state
if (ScheduledState.DISABLED.equals(state) || ScheduledState.STARTING.equals(state) || ScheduledState.STOPPING.equals(state)) {
throw new IllegalArgumentException(String.format("The scheduled must be one of [%s].", StringUtils.join(EnumSet.of(ScheduledState.RUNNING, ScheduledState.STOPPED), ", ")));
}
// if the components are not specified, gather all components and their current revision
if (requestScheduleComponentsEntity.getComponents() == null) {
// get the current revisions for the components being updated
final Set<Revision> revisions = serviceFacade.getRevisionsFromGroup(id, group -> {
final Set<String> componentIds = new HashSet<>();
// ensure authorized for each processor we will attempt to schedule
group.findAllProcessors().stream().filter(ScheduledState.RUNNING.equals(state) ? ProcessGroup.SCHEDULABLE_PROCESSORS : ProcessGroup.UNSCHEDULABLE_PROCESSORS).filter(processor -> processor.isAuthorized(authorizer, RequestAction.WRITE, NiFiUserUtils.getNiFiUser())).forEach(processor -> {
componentIds.add(processor.getIdentifier());
});
// ensure authorized for each input port we will attempt to schedule
group.findAllInputPorts().stream().filter(ScheduledState.RUNNING.equals(state) ? ProcessGroup.SCHEDULABLE_PORTS : ProcessGroup.UNSCHEDULABLE_PORTS).filter(inputPort -> inputPort.isAuthorized(authorizer, RequestAction.WRITE, NiFiUserUtils.getNiFiUser())).forEach(inputPort -> {
componentIds.add(inputPort.getIdentifier());
});
// ensure authorized for each output port we will attempt to schedule
group.findAllOutputPorts().stream().filter(ScheduledState.RUNNING.equals(state) ? ProcessGroup.SCHEDULABLE_PORTS : ProcessGroup.UNSCHEDULABLE_PORTS).filter(outputPort -> outputPort.isAuthorized(authorizer, RequestAction.WRITE, NiFiUserUtils.getNiFiUser())).forEach(outputPort -> {
componentIds.add(outputPort.getIdentifier());
});
return componentIds;
});
// build the component mapping
final Map<String, RevisionDTO> componentsToSchedule = new HashMap<>();
revisions.forEach(revision -> {
final RevisionDTO dto = new RevisionDTO();
dto.setClientId(revision.getClientId());
dto.setVersion(revision.getVersion());
componentsToSchedule.put(revision.getComponentId(), dto);
});
// set the components and their current revision
requestScheduleComponentsEntity.setComponents(componentsToSchedule);
}
if (isReplicateRequest()) {
return replicate(HttpMethod.PUT, requestScheduleComponentsEntity);
}
final Map<String, RevisionDTO> requestComponentsToSchedule = requestScheduleComponentsEntity.getComponents();
final Map<String, Revision> requestComponentRevisions = requestComponentsToSchedule.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, e -> getRevision(e.getValue(), e.getKey())));
final Set<Revision> requestRevisions = new HashSet<>(requestComponentRevisions.values());
return withWriteLock(serviceFacade, requestScheduleComponentsEntity, requestRevisions, lookup -> {
// ensure access to the flow
authorizeFlow();
// ensure access to every component being scheduled
requestComponentsToSchedule.keySet().forEach(componentId -> {
final Authorizable connectable = lookup.getLocalConnectable(componentId);
connectable.authorize(authorizer, RequestAction.WRITE, NiFiUserUtils.getNiFiUser());
});
}, () -> serviceFacade.verifyScheduleComponents(id, state, requestComponentRevisions.keySet()), (revisions, scheduleComponentsEntity) -> {
final ScheduledState scheduledState = ScheduledState.valueOf(scheduleComponentsEntity.getState());
final Map<String, RevisionDTO> componentsToSchedule = scheduleComponentsEntity.getComponents();
final Map<String, Revision> componentRevisions = componentsToSchedule.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, e -> getRevision(e.getValue(), e.getKey())));
// update the process group
final ScheduleComponentsEntity entity = serviceFacade.scheduleComponents(id, scheduledState, componentRevisions);
return generateOkResponse(entity).build();
});
}
use of org.apache.commons.lang3.StringUtils.join in project nifi by apache.
the class ControllerFacade method setComponentDetails.
private void setComponentDetails(final ProvenanceEventDTO dto) {
final ProcessGroup root = flowController.getGroup(flowController.getRootGroupId());
final Connectable connectable = root.findLocalConnectable(dto.getComponentId());
if (connectable != null) {
dto.setGroupId(connectable.getProcessGroup().getIdentifier());
dto.setComponentName(connectable.getName());
return;
}
final RemoteGroupPort remoteGroupPort = root.findRemoteGroupPort(dto.getComponentId());
if (remoteGroupPort != null) {
dto.setGroupId(remoteGroupPort.getProcessGroupIdentifier());
dto.setComponentName(remoteGroupPort.getName());
return;
}
final Connection connection = root.findConnection(dto.getComponentId());
if (connection != null) {
dto.setGroupId(connection.getProcessGroup().getIdentifier());
String name = connection.getName();
final Collection<Relationship> relationships = connection.getRelationships();
if (StringUtils.isBlank(name) && CollectionUtils.isNotEmpty(relationships)) {
name = StringUtils.join(relationships.stream().map(relationship -> relationship.getName()).collect(Collectors.toSet()), ", ");
}
dto.setComponentName(name);
return;
}
}
use of org.apache.commons.lang3.StringUtils.join in project swift by luastar.
the class ExcelUtils method readXlsxSheet.
/**
* 从sheet中读取数据
*
* @param workbook
* @param sheetConfig
* @throws Exception
*/
private static void readXlsxSheet(XSSFWorkbook workbook, ImportSheet sheetConfig) throws Exception {
if (workbook == null || sheetConfig == null || sheetConfig.getDataClass() == null || sheetConfig.getColumnList() == null) {
throw new IllegalArgumentException("excel导入参数错误!");
}
// 公式执行器
CreationHelper createHelper = workbook.getCreationHelper();
FormulaEvaluator formulaEvaluator = createHelper.createFormulaEvaluator();
int sheetNum = workbook.getNumberOfSheets();
if (sheetConfig.getIndex() >= sheetNum) {
String msg = StrUtils.formatString("sheet【{0}】不存在", sheetConfig.getIndex() + 1);
throw new RuntimeException(msg);
}
XSSFSheet sheet = workbook.getSheetAt(sheetConfig.getIndex());
int firstRowNum = sheet.getFirstRowNum();
int lastRowNum = sheet.getLastRowNum();
if (lastRowNum < 1) {
String msg = StrUtils.formatString("sheet【{0}】数据为空", sheet.getSheetName());
throw new RuntimeException(msg);
}
// 通过标题找对应的列
List<String> columnNotFound = Lists.newArrayList();
List<ImportColumn> columnList = sheetConfig.getColumnList();
int columnNum = columnList.size();
XSSFRow titleRow = sheet.getRow(firstRowNum);
for (int i = 0; i < columnNum; i++) {
ImportColumn column = columnList.get(i);
for (int j = 0; j < columnNum; j++) {
XSSFCell cell = titleRow.getCell(j);
if (cell != null && column.getTitle().equals(cell.getStringCellValue())) {
column.setColumnIndex(j);
}
}
if (column.getColumnIndex() == null) {
columnNotFound.add(column.getTitle());
}
}
// 找不到对应的列
if (columnNotFound.size() > 0) {
String msg = StrUtils.formatString("列【{0}】不存在", StringUtils.join(columnNotFound, ","));
throw new RuntimeException(msg);
}
// 获取数据
List<ExcelData> dataList = Lists.newArrayList();
for (int i = firstRowNum + 1; i <= lastRowNum; i++) {
XSSFRow row = sheet.getRow(i);
Object data = sheetConfig.getDataClass().newInstance();
if (row == null) {
ExcelData excelData = new ExcelData(i, data);
excelData.setCheckMsg("获取行数据为空");
dataList.add(excelData);
continue;
}
// 行不为空
List<String> setPropList = Lists.newArrayList();
for (int j = 0; j < columnNum; j++) {
ImportColumn column = columnList.get(j);
XSSFCell cell = row.getCell(column.getColumnIndex());
setPropList.add(ExcelUtils.setProperty(column, cell, data, formulaEvaluator));
}
ExcelData excelData = new ExcelData(i, data);
// 赋值失败的列
List setErrList = setPropList.stream().filter(rs -> rs != null).collect(Collectors.toList());
if (CollectionUtils.isNotEmpty(setErrList)) {
String msg = StrUtils.formatString("获取以下属性的值失败:{0}", StringUtils.join(setErrList, ","));
excelData.setCheckMsg(msg);
}
dataList.add(excelData);
}
sheetConfig.setDataList(dataList);
}
use of org.apache.commons.lang3.StringUtils.join in project fess by codelibs.
the class AbstractFessFileTransformer method generateData.
protected Map<String, Object> generateData(final ResponseData responseData) {
final Extractor extractor = getExtractor(responseData);
final Map<String, String> params = new HashMap<>();
params.put(TikaMetadataKeys.RESOURCE_NAME_KEY, getResourceName(responseData));
final String mimeType = responseData.getMimeType();
params.put(HttpHeaders.CONTENT_TYPE, mimeType);
params.put(HttpHeaders.CONTENT_ENCODING, responseData.getCharSet());
final StringBuilder contentMetaBuf = new StringBuilder(1000);
final Map<String, Object> dataMap = new HashMap<>();
final Map<String, Object> metaDataMap = new HashMap<>();
String content;
try (final InputStream in = responseData.getResponseBody()) {
final ExtractData extractData = getExtractData(extractor, in, params);
content = extractData.getContent();
if (fessConfig.isCrawlerDocumentFileIgnoreEmptyContent() && StringUtil.isBlank(content)) {
return null;
}
if (getLogger().isDebugEnabled()) {
getLogger().debug("ExtractData: " + extractData);
}
// meta
//
extractData.getKeySet().stream().filter(//
k -> extractData.getValues(k) != null).forEach(key -> {
final String[] values = extractData.getValues(key);
metaDataMap.put(key, values);
if (fessConfig.isCrawlerMetadataContentIncluded(key)) {
final String joinedValue = StringUtils.join(values, ' ');
if (StringUtil.isNotBlank(joinedValue)) {
if (contentMetaBuf.length() > 0) {
contentMetaBuf.append(' ');
}
contentMetaBuf.append(joinedValue.trim());
}
}
final Pair<String, String> mapping = fessConfig.getCrawlerMetadataNameMapping(key);
if (mapping != null) {
if (Constants.MAPPING_TYPE_ARRAY.equalsIgnoreCase(mapping.getSecond())) {
dataMap.put(mapping.getFirst(), values);
} else if (Constants.MAPPING_TYPE_STRING.equalsIgnoreCase(mapping.getSecond())) {
final String joinedValue = StringUtils.join(values, ' ');
dataMap.put(mapping.getFirst(), joinedValue.trim());
} else if (values.length == 1) {
try {
if (Constants.MAPPING_TYPE_LONG.equalsIgnoreCase(mapping.getSecond())) {
dataMap.put(mapping.getFirst(), Long.parseLong(values[0]));
} else if (Constants.MAPPING_TYPE_DOUBLE.equalsIgnoreCase(mapping.getSecond())) {
dataMap.put(mapping.getFirst(), Double.parseDouble(values[0]));
} else {
logger.warn("Unknown mapping type: {}={}", key, mapping);
}
} catch (final NumberFormatException e) {
logger.warn("Failed to parse " + values[0], e);
}
}
}
});
} catch (final Exception e) {
final CrawlingAccessException rcae = new CrawlingAccessException("Could not get a text from " + responseData.getUrl(), e);
rcae.setLogLevel(CrawlingAccessException.WARN);
throw rcae;
}
if (content == null) {
content = StringUtil.EMPTY;
}
final String contentMeta = contentMetaBuf.toString().trim();
final FessConfig fessConfig = ComponentUtil.getFessConfig();
final CrawlingInfoHelper crawlingInfoHelper = ComponentUtil.getCrawlingInfoHelper();
final String sessionId = crawlingInfoHelper.getCanonicalSessionId(responseData.getSessionId());
final PathMappingHelper pathMappingHelper = ComponentUtil.getPathMappingHelper();
final CrawlingConfigHelper crawlingConfigHelper = ComponentUtil.getCrawlingConfigHelper();
final CrawlingConfig crawlingConfig = crawlingConfigHelper.get(responseData.getSessionId());
final Date documentExpires = crawlingInfoHelper.getDocumentExpires(crawlingConfig);
final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
final FileTypeHelper fileTypeHelper = ComponentUtil.getFileTypeHelper();
final DocumentHelper documentHelper = ComponentUtil.getDocumentHelper();
String url = responseData.getUrl();
final String indexingTarget = crawlingConfig.getIndexingTarget(url);
url = pathMappingHelper.replaceUrl(sessionId, url);
final Map<String, String> fieldConfigMap = crawlingConfig.getConfigParameterMap(ConfigName.FIELD);
String urlEncoding;
final UrlQueue<?> urlQueue = CrawlingParameterUtil.getUrlQueue();
if (urlQueue != null && urlQueue.getEncoding() != null) {
urlEncoding = urlQueue.getEncoding();
} else {
urlEncoding = responseData.getCharSet();
}
// cid
final String configId = crawlingConfig.getConfigId();
if (configId != null) {
putResultDataBody(dataMap, fessConfig.getIndexFieldConfigId(), configId);
}
// expires
if (documentExpires != null) {
putResultDataBody(dataMap, fessConfig.getIndexFieldExpires(), documentExpires);
}
// segment
putResultDataBody(dataMap, fessConfig.getIndexFieldSegment(), sessionId);
// content
final StringBuilder buf = new StringBuilder(content.length() + 1000);
if (fessConfig.isCrawlerDocumentFileAppendBodyContent()) {
buf.append(content);
}
if (fessConfig.isCrawlerDocumentFileAppendMetaContent()) {
if (buf.length() > 0) {
buf.append(' ');
}
buf.append(contentMeta);
}
final String bodyBase = buf.toString().trim();
final String body = documentHelper.getContent(responseData, bodyBase, dataMap);
putResultDataBody(dataMap, fessConfig.getIndexFieldContent(), body);
if ((Constants.TRUE.equalsIgnoreCase(fieldConfigMap.get(fessConfig.getIndexFieldCache())) || fessConfig.isCrawlerDocumentCacheEnabled()) && fessConfig.isSupportedDocumentCacheMimetypes(mimeType)) {
if (responseData.getContentLength() > 0 && responseData.getContentLength() <= fessConfig.getCrawlerDocumentCacheMaxSizeAsInteger().longValue()) {
final String cache = content.trim().replaceAll("[ \\t\\x0B\\f]+", " ");
// text cache
putResultDataBody(dataMap, fessConfig.getIndexFieldCache(), cache);
putResultDataBody(dataMap, fessConfig.getIndexFieldHasCache(), Constants.TRUE);
}
}
// digest
putResultDataBody(dataMap, fessConfig.getIndexFieldDigest(), documentHelper.getDigest(responseData, bodyBase, dataMap, fessConfig.getCrawlerDocumentFileMaxDigestLengthAsInteger()));
// title
final String fileName = getFileName(url, urlEncoding);
if (!dataMap.containsKey(fessConfig.getIndexFieldTitle())) {
if (url.endsWith("/")) {
if (StringUtil.isNotBlank(content)) {
putResultDataBody(dataMap, fessConfig.getIndexFieldTitle(), documentHelper.getDigest(responseData, body, dataMap, fessConfig.getCrawlerDocumentFileMaxTitleLengthAsInteger()));
} else {
putResultDataBody(dataMap, fessConfig.getIndexFieldTitle(), fessConfig.getCrawlerDocumentFileNoTitleLabel());
}
} else {
if (StringUtil.isBlank(fileName)) {
putResultDataBody(dataMap, fessConfig.getIndexFieldTitle(), decodeUrlAsName(url, url.startsWith("file:")));
} else {
putResultDataBody(dataMap, fessConfig.getIndexFieldTitle(), fileName);
}
}
}
// host
putResultDataBody(dataMap, fessConfig.getIndexFieldHost(), getHostOnFile(url));
// site
putResultDataBody(dataMap, fessConfig.getIndexFieldSite(), getSiteOnFile(url, urlEncoding));
// filename
if (StringUtil.isNotBlank(fileName)) {
putResultDataBody(dataMap, fessConfig.getIndexFieldFilename(), fileName);
}
// url
putResultDataBody(dataMap, fessConfig.getIndexFieldUrl(), url);
// created
final Date now = systemHelper.getCurrentTime();
putResultDataBody(dataMap, fessConfig.getIndexFieldCreated(), now);
// TODO anchor
putResultDataBody(dataMap, fessConfig.getIndexFieldAnchor(), StringUtil.EMPTY);
// mimetype
putResultDataBody(dataMap, fessConfig.getIndexFieldMimetype(), mimeType);
if (fileTypeHelper != null) {
// filetype
putResultDataBody(dataMap, fessConfig.getIndexFieldFiletype(), fileTypeHelper.get(mimeType));
}
// content_length
putResultDataBody(dataMap, fessConfig.getIndexFieldContentLength(), Long.toString(responseData.getContentLength()));
// last_modified
final Date lastModified = responseData.getLastModified();
if (lastModified != null) {
putResultDataBody(dataMap, fessConfig.getIndexFieldLastModified(), lastModified);
// timestamp
putResultDataBody(dataMap, fessConfig.getIndexFieldTimestamp(), lastModified);
} else {
// timestamp
putResultDataBody(dataMap, fessConfig.getIndexFieldTimestamp(), now);
}
// indexingTarget
putResultDataBody(dataMap, Constants.INDEXING_TARGET, indexingTarget);
// boost
putResultDataBody(dataMap, fessConfig.getIndexFieldBoost(), crawlingConfig.getDocumentBoost());
// label: labelType
final Set<String> labelTypeSet = new HashSet<>();
for (final String labelType : crawlingConfig.getLabelTypeValues()) {
labelTypeSet.add(labelType);
}
final LabelTypeHelper labelTypeHelper = ComponentUtil.getLabelTypeHelper();
labelTypeSet.addAll(labelTypeHelper.getMatchedLabelValueSet(url));
putResultDataBody(dataMap, fessConfig.getIndexFieldLabel(), labelTypeSet);
// role: roleType
final List<String> roleTypeList = getRoleTypes(responseData);
stream(crawlingConfig.getPermissions()).of(stream -> stream.forEach(p -> roleTypeList.add(p)));
putResultDataBody(dataMap, fessConfig.getIndexFieldRole(), roleTypeList);
// lang
if (StringUtil.isNotBlank(fessConfig.getCrawlerDocumentFileDefaultLang())) {
putResultDataBody(dataMap, fessConfig.getIndexFieldLang(), fessConfig.getCrawlerDocumentFileDefaultLang());
}
// id
putResultDataBody(dataMap, fessConfig.getIndexFieldId(), crawlingInfoHelper.generateId(dataMap));
// parentId
String parentUrl = responseData.getParentUrl();
if (StringUtil.isNotBlank(parentUrl)) {
parentUrl = pathMappingHelper.replaceUrl(sessionId, parentUrl);
putResultDataBody(dataMap, fessConfig.getIndexFieldUrl(), parentUrl);
putResultDataBody(dataMap, fessConfig.getIndexFieldParentId(), crawlingInfoHelper.generateId(dataMap));
// set again
putResultDataBody(dataMap, fessConfig.getIndexFieldUrl(), url);
}
// from config
final Map<String, String> scriptConfigMap = crawlingConfig.getConfigParameterMap(ConfigName.SCRIPT);
final Map<String, String> metaConfigMap = crawlingConfig.getConfigParameterMap(ConfigName.META);
for (final Map.Entry<String, String> entry : metaConfigMap.entrySet()) {
final String key = entry.getKey();
final String[] values = entry.getValue().split(",");
for (final String value : values) {
putResultDataWithTemplate(dataMap, key, metaDataMap.get(value), scriptConfigMap.get(key));
}
}
final Map<String, String> valueConfigMap = crawlingConfig.getConfigParameterMap(ConfigName.VALUE);
for (final Map.Entry<String, String> entry : valueConfigMap.entrySet()) {
final String key = entry.getKey();
putResultDataWithTemplate(dataMap, key, entry.getValue(), scriptConfigMap.get(key));
}
return dataMap;
}
Aggregations