Search in sources :

Example 26 with StringUtils.join

use of org.apache.commons.lang3.StringUtils.join in project asterixdb by apache.

the class AbstractExecutionIT method setUp.

/**
 * One-time preparation of the integration-test environment.
 *
 * <p>Creates the output directory, starts the HDFS test cluster, installs the test
 * {@code asterix-configuration.xml} into the Managix home, brings up the Asterix instance,
 * deploys the external-data tests jar into the instance repository, restarts the instance and
 * copies the test data directories into the local cluster working directory.
 *
 * @throws IllegalStateException if no {@code asterix-external-data-*-tests.jar} can be found
 *         under {@code ../asterix-external-data/target}
 * @throws Exception if any of the setup steps fails
 */
@BeforeClass
public static void setUp() throws Exception {
    System.out.println("Starting setup");
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("Starting setup");
    }
    File outdir = new File(PATH_ACTUAL);
    outdir.mkdirs();
    HDFSCluster.getInstance().setup(HDFS_BASE);
    //This is nasty but there is no very nice way to set a system property on each NC that I can figure.
    //The main issue is that we need the NC resolver to be the IdentityResolver and not the DNSResolver.
    FileUtils.copyFile(new File(StringUtils.join(new String[] { "src", "test", "resources", "integrationts", "asterix-configuration.xml" }, File.separator)), new File(AsterixInstallerIntegrationUtil.getManagixHome() + "/conf/asterix-configuration.xml"));
    AsterixLifecycleIT.setUp();
    // Locate the external-data tests jar. File.listFiles returns null when the directory does not
    // exist (or cannot be read) and an empty array when nothing matches, so fail with a clear
    // message instead of an anonymous NullPointerException / ArrayIndexOutOfBoundsException.
    File externalTargetDir = new File(StringUtils.join(new String[] { "..", "asterix-external-data", "target" }, File.separator));
    File[] externalTestsJars = externalTargetDir.listFiles((dir, name) -> name.matches("asterix-external-data-.*-tests.jar"));
    if (externalTestsJars == null || externalTestsJars.length == 0) {
        throw new IllegalStateException("No asterix-external-data-*-tests.jar found in " + externalTargetDir.getAbsolutePath());
    }
    File externalTestsJar = externalTestsJars[0];
    FileUtils.copyFile(externalTestsJar, new File(AsterixInstallerIntegrationUtil.getManagixHome() + "/clusters/local/working_dir/asterix/repo/", externalTestsJar.getName()));
    // The instance is restarted right after the jar has been copied into its repository.
    AsterixLifecycleIT.restartInstance();
    FileUtils.copyDirectoryStructure(new File(FileUtil.joinPath("..", "asterix-app", "data")), new File(AsterixInstallerIntegrationUtil.getManagixHome() + "/clusters/local/working_dir/data"));
    FileUtils.copyDirectoryStructure(new File(FileUtil.joinPath("..", "asterix-app", "target", "data")), new File(AsterixInstallerIntegrationUtil.getManagixHome() + "/clusters/local/working_dir/target/data"));
    FileUtils.copyDirectoryStructure(new File(FileUtil.joinPath("target", "data")), new File(AsterixInstallerIntegrationUtil.getManagixHome() + "/clusters/local/working_dir/target/data/csv"));
    // Set the node resolver to be the identity resolver that expects node names
    // to be node controller ids; a valid assumption in test environment.
    System.setProperty(ExternalDataConstants.NODE_RESOLVER_FACTORY_PROPERTY, IdentitiyResolverFactory.class.getName());
    reportPath = new File(FileUtil.joinPath("target", "failsafe-reports")).getAbsolutePath();
}
Also used : BeforeClass(org.junit.BeforeClass) TestRule(org.junit.rules.TestRule) CompilationUnit(org.apache.asterix.testframework.xml.TestCase.CompilationUnit) RunWith(org.junit.runner.RunWith) Parameters(org.junit.runners.Parameterized.Parameters) ExternalDataConstants(org.apache.asterix.external.util.ExternalDataConstants) TestExecutor(org.apache.asterix.test.common.TestExecutor) StringUtils(org.apache.commons.lang3.StringUtils) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) RetainLogsRule(org.apache.asterix.test.base.RetainLogsRule) Parameterized(org.junit.runners.Parameterized) AfterClass(org.junit.AfterClass) TestGroup(org.apache.asterix.testframework.xml.TestGroup) Collection(java.util.Collection) Test(org.junit.Test) Logger(java.util.logging.Logger) TestCaseContext(org.apache.asterix.testframework.context.TestCaseContext) File(java.io.File) FileUtil(org.apache.hyracks.util.file.FileUtil) List(java.util.List) FileUtils(org.codehaus.plexus.util.FileUtils) Rule(org.junit.Rule) HDFSCluster(org.apache.asterix.test.runtime.HDFSCluster) IdentitiyResolverFactory(org.apache.asterix.external.util.IdentitiyResolverFactory) TestFileContext(org.apache.asterix.testframework.context.TestFileContext) File(java.io.File) IdentitiyResolverFactory(org.apache.asterix.external.util.IdentitiyResolverFactory) BeforeClass(org.junit.BeforeClass)

Example 27 with StringUtils.join

use of org.apache.commons.lang3.StringUtils.join in project fess by codelibs.

the class CrawlJob method executeCrawler.

/**
 * Launches the crawler in a separate JVM process and waits for it to finish.
 *
 * <p>The method assembles the full command line (java command, classpath, system properties,
 * JVM options, main class and crawler arguments), writes the current system properties to a
 * temporary {@code .properties} file that is passed with {@code -p}, starts the process through
 * the {@code ProcessHelper}, and logs its output and exit code. The process, the temporary
 * properties file and the optional private temp directory are cleaned up in all cases.
 *
 * <p>Values such as {@code sessionId}, {@code namespace}, {@code logLevel}, {@code jvmOptions}
 * and the {@code *ConfigIds} arrays are not declared in this method; presumably they are fields
 * of the enclosing job class.
 *
 * @throws FessSystemException if the crawler process exits with a non-zero code, or if any
 *         other exception (except an interruption) occurs while preparing or running it
 */
protected void executeCrawler() {
    final List<String> cmdList = new ArrayList<>();
    // Classpath entries are separated by ';' on Windows and ':' elsewhere.
    final String cpSeparator = SystemUtils.IS_OS_WINDOWS ? ";" : ":";
    final ServletContext servletContext = ComponentUtil.getComponent(ServletContext.class);
    final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
    final ProcessHelper processHelper = ComponentUtil.getProcessHelper();
    final FessConfig fessConfig = ComponentUtil.getFessConfig();
    cmdList.add(fessConfig.getJavaCommandPath());
    // -cp: the classpath is accumulated in buf and added as a single argument below
    cmdList.add("-cp");
    final StringBuilder buf = new StringBuilder(100);
    // An externally configured conf path, when set, goes first on the classpath.
    final String confPath = System.getProperty(Constants.FESS_CONF_PATH);
    if (StringUtil.isNotBlank(confPath)) {
        buf.append(confPath);
        buf.append(cpSeparator);
    }
    // WEB-INF/crawler/resources
    buf.append("WEB-INF");
    buf.append(File.separator);
    buf.append("crawler");
    buf.append(File.separator);
    buf.append("resources");
    buf.append(cpSeparator);
    // WEB-INF/classes
    buf.append("WEB-INF");
    buf.append(File.separator);
    buf.append("classes");
    // target/classes (only added when that directory exists under user.dir)
    final String userDir = System.getProperty("user.dir");
    final File targetDir = new File(userDir, "target");
    final File targetClassesDir = new File(targetDir, "classes");
    if (targetClassesDir.isDirectory()) {
        buf.append(cpSeparator);
        buf.append(targetClassesDir.getAbsolutePath());
    }
    // WEB-INF/lib
    appendJarFile(cpSeparator, buf, new File(servletContext.getRealPath("/WEB-INF/lib")), "WEB-INF/lib" + File.separator);
    // WEB-INF/crawler/lib
    appendJarFile(cpSeparator, buf, new File(servletContext.getRealPath("/WEB-INF/crawler/lib")), "WEB-INF/crawler" + File.separator + "lib" + File.separator);
    // target/fess/WEB-INF/lib (only when present); here the absolute path is used as the prefix
    final File targetLibDir = new File(targetDir, "fess" + File.separator + "WEB-INF" + File.separator + "lib");
    if (targetLibDir.isDirectory()) {
        appendJarFile(cpSeparator, buf, targetLibDir, targetLibDir.getAbsolutePath() + File.separator);
    }
    cmdList.add(buf.toString());
    // Forward the Elasticsearch connection settings of this JVM to the child process.
    if (useLocalElasticsearch) {
        final String transportAddresses = System.getProperty(Constants.FESS_ES_TRANSPORT_ADDRESSES);
        if (StringUtil.isNotBlank(transportAddresses)) {
            cmdList.add("-D" + Constants.FESS_ES_TRANSPORT_ADDRESSES + "=" + transportAddresses);
        }
        final String clusterName = System.getProperty(Constants.FESS_ES_CLUSTER_NAME);
        if (StringUtil.isNotBlank(clusterName)) {
            cmdList.add("-D" + Constants.FESS_ES_CLUSTER_NAME + "=" + clusterName);
        }
    }
    // lasta.env: the system property wins over the lastaEnv value; a system value of "web"
    // is translated to "crawler" for the child process.
    final String systemLastaEnv = System.getProperty("lasta.env");
    if (StringUtil.isNotBlank(systemLastaEnv)) {
        if (systemLastaEnv.equals("web")) {
            cmdList.add("-Dlasta.env=crawler");
        } else {
            cmdList.add("-Dlasta.env=" + systemLastaEnv);
        }
    } else if (StringUtil.isNotBlank(lastaEnv)) {
        cmdList.add("-Dlasta.env=" + lastaEnv);
    }
    cmdList.add("-Dfess.crawler.process=true");
    // An explicitly set logFilePath takes precedence over the system helper's log path.
    cmdList.add("-Dfess.log.path=" + (logFilePath != null ? logFilePath : systemHelper.getLogFilePath()));
    // NOTE(review): addSystemProperty is defined elsewhere; the meaning of its last two
    // arguments (apparently a default value and a suffix) should be confirmed there.
    addSystemProperty(cmdList, "fess.log.name", "fess-crawler", "-crawler");
    if (logLevel == null) {
        addSystemProperty(cmdList, "fess.log.level", null, null);
    } else {
        cmdList.add("-Dfess.log.level=" + logLevel);
    }
    // Configured crawler JVM options, skipping blank entries.
    stream(fessConfig.getJvmCrawlerOptionsAsArray()).of(stream -> stream.filter(StringUtil::isNotBlank).forEach(value -> cmdList.add(value)));
    // Optionally give the child process its own temp directory, named after the session id.
    // ownTmpDir stays null when the option is off or the directory could not be created.
    File ownTmpDir = null;
    final String tmpDir = System.getProperty("java.io.tmpdir");
    if (fessConfig.isUseOwnTmpDir() && StringUtil.isNotBlank(tmpDir)) {
        ownTmpDir = new File(tmpDir, "fessTmpDir_" + sessionId);
        if (ownTmpDir.mkdirs()) {
            cmdList.add("-Djava.io.tmpdir=" + ownTmpDir.getAbsolutePath());
            cmdList.add("-Dpdfbox.fontcache=" + ownTmpDir.getAbsolutePath());
        } else {
            ownTmpDir = null;
        }
    }
    cmdList.add(ComponentUtil.getThumbnailManager().getThumbnailPathOption());
    // Additional JVM options are split on single spaces; blank tokens are dropped.
    if (StringUtil.isNotBlank(jvmOptions)) {
        split(jvmOptions, " ").of(stream -> stream.filter(StringUtil::isNotBlank).forEach(s -> cmdList.add(s)));
    }
    // Main class followed by the crawler's own arguments.
    cmdList.add(Crawler.class.getCanonicalName());
    cmdList.add("--sessionId");
    cmdList.add(sessionId);
    cmdList.add("--name");
    cmdList.add(namespace);
    // Config id lists are passed as comma-separated values, and only when non-empty.
    if (webConfigIds != null && webConfigIds.length > 0) {
        cmdList.add("-w");
        cmdList.add(StringUtils.join(webConfigIds, ','));
    }
    if (fileConfigIds != null && fileConfigIds.length > 0) {
        cmdList.add("-f");
        cmdList.add(StringUtils.join(fileConfigIds, ','));
    }
    if (dataConfigIds != null && dataConfigIds.length > 0) {
        cmdList.add("-d");
        cmdList.add(StringUtils.join(dataConfigIds, ','));
    }
    // -e is only passed when documentExpires is -1 or greater.
    if (documentExpires >= -1) {
        cmdList.add("-e");
        cmdList.add(Integer.toString(documentExpires));
    }
    File propFile = null;
    try {
        // -p <file>: snapshot of the system properties handed to the child process.
        cmdList.add("-p");
        propFile = File.createTempFile("crawler_", ".properties");
        cmdList.add(propFile.getAbsolutePath());
        try (FileOutputStream out = new FileOutputStream(propFile)) {
            final Properties prop = new Properties();
            prop.putAll(ComponentUtil.getSystemProperties());
            // The command line built so far is stored as the comment header of the file.
            prop.store(out, cmdList.toString());
        }
        // The process runs in the parent directory of WEB-INF, which is what the relative
        // classpath entries above are resolved against.
        final File baseDir = new File(servletContext.getRealPath("/WEB-INF")).getParentFile();
        if (logger.isInfoEnabled()) {
            logger.info("Crawler: \nDirectory=" + baseDir + "\nOptions=" + cmdList);
        }
        final JobProcess jobProcess = processHelper.startProcess(sessionId, cmdList, pb -> {
            pb.directory(baseDir);
            // stderr is merged into stdout so a single reader captures all output
            pb.redirectErrorStream(true);
        });
        final InputStreamThread it = jobProcess.getInputStreamThread();
        it.start();
        final Process currentProcess = jobProcess.getProcess();
        // Block until the crawler exits, then wait on the output reader with a 5000 ms limit
        // (assuming InputStreamThread follows Thread.join semantics).
        currentProcess.waitFor();
        it.join(5000);
        final int exitValue = currentProcess.exitValue();
        if (logger.isInfoEnabled()) {
            logger.info("Crawler: Exit Code=" + exitValue + " - Crawler Process Output:\n" + it.getOutput());
        }
        if (exitValue != 0) {
            throw new FessSystemException("Exit Code: " + exitValue + "\nOutput:\n" + it.getOutput());
        }
    } catch (final FessSystemException e) {
        // Rethrown as-is so it is not wrapped again by the generic handler below.
        throw e;
    } catch (final InterruptedException e) {
        // NOTE(review): the interruption is only logged; the thread's interrupt flag is not
        // restored and the exception is not propagated - confirm this is intentional.
        logger.warn("Crawler Process interrupted.");
    } catch (final Exception e) {
        throw new FessSystemException("Crawler Process terminated.", e);
    } finally {
        // Cleanup runs in nested finally blocks so that the temp file and temp directory
        // are removed even when destroying the process throws.
        try {
            processHelper.destroyProcess(sessionId);
        } finally {
            if (propFile != null && !propFile.delete()) {
                logger.warn("Failed to delete {}.", propFile.getAbsolutePath());
            }
            deleteTempDir(ownTmpDir);
        }
    }
}
Also used : FilenameFilter(java.io.FilenameFilter) Constants(org.codelibs.fess.Constants) FessSystemException(org.codelibs.fess.exception.FessSystemException) Date(java.util.Date) InputStreamThread(org.codelibs.fess.util.InputStreamThread) LoggerFactory(org.slf4j.LoggerFactory) SimpleDateFormat(java.text.SimpleDateFormat) StringUtils(org.apache.commons.lang3.StringUtils) ArrayList(java.util.ArrayList) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) StreamUtil.split(org.codelibs.core.stream.StreamUtil.split) JobProcess(org.codelibs.fess.util.JobProcess) StreamUtil.stream(org.codelibs.core.stream.StreamUtil.stream) Properties(java.util.Properties) ProcessHelper(org.codelibs.fess.helper.ProcessHelper) Logger(org.slf4j.Logger) SystemUtils(org.apache.commons.lang3.SystemUtils) StringUtil(org.codelibs.core.lang.StringUtil) FileOutputStream(java.io.FileOutputStream) FileUtils(org.apache.commons.io.FileUtils) File(java.io.File) List(java.util.List) ComponentUtil(org.codelibs.fess.util.ComponentUtil) SystemHelper(org.codelibs.fess.helper.SystemHelper) Crawler(org.codelibs.fess.exec.Crawler) ServletContext(javax.servlet.ServletContext) ArrayList(java.util.ArrayList) JobProcess(org.codelibs.fess.util.JobProcess) Crawler(org.codelibs.fess.exec.Crawler) Properties(java.util.Properties) ProcessHelper(org.codelibs.fess.helper.ProcessHelper) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) JobProcess(org.codelibs.fess.util.JobProcess) FessSystemException(org.codelibs.fess.exception.FessSystemException) FessSystemException(org.codelibs.fess.exception.FessSystemException) InputStreamThread(org.codelibs.fess.util.InputStreamThread) SystemHelper(org.codelibs.fess.helper.SystemHelper) FileOutputStream(java.io.FileOutputStream) ServletContext(javax.servlet.ServletContext) StringUtil(org.codelibs.core.lang.StringUtil) File(java.io.File)

Example 28 with StringUtils.join

use of org.apache.commons.lang3.StringUtils.join in project searchcode-server by boyter.

the class CodeRouteService method html.

/**
 * Renders the HTML (non-AJAX) pages: the search results page when a {@code q} query
 * parameter is present, otherwise the index page.
 *
 * <p>Recognised query parameters: {@code q} (search text), {@code p} (zero-based page,
 * clamped to the range 0..19; unparseable values fall back to 0) and the repeatable facet
 * filters {@code repo}, {@code lan} and {@code own}.
 *
 * @param request  the incoming request whose query parameters drive the search
 * @param response the outgoing response (not used by this method)
 * @return the model and the name of the template to render ({@code searchresults.ftl} or
 *         {@code index.ftl})
 */
public ModelAndView html(Request request, Response response) {
    Repo repo = Singleton.getRepo();
    Data data = Singleton.getData();
    SearchcodeLib scl = Singleton.getSearchcodeLib(data);
    CodeSearcher cs = new CodeSearcher();
    CodeMatcher cm = new CodeMatcher(data);
    Map<String, Object> map = new HashMap<>();
    map.put("repoCount", repo.getRepoCount());
    if (request.queryParams().contains("q")) {
        String query = request.queryParams("q").trim();
        // Alternative query: everything except letters, digits and spaces is replaced by a
        // space, then runs of spaces are collapsed.
        String altquery = query.replaceAll("[^A-Za-z0-9 ]", " ").trim().replaceAll(" +", " ");
        int page = 0;
        if (request.queryParams().contains("p")) {
            try {
                // Integer.parseInt accepts a leading '-', so clamp on both sides: a negative
                // page must not reach the searcher any more than one above 19.
                page = Math.max(0, Math.min(Integer.parseInt(request.queryParams("p")), 19));
            } catch (NumberFormatException ex) {
                page = 0;
            }
        }
        String[] repos = new String[0];
        String[] langs = new String[0];
        String[] owners = new String[0];
        String reposFilter = Values.EMPTYSTRING;
        String langsFilter = Values.EMPTYSTRING;
        String ownersFilter = Values.EMPTYSTRING;
        String reposQueryString = Values.EMPTYSTRING;
        String langsQueryString = Values.EMPTYSTRING;
        String ownsQueryString = Values.EMPTYSTRING;
        // For each facet two strings are built: a filter appended to the search query
        // (" && (field:a || field:b)") and a URL fragment ("&param=a&param=b") that is handed
        // to the template.
        // NOTE(review): URLEncoder.encode(String) is deprecated and uses the platform default
        // encoding; consider switching to an explicit UTF-8 overload.
        if (request.queryParams().contains("repo")) {
            repos = request.queryParamsValues("repo");
            if (repos.length != 0) {
                List<String> reposList = Arrays.stream(repos).map((s) -> "reponame:" + QueryParser.escape(s)).collect(Collectors.toList());
                reposFilter = " && (" + StringUtils.join(reposList, " || ") + ")";
                List<String> reposQueryList = Arrays.stream(repos).map((s) -> "&repo=" + URLEncoder.encode(s)).collect(Collectors.toList());
                reposQueryString = StringUtils.join(reposQueryList, "");
            }
        }
        if (request.queryParams().contains("lan")) {
            langs = request.queryParamsValues("lan");
            if (langs.length != 0) {
                List<String> langsList = Arrays.stream(langs).map((s) -> "languagename:" + QueryParser.escape(s)).collect(Collectors.toList());
                langsFilter = " && (" + StringUtils.join(langsList, " || ") + ")";
                List<String> langsQueryList = Arrays.stream(langs).map((s) -> "&lan=" + URLEncoder.encode(s)).collect(Collectors.toList());
                langsQueryString = StringUtils.join(langsQueryList, "");
            }
        }
        if (request.queryParams().contains("own")) {
            owners = request.queryParamsValues("own");
            if (owners.length != 0) {
                List<String> ownersList = Arrays.stream(owners).map((s) -> "codeowner:" + QueryParser.escape(s)).collect(Collectors.toList());
                ownersFilter = " && (" + StringUtils.join(ownersList, " || ") + ")";
                List<String> ownsQueryList = Arrays.stream(owners).map((s) -> "&own=" + URLEncoder.encode(s)).collect(Collectors.toList());
                ownsQueryString = StringUtils.join(ownsQueryList, "");
            }
        }
        // Format the raw query for the searcher and append the facet filters to it.
        String cleanQueryString = scl.formatQueryString(query);
        SearchResult searchResult = cs.search(cleanQueryString + reposFilter + langsFilter + ownersFilter, page);
        searchResult.setCodeResultList(cm.formatResults(searchResult.getCodeResultList(), query, true));
        // Mark the facets the user filtered on as selected. The list views are created once
        // instead of once per facet entry.
        List<String> selectedRepos = Arrays.asList(repos);
        for (CodeFacetRepo f : searchResult.getRepoFacetResults()) {
            if (selectedRepos.contains(f.getRepoName())) {
                f.setSelected(true);
            }
        }
        List<String> selectedLangs = Arrays.asList(langs);
        for (CodeFacetLanguage f : searchResult.getLanguageFacetResults()) {
            if (selectedLangs.contains(f.getLanguageName())) {
                f.setSelected(true);
            }
        }
        List<String> selectedOwners = Arrays.asList(owners);
        for (CodeFacetOwner f : searchResult.getOwnerFacetResults()) {
            if (selectedOwners.contains(f.getOwner())) {
                f.setSelected(true);
            }
        }
        map.put("searchValue", query);
        map.put("searchResult", searchResult);
        map.put("reposQueryString", reposQueryString);
        map.put("langsQueryString", langsQueryString);
        map.put("ownsQueryString", ownsQueryString);
        map.put("altQuery", altquery);
        map.put("totalPages", searchResult.getPages().size());
        map.put("isHtml", true);
        map.put("logoImage", CommonRouteService.getLogo());
        map.put("isCommunity", App.ISCOMMUNITY);
        map.put(Values.EMBED, Singleton.getData().getDataByName(Values.EMBED, Values.EMPTYSTRING));
        return new ModelAndView(map, "searchresults.ftl");
    }
    // No query given: render the landing page.
    map.put("photoId", CommonRouteService.getPhotoId());
    map.put("numDocs", cs.getTotalNumberDocumentsIndexed());
    map.put("logoImage", CommonRouteService.getLogo());
    map.put("isCommunity", App.ISCOMMUNITY);
    map.put(Values.EMBED, Singleton.getData().getDataByName(Values.EMBED, Values.EMPTYSTRING));
    return new ModelAndView(map, "index.ftl");
}
Also used : java.util(java.util) Singleton(com.searchcode.app.service.Singleton) Spark.halt(spark.Spark.halt) RepoResult(com.searchcode.app.model.RepoResult) ModelAndView(spark.ModelAndView) Repo(com.searchcode.app.dao.Repo) Collectors(java.util.stream.Collectors) StringUtils(org.apache.commons.lang3.StringUtils) Values(com.searchcode.app.config.Values) CodeMatcher(com.searchcode.app.service.CodeMatcher) com.searchcode.app.util(com.searchcode.app.util) CommonRouteService(com.searchcode.app.service.route.CommonRouteService) URLEncoder(java.net.URLEncoder) com.searchcode.app.dto(com.searchcode.app.dto) CodeSearcher(com.searchcode.app.service.CodeSearcher) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Gson(com.google.gson.Gson) Request(spark.Request) Data(com.searchcode.app.dao.Data) Response(spark.Response) Properties(com.searchcode.app.util.Properties) StringEscapeUtils(org.apache.commons.lang3.StringEscapeUtils) App(com.searchcode.app.App) ModelAndView(spark.ModelAndView) Data(com.searchcode.app.dao.Data) CodeSearcher(com.searchcode.app.service.CodeSearcher) Repo(com.searchcode.app.dao.Repo) CodeMatcher(com.searchcode.app.service.CodeMatcher)

Example 29 with StringUtils.join

use of org.apache.commons.lang3.StringUtils.join in project searchcode-server by boyter.

the class SearchRouteService method codeSearch.

/**
 * Runs a code search for the AJAX/API endpoint.
 *
 * <p>Recognised query parameters: {@code q} (search text, required and non-blank),
 * {@code p} (zero-based page, clamped to the range 0..19; unparseable values fall back to 0)
 * and the repeatable facet filters {@code repo}, {@code lan} and {@code own}. Spaces in
 * facet values are replaced by underscores before they are escaped and added to the query.
 *
 * @param request  the incoming request whose query parameters drive the search
 * @param response the outgoing response (not used by this method)
 * @return the search result with the code bodies nulled out, or {@code null} when no
 *         non-blank {@code q} parameter was supplied
 */
public SearchResult codeSearch(Request request, Response response) {
    CodeSearcher cs = new CodeSearcher();
    CodeMatcher cm = new CodeMatcher(Singleton.getData());
    SearchcodeLib scl = Singleton.getSearchcodeLib(Singleton.getData());
    if (request.queryParams().contains("q") && !request.queryParams("q").trim().equals(Values.EMPTYSTRING)) {
        String query = request.queryParams("q").trim();
        int page = 0;
        if (request.queryParams().contains("p")) {
            try {
                // Integer.parseInt accepts a leading '-', so clamp on both sides: a negative
                // page must not reach the searcher any more than one above 19.
                page = Math.max(0, Math.min(Integer.parseInt(request.queryParams("p")), 19));
            } catch (NumberFormatException ex) {
                page = 0;
            }
        }
        String reposFilter = Values.EMPTYSTRING;
        String langsFilter = Values.EMPTYSTRING;
        String ownersFilter = Values.EMPTYSTRING;
        // Each facet contributes a filter of the form " && (field:a || field:b)".
        if (request.queryParams().contains("repo")) {
            String[] repos = request.queryParamsValues("repo");
            if (repos.length != 0) {
                List<String> reposList = Arrays.stream(repos).map((s) -> "reponame:" + QueryParser.escape(s.replace(" ", "_"))).collect(Collectors.toList());
                reposFilter = " && (" + StringUtils.join(reposList, " || ") + ")";
            }
        }
        if (request.queryParams().contains("lan")) {
            String[] langs = request.queryParamsValues("lan");
            if (langs.length != 0) {
                List<String> langsList = Arrays.stream(langs).map((s) -> "languagename:" + QueryParser.escape(s.replace(" ", "_"))).collect(Collectors.toList());
                langsFilter = " && (" + StringUtils.join(langsList, " || ") + ")";
            }
        }
        if (request.queryParams().contains("own")) {
            String[] owners = request.queryParamsValues("own");
            if (owners.length != 0) {
                List<String> ownersList = Arrays.stream(owners).map((s) -> "codeowner:" + QueryParser.escape(s.replace(" ", "_"))).collect(Collectors.toList());
                ownersFilter = " && (" + StringUtils.join(ownersList, " || ") + ")";
            }
        }
        // Format the raw query for the searcher and append the facet filters to it.
        String cleanQueryString = scl.formatQueryString(query);
        SearchResult searchResult = cs.search(cleanQueryString + reposFilter + langsFilter + ownersFilter, page);
        searchResult.setCodeResultList(cm.formatResults(searchResult.getCodeResultList(), query, true));
        searchResult.setQuery(query);
        for (String altQuery : scl.generateAltQueries(query)) {
            searchResult.addAltQuery(altQuery);
        }
        // Null out code as it isn't required and there is no point in bloating our ajax requests
        for (CodeResult codeSearchResult : searchResult.getCodeResultList()) {
            codeSearchResult.setCode(null);
        }
        return searchResult;
    }
    return null;
}
Also used : Arrays(java.util.Arrays) SearchResult(com.searchcode.app.dto.SearchResult) Singleton(com.searchcode.app.service.Singleton) StringUtils(org.apache.commons.lang3.StringUtils) Collectors(java.util.stream.Collectors) Values(com.searchcode.app.config.Values) CodeMatcher(com.searchcode.app.service.CodeMatcher) SearchcodeLib(com.searchcode.app.util.SearchcodeLib) List(java.util.List) CodeResult(com.searchcode.app.dto.CodeResult) CodeSearcher(com.searchcode.app.service.CodeSearcher) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Request(spark.Request) Response(spark.Response) SearchcodeLib(com.searchcode.app.util.SearchcodeLib) CodeResult(com.searchcode.app.dto.CodeResult) CodeMatcher(com.searchcode.app.service.CodeMatcher) SearchResult(com.searchcode.app.dto.SearchResult) CodeSearcher(com.searchcode.app.service.CodeSearcher)

Example 30 with StringUtils.join

use of org.apache.commons.lang3.StringUtils.join in project nifi by apache.

the class GenerateTableFetch method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
    // Fetch the column/table info once (if the table name and max value columns are not dynamic). Otherwise do the setup later
    if (!isDynamicTableName && !isDynamicMaxValues && !setupComplete.get()) {
        super.setup(context);
    }
    ProcessSession session = sessionFactory.createSession();
    FlowFile fileToProcess = null;
    if (context.hasIncomingConnection()) {
        fileToProcess = session.get();
        if (fileToProcess == null) {
            // Incoming connection with no flow file available, do no work (see capability description)
            return;
        }
    }
    final ComponentLog logger = getLogger();
    final DBCPService dbcpService = context.getProperty(DBCP_SERVICE).asControllerService(DBCPService.class);
    final DatabaseAdapter dbAdapter = dbAdapters.get(context.getProperty(DB_TYPE).getValue());
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(fileToProcess).getValue();
    final String columnNames = context.getProperty(COLUMN_NAMES).evaluateAttributeExpressions(fileToProcess).getValue();
    final String maxValueColumnNames = context.getProperty(MAX_VALUE_COLUMN_NAMES).evaluateAttributeExpressions(fileToProcess).getValue();
    final int partitionSize = context.getProperty(PARTITION_SIZE).evaluateAttributeExpressions(fileToProcess).asInteger();
    final String customWhereClause = context.getProperty(WHERE_CLAUSE).evaluateAttributeExpressions(fileToProcess).getValue();
    final StateManager stateManager = context.getStateManager();
    final StateMap stateMap;
    FlowFile finalFileToProcess = fileToProcess;
    try {
        stateMap = stateManager.getState(Scope.CLUSTER);
    } catch (final IOException ioe) {
        logger.error("Failed to retrieve observed maximum values from the State Manager. Will not perform " + "query until this is accomplished.", ioe);
        context.yield();
        return;
    }
    try {
        // Make a mutable copy of the current state property map. This will be updated by the result row callback, and eventually
        // set as the current state map (after the session has been committed)
        final Map<String, String> statePropertyMap = new HashMap<>(stateMap.toMap());
        // If an initial max value for column(s) has been specified using properties, and this column is not in the state manager, sync them to the state property map
        for (final Map.Entry<String, String> maxProp : maxValueProperties.entrySet()) {
            String maxPropKey = maxProp.getKey().toLowerCase();
            String fullyQualifiedMaxPropKey = getStateKey(tableName, maxPropKey);
            if (!statePropertyMap.containsKey(fullyQualifiedMaxPropKey)) {
                String newMaxPropValue;
                // but store the new initial max value under the fully-qualified key.
                if (statePropertyMap.containsKey(maxPropKey)) {
                    newMaxPropValue = statePropertyMap.get(maxPropKey);
                } else {
                    newMaxPropValue = maxProp.getValue();
                }
                statePropertyMap.put(fullyQualifiedMaxPropKey, newMaxPropValue);
            }
        }
        // Build a WHERE clause with maximum-value columns (if they exist), and a list of column names that will contain MAX(<column>) aliases. The
        // executed SQL query will retrieve the count of all records after the filter(s) have been applied, as well as the new maximum values for the
        // specified columns. This allows the processor to generate the correctly partitioned SQL statements as well as to update the state with the
        // latest observed maximum values.
        String whereClause = null;
        List<String> maxValueColumnNameList = StringUtils.isEmpty(maxValueColumnNames) ? new ArrayList<>(0) : Arrays.asList(maxValueColumnNames.split("\\s*,\\s*"));
        List<String> maxValueClauses = new ArrayList<>(maxValueColumnNameList.size());
        String columnsClause = null;
        List<String> maxValueSelectColumns = new ArrayList<>(maxValueColumnNameList.size() + 1);
        maxValueSelectColumns.add("COUNT(*)");
        // For each maximum-value column, get a WHERE filter and a MAX(column) alias
        IntStream.range(0, maxValueColumnNameList.size()).forEach((index) -> {
            String colName = maxValueColumnNameList.get(index);
            maxValueSelectColumns.add("MAX(" + colName + ") " + colName);
            String maxValue = getColumnStateMaxValue(tableName, statePropertyMap, colName);
            if (!StringUtils.isEmpty(maxValue)) {
                if (columnTypeMap.isEmpty() || getColumnType(tableName, colName) == null) {
                    // This means column type cache is clean after instance reboot. We should re-cache column type
                    super.setup(context, false, finalFileToProcess);
                }
                Integer type = getColumnType(tableName, colName);
                // Add a condition for the WHERE clause
                maxValueClauses.add(colName + (index == 0 ? " > " : " >= ") + getLiteralByType(type, maxValue, dbAdapter.getName()));
            }
        });
        if (customWhereClause != null) {
            // adding the custom WHERE clause (if defined) to the list of existing clauses.
            maxValueClauses.add("(" + customWhereClause + ")");
        }
        whereClause = StringUtils.join(maxValueClauses, " AND ");
        columnsClause = StringUtils.join(maxValueSelectColumns, ", ");
        // Build a SELECT query with maximum-value columns (if present)
        final String selectQuery = dbAdapter.getSelectStatement(tableName, columnsClause, whereClause, null, null, null);
        long rowCount = 0;
        try (final Connection con = dbcpService.getConnection();
            final Statement st = con.createStatement()) {
            final Integer queryTimeout = context.getProperty(QUERY_TIMEOUT).evaluateAttributeExpressions(fileToProcess).asTimePeriod(TimeUnit.SECONDS).intValue();
            // timeout in seconds
            st.setQueryTimeout(queryTimeout);
            logger.debug("Executing {}", new Object[] { selectQuery });
            ResultSet resultSet;
            resultSet = st.executeQuery(selectQuery);
            if (resultSet.next()) {
                // Total row count is in the first column
                rowCount = resultSet.getLong(1);
                // Update the state map with the newly-observed maximum values
                ResultSetMetaData rsmd = resultSet.getMetaData();
                for (int i = 2; i <= rsmd.getColumnCount(); i++) {
                    // Some JDBC drivers consider the columns name and label to be very different things.
                    // Since this column has been aliased lets check the label first,
                    // if there is no label we'll use the column name.
                    String resultColumnName = (StringUtils.isNotEmpty(rsmd.getColumnLabel(i)) ? rsmd.getColumnLabel(i) : rsmd.getColumnName(i)).toLowerCase();
                    String fullyQualifiedStateKey = getStateKey(tableName, resultColumnName);
                    String resultColumnCurrentMax = statePropertyMap.get(fullyQualifiedStateKey);
                    if (StringUtils.isEmpty(resultColumnCurrentMax) && !isDynamicTableName) {
                        // If we can't find the value at the fully-qualified key name and the table name is static, it is possible (under a previous scheme)
                        // the value has been stored under a key that is only the column name. Fall back to check the column name; either way, when a new
                        // maximum value is observed, it will be stored under the fully-qualified key from then on.
                        resultColumnCurrentMax = statePropertyMap.get(resultColumnName);
                    }
                    int type = rsmd.getColumnType(i);
                    if (isDynamicTableName) {
                        // We haven't pre-populated the column type map if the table name is dynamic, so do it here
                        columnTypeMap.put(fullyQualifiedStateKey, type);
                    }
                    try {
                        String newMaxValue = getMaxValueFromRow(resultSet, i, type, resultColumnCurrentMax, dbAdapter.getName());
                        if (newMaxValue != null) {
                            statePropertyMap.put(fullyQualifiedStateKey, newMaxValue);
                        }
                    } catch (ParseException | IOException pie) {
                        // Fail the whole thing here before we start creating flow files and such
                        throw new ProcessException(pie);
                    }
                }
            } else {
                // Something is very wrong here, one row (even if count is zero) should be returned
                throw new SQLException("No rows returned from metadata query: " + selectQuery);
            }
            // for each maximum-value column get a right bounding WHERE condition
            IntStream.range(0, maxValueColumnNameList.size()).forEach((index) -> {
                String colName = maxValueColumnNameList.get(index);
                maxValueSelectColumns.add("MAX(" + colName + ") " + colName);
                String maxValue = getColumnStateMaxValue(tableName, statePropertyMap, colName);
                if (!StringUtils.isEmpty(maxValue)) {
                    if (columnTypeMap.isEmpty() || getColumnType(tableName, colName) == null) {
                        // This means column type cache is clean after instance reboot. We should re-cache column type
                        super.setup(context, false, finalFileToProcess);
                    }
                    Integer type = getColumnType(tableName, colName);
                    // Add a condition for the WHERE clause
                    maxValueClauses.add(colName + " <= " + getLiteralByType(type, maxValue, dbAdapter.getName()));
                }
            });
            // Update WHERE list to include new right hand boundaries
            whereClause = StringUtils.join(maxValueClauses, " AND ");
            final long numberOfFetches = (partitionSize == 0) ? 1 : (rowCount / partitionSize) + (rowCount % partitionSize == 0 ? 0 : 1);
            // Generate SQL statements to read "pages" of data
            for (long i = 0; i < numberOfFetches; i++) {
                Long limit = partitionSize == 0 ? null : (long) partitionSize;
                Long offset = partitionSize == 0 ? null : i * partitionSize;
                final String maxColumnNames = StringUtils.join(maxValueColumnNameList, ", ");
                final String query = dbAdapter.getSelectStatement(tableName, columnNames, whereClause, maxColumnNames, limit, offset);
                FlowFile sqlFlowFile = (fileToProcess == null) ? session.create() : session.create(fileToProcess);
                sqlFlowFile = session.write(sqlFlowFile, out -> out.write(query.getBytes()));
                sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.tableName", tableName);
                if (columnNames != null) {
                    sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.columnNames", columnNames);
                }
                if (StringUtils.isNotBlank(whereClause)) {
                    sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.whereClause", whereClause);
                }
                if (StringUtils.isNotBlank(maxColumnNames)) {
                    sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.maxColumnNames", maxColumnNames);
                }
                sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.limit", String.valueOf(limit));
                if (partitionSize != 0) {
                    sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.offset", String.valueOf(offset));
                }
                session.transfer(sqlFlowFile, REL_SUCCESS);
            }
            if (fileToProcess != null) {
                session.remove(fileToProcess);
            }
        } catch (SQLException e) {
            if (fileToProcess != null) {
                logger.error("Unable to execute SQL select query {} due to {}, routing {} to failure", new Object[] { selectQuery, e, fileToProcess });
                fileToProcess = session.putAttribute(fileToProcess, "generatetablefetch.sql.error", e.getMessage());
                session.transfer(fileToProcess, REL_FAILURE);
            } else {
                logger.error("Unable to execute SQL select query {} due to {}", new Object[] { selectQuery, e });
                throw new ProcessException(e);
            }
        }
        session.commit();
        try {
            // Update the state
            stateManager.setState(statePropertyMap, Scope.CLUSTER);
        } catch (IOException ioe) {
            logger.error("{} failed to update State Manager, observed maximum values will not be recorded. " + "Also, any generated SQL statements may be duplicated.", new Object[] { this, ioe });
        }
    } catch (final ProcessException pe) {
        // Log the cause of the ProcessException if it is available
        Throwable t = (pe.getCause() == null ? pe : pe.getCause());
        logger.error("Error during processing: {}", new Object[] { t.getMessage() }, t);
        session.rollback();
        context.yield();
    }
}
Also used : ProcessSession(org.apache.nifi.processor.ProcessSession) StandardValidators(org.apache.nifi.processor.util.StandardValidators) IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) Connection(java.sql.Connection) CapabilityDescription(org.apache.nifi.annotation.documentation.CapabilityDescription) ValidationContext(org.apache.nifi.components.ValidationContext) HashMap(java.util.HashMap) ComponentLog(org.apache.nifi.logging.ComponentLog) StringUtils(org.apache.commons.lang3.StringUtils) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) ProcessException(org.apache.nifi.processor.exception.ProcessException) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) SQLException(java.sql.SQLException) WritesAttributes(org.apache.nifi.annotation.behavior.WritesAttributes) Scope(org.apache.nifi.components.state.Scope) Relationship(org.apache.nifi.processor.Relationship) ResultSet(java.sql.ResultSet) Map(java.util.Map) Requirement(org.apache.nifi.annotation.behavior.InputRequirement.Requirement) ParseException(java.text.ParseException) TriggerSerially(org.apache.nifi.annotation.behavior.TriggerSerially) ValidationResult(org.apache.nifi.components.ValidationResult) DatabaseAdapter(org.apache.nifi.processors.standard.db.DatabaseAdapter) FlowFile(org.apache.nifi.flowfile.FlowFile) StateManager(org.apache.nifi.components.state.StateManager) Collection(java.util.Collection) ProcessContext(org.apache.nifi.processor.ProcessContext) Set(java.util.Set) ProcessSession(org.apache.nifi.processor.ProcessSession) IOException(java.io.IOException) WritesAttribute(org.apache.nifi.annotation.behavior.WritesAttribute) SeeAlso(org.apache.nifi.annotation.documentation.SeeAlso) ProcessSessionFactory(org.apache.nifi.processor.ProcessSessionFactory) StateMap(org.apache.nifi.components.state.StateMap) TimeUnit(java.util.concurrent.TimeUnit) InputRequirement(org.apache.nifi.annotation.behavior.InputRequirement) 
Stateful(org.apache.nifi.annotation.behavior.Stateful) OnScheduled(org.apache.nifi.annotation.lifecycle.OnScheduled) List(java.util.List) DynamicProperty(org.apache.nifi.annotation.behavior.DynamicProperty) Statement(java.sql.Statement) Tags(org.apache.nifi.annotation.documentation.Tags) DBCPService(org.apache.nifi.dbcp.DBCPService) Collections(java.util.Collections) ResultSetMetaData(java.sql.ResultSetMetaData) HashMap(java.util.HashMap) SQLException(java.sql.SQLException) StateMap(org.apache.nifi.components.state.StateMap) ArrayList(java.util.ArrayList) ResultSetMetaData(java.sql.ResultSetMetaData) StateManager(org.apache.nifi.components.state.StateManager) ResultSet(java.sql.ResultSet) FlowFile(org.apache.nifi.flowfile.FlowFile) Statement(java.sql.Statement) Connection(java.sql.Connection) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) DatabaseAdapter(org.apache.nifi.processors.standard.db.DatabaseAdapter) ProcessException(org.apache.nifi.processor.exception.ProcessException) DBCPService(org.apache.nifi.dbcp.DBCPService) ParseException(java.text.ParseException) HashMap(java.util.HashMap) Map(java.util.Map) StateMap(org.apache.nifi.components.state.StateMap)

Aggregations

StringUtils (org.apache.commons.lang3.StringUtils) 34, List (java.util.List) 30, Collectors (java.util.stream.Collectors) 23, ArrayList (java.util.ArrayList) 21, Map (java.util.Map) 17, HashMap (java.util.HashMap) 15, Set (java.util.Set) 14, Logger (org.slf4j.Logger) 14, LoggerFactory (org.slf4j.LoggerFactory) 14, IOException (java.io.IOException) 13, HashSet (java.util.HashSet) 11, Arrays (java.util.Arrays) 10, Collections (java.util.Collections) 10, Date (java.util.Date) 9, File (java.io.File) 6, StopWatch (org.apache.commons.lang3.time.StopWatch) 6, InputStream (java.io.InputStream) 5, java.util (java.util) 5, Pair (org.apache.commons.lang3.tuple.Pair) 5, Path (java.nio.file.Path) 4