Search in sources:

Example 46 with PluginDefineException

use of com.dtstack.taier.pluginapi.exception.PluginDefineException in project Taier by DTStack.

In class SessionClientFactory, the method acquireAppIdAndSetClusterId:

/**
 * Finds the best matching Flink session application on YARN and returns its id.
 * Among RUNNING "Apache Flink" applications whose name/queue match the configured
 * session, the one requesting the most memory (cores as tie-breaker) wins.
 * May also record the chosen id as the HA cluster id in {@code configuration}.
 *
 * @param configuration Flink configuration; HA_CLUSTER_ID may be written into it
 * @return the selected application id, or {@code null} when no session matches
 * @throws PluginDefineException when the YARN client is unavailable or lookup fails
 */
private ApplicationId acquireAppIdAndSetClusterId(Configuration configuration) {
    try {
        // Only "Apache Flink" applications in RUNNING or ACCEPTED state are candidates.
        Set<String> applicationTypes = new HashSet<>();
        applicationTypes.add("Apache Flink");
        EnumSet<YarnApplicationState> candidateStates =
                EnumSet.of(YarnApplicationState.RUNNING, YarnApplicationState.ACCEPTED);
        YarnClient yarnClient = flinkClientBuilder.getYarnClient();
        if (null == yarnClient) {
            throw new PluginDefineException("getYarnClient error, Yarn Client is null!");
        }
        List<ApplicationReport> reports = yarnClient.getApplications(applicationTypes, candidateStates);
        int bestMemory = -1;
        int bestCores = -1;
        ApplicationId chosenAppId = null;
        for (ApplicationReport report : reports) {
            LOG.info("filter flink session application current reportName:{} queue:{} status:{}", report.getName(), report.getQueue(), report.getYarnApplicationState());
            boolean isRunning = YarnApplicationState.RUNNING.equals(report.getYarnApplicationState());
            boolean nameMatches = report.getName().startsWith(flinkConfig.getFlinkSessionName());
            // When the engine auto-starts sessions, only accept names carrying our suffix.
            boolean suffixMatches = !flinkConfig.getSessionStartAuto()
                    || report.getName().endsWith(sessionAppNameSuffix);
            boolean queueMatches = report.getQueue().endsWith(flinkConfig.getQueue());
            if (!(isRunning && nameMatches && suffixMatches && queueMatches)) {
                continue;
            }
            int memory = report.getApplicationResourceUsageReport().getNeededResources().getMemory();
            int cores = report.getApplicationResourceUsageReport().getNeededResources().getVirtualCores();
            LOG.info("current flink session memory {},Cores{}", memory, cores);
            // Prefer the session asking for the most memory; break ties on vcores.
            boolean isBetter = memory > bestMemory || (memory == bestMemory && cores > bestCores);
            if (isBetter) {
                bestMemory = memory;
                bestCores = cores;
                chosenAppId = report.getApplicationId();
                // When no HA cluster id is configured, or this yarn session was not started
                // by the engine, set the cluster id here (keeps compatibility with manually
                // started yarn sessions).
                if (StringUtils.isBlank(configuration.getValue(HighAvailabilityOptions.HA_CLUSTER_ID)) || report.getName().endsWith(sessionAppNameSuffix)) {
                    configuration.setString(HighAvailabilityOptions.HA_CLUSTER_ID, chosenAppId.toString());
                }
            }
        }
        return chosenAppId;
    } catch (Exception e) {
        LOG.error("", e);
        throw new PluginDefineException(e);
    }
}
Also used : ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) PluginDefineException(com.dtstack.taier.pluginapi.exception.PluginDefineException) YarnApplicationState(org.apache.hadoop.yarn.api.records.YarnApplicationState) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient) ProgramMissingJobException(org.apache.flink.client.program.ProgramMissingJobException) FlinkException(org.apache.flink.util.FlinkException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) PluginDefineException(com.dtstack.taier.pluginapi.exception.PluginDefineException) HashSet(java.util.HashSet)

Example 47 with PluginDefineException

use of com.dtstack.taier.pluginapi.exception.PluginDefineException in project Taier by DTStack.

In class SessionClientFactory, the method createYarnSessionClusterDescriptor:

/**
 * Builds a YarnClusterDescriptor for a Flink yarn-session deployment: applies
 * HA settings, optional Kerberos keytabs, sync-plugin ship files and the Flink
 * dist jar classpath.
 *
 * @return a fully configured cluster descriptor ready for session deployment
 * @throws MalformedURLException when the Flink jar path cannot be turned into URLs
 * @throws PluginDefineException when the sync-plugin directory is missing or unreadable
 */
public YarnClusterDescriptor createYarnSessionClusterDescriptor() throws MalformedURLException {
    Configuration newConf = new Configuration(flinkConfiguration);
    String flinkJarPath = flinkConfig.getFlinkJarPath();
    String pluginLoadMode = flinkConfig.getPluginLoadMode();
    YarnConfiguration yarnConf = flinkClientBuilder.getYarnConf();
    FileUtil.checkFileExist(flinkJarPath);
    if (!flinkConfig.getFlinkHighAvailability()) {
        setNoneHaModeConfig(newConf);
    } else {
        // Sessions managed by the engine do not set a cluster id; the appId
        // is used as the cluster id by default.
        newConf.removeConfig(HighAvailabilityOptions.HA_CLUSTER_ID);
    }
    List<File> keytabFiles = null;
    if (flinkConfig.isOpenKerberos()) {
        keytabFiles = getKeytabFilesAndSetSecurityConfig(newConf);
    }
    newConf = setHdfsFlinkJarPath(flinkConfig, newConf);
    YarnClusterDescriptor clusterDescriptor = getClusterDescriptor(newConf, yarnConf);
    if (StringUtils.isNotBlank(pluginLoadMode) && ConfigConstrant.FLINK_PLUGIN_SHIPFILE_LOAD.equalsIgnoreCase(pluginLoadMode)) {
        newConf.setString(ConfigConstrant.FLINK_PLUGIN_LOAD_MODE, flinkConfig.getPluginLoadMode());
        String flinkPluginRoot = flinkConfig.getFlinkPluginRoot();
        if (StringUtils.isNotBlank(flinkPluginRoot)) {
            String syncPluginDir = flinkPluginRoot + ConfigConstrant.SP + ConfigConstrant.SYNCPLUGIN_DIR;
            File syncFile = new File(syncPluginDir);
            if (!syncFile.exists()) {
                throw new PluginDefineException("syncPlugin path is null");
            }
            // File.listFiles() returns null when the path is not a directory
            // (or on an I/O error), which previously caused an NPE in Arrays.stream.
            File[] syncPluginFiles = syncFile.listFiles();
            if (syncPluginFiles == null) {
                throw new PluginDefineException("syncPlugin path is not a readable directory: " + syncPluginDir);
            }
            List<File> pluginPaths = Arrays.stream(syncPluginFiles).filter(file -> !file.getName().endsWith("zip")).collect(Collectors.toList());
            clusterDescriptor.addShipFiles(pluginPaths);
        }
    }
    if (CollectionUtils.isNotEmpty(keytabFiles)) {
        clusterDescriptor.addShipFiles(keytabFiles);
    }
    List<URL> classpaths = getFlinkJarFile(flinkJarPath, clusterDescriptor);
    clusterDescriptor.setProvidedUserJarFiles(classpaths);
    return clusterDescriptor;
}
Also used : SessionCheckInterval(com.dtstack.taier.flink.entity.SessionCheckInterval) SecurityOptions(org.apache.flink.configuration.SecurityOptions) Arrays(java.util.Arrays) FlinkConfUtil(com.dtstack.taier.flink.util.FlinkConfUtil) ClientUtils(org.apache.flink.client.ClientUtils) FileSystem(org.apache.hadoop.fs.FileSystem) URL(java.net.URL) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) LoggerFactory(org.slf4j.LoggerFactory) ErrorMessageConsts(com.dtstack.taier.flink.constrant.ErrorMessageConsts) YarnClusterDescriptor(org.apache.flink.yarn.YarnClusterDescriptor) StringUtils(org.apache.commons.lang3.StringUtils) CuratorFramework(org.apache.flink.shaded.curator.org.apache.curator.framework.CuratorFramework) KerberosUtils(com.dtstack.taier.base.util.KerberosUtils) LeaderLatchListener(org.apache.flink.shaded.curator.org.apache.curator.framework.recipes.leader.LeaderLatchListener) ExponentialBackoffRetry(org.apache.flink.shaded.curator.org.apache.curator.retry.ExponentialBackoffRetry) ProgramMissingJobException(org.apache.flink.client.program.ProgramMissingJobException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) FlinkConfig(com.dtstack.taier.flink.FlinkConfig) Map(java.util.Map) ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) Path(org.apache.hadoop.fs.Path) CuratorFrameworkFactory(org.apache.flink.shaded.curator.org.apache.curator.framework.CuratorFrameworkFactory) EnumSet(java.util.EnumSet) TaskStatus(com.dtstack.taier.pluginapi.enums.TaskStatus) JobIdentifier(com.dtstack.taier.pluginapi.JobIdentifier) LeaderLatch(org.apache.flink.shaded.curator.org.apache.curator.framework.recipes.leader.LeaderLatch) Set(java.util.Set) JobManagerOptions(org.apache.flink.configuration.JobManagerOptions) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) CheckpointingOptions(org.apache.flink.configuration.CheckpointingOptions) 
YarnConfigOptions(org.apache.flink.yarn.configuration.YarnConfigOptions) JobExecutionResult(org.apache.flink.api.common.JobExecutionResult) List(java.util.List) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) FileUtil(com.dtstack.taier.flink.util.FileUtil) ClusterClient(org.apache.flink.client.program.ClusterClient) ConfigConstant(com.dtstack.taier.pluginapi.constrant.ConfigConstant) PoolHttpClient(com.dtstack.taier.pluginapi.http.PoolHttpClient) YarnApplicationState(org.apache.hadoop.yarn.api.records.YarnApplicationState) JSONObject(com.alibaba.fastjson.JSONObject) PackagedProgram(org.apache.flink.client.program.PackagedProgram) ClusterClientProvider(org.apache.flink.client.program.ClusterClientProvider) FlinkException(org.apache.flink.util.FlinkException) ConfigConstrant(com.dtstack.taier.flink.constrant.ConfigConstrant) FlinkClientBuilder(com.dtstack.taier.flink.FlinkClientBuilder) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient) ClusterSpecification(org.apache.flink.client.deployment.ClusterSpecification) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) CustomThreadFactory(com.dtstack.taier.pluginapi.CustomThreadFactory) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) HashSet(java.util.HashSet) Lists(com.google.common.collect.Lists) CollectionUtils(org.apache.commons.collections.CollectionUtils) Service(org.apache.hadoop.service.Service) SessionHealthCheckedInfo(com.dtstack.taier.flink.entity.SessionHealthCheckedInfo) ExecutorService(java.util.concurrent.ExecutorService) FlinkUtil(com.dtstack.taier.flink.util.FlinkUtil) PackagedProgramUtils(org.apache.flink.client.program.PackagedProgramUtils) Logger(org.slf4j.Logger) MalformedURLException(java.net.MalformedURLException) Configuration(org.apache.flink.configuration.Configuration) IOException(java.io.IOException) 
FilesystemManager(com.dtstack.taier.base.filesystem.FilesystemManager) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) JSON(com.alibaba.fastjson.JSON) JobID(org.apache.flink.api.common.JobID) PluginDefineException(com.dtstack.taier.pluginapi.exception.PluginDefineException) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) HighAvailabilityOptions(org.apache.flink.configuration.HighAvailabilityOptions) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.flink.configuration.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) PluginDefineException(com.dtstack.taier.pluginapi.exception.PluginDefineException) YarnClusterDescriptor(org.apache.flink.yarn.YarnClusterDescriptor) File(java.io.File) URL(java.net.URL)

Example 48 with PluginDefineException

use of com.dtstack.taier.pluginapi.exception.PluginDefineException in project Taier by DTStack.

the class FlinkClient method getPerJobStatus.

/**
 * Maps the YARN application state of a per-job Flink cluster to a TaskStatus.
 * In per-job mode the task status is really the yarn-application status.
 *
 * @param applicationId YARN application id string
 * @return the translated task status; NOTFOUND when the YARN lookup fails,
 *         RUNNING when Kerberos login itself fails (to avoid a spurious FAILED)
 */
public TaskStatus getPerJobStatus(String applicationId) {
    try {
        return KerberosUtils.login(flinkConfig, () -> {
            ApplicationId appId = ConverterUtils.toApplicationId(applicationId);
            try {
                ApplicationReport report = flinkClientBuilder.getYarnClient().getApplicationReport(appId);
                YarnApplicationState state = report.getYarnApplicationState();
                switch(state) {
                    case KILLED:
                        return TaskStatus.KILLED;
                    case NEW:
                    case NEW_SAVING:
                        return TaskStatus.CREATED;
                    case SUBMITTED:
                        // FIXME special-case: a job submitted to the compute engine is
                        // treated as waiting for resources.
                        return TaskStatus.WAITCOMPUTE;
                    case ACCEPTED:
                        return TaskStatus.SCHEDULED;
                    case RUNNING:
                        return TaskStatus.RUNNING;
                    case FINISHED:
                        // A FINISHED application state still requires inspecting the
                        // final application status to know the real outcome.
                        switch(report.getFinalApplicationStatus()) {
                            case FAILED:
                            case UNDEFINED:
                                return TaskStatus.FAILED;
                            case SUCCEEDED:
                                return TaskStatus.FINISHED;
                            case KILLED:
                                return TaskStatus.KILLED;
                            default:
                                return TaskStatus.RUNNING;
                        }
                    case FAILED:
                        return TaskStatus.FAILED;
                    default:
                        throw new PluginDefineException("Unsupported application state");
                }
            } catch (YarnException | IOException e) {
                logger.error("appId: {}, getPerJobStatus with yarnClient error: ", applicationId, e);
                return TaskStatus.NOTFOUND;
            }
        }, hadoopConf.getYarnConfiguration());
    } catch (Exception e) {
        logger.error("appId: {}, getPerJobStatus with yarnClient error: ", applicationId, e);
        // Guard against a Kerberos login failure turning NOTFOUND into FAILED.
        return TaskStatus.RUNNING;
    }
}
Also used : ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) FinalApplicationStatus(org.apache.hadoop.yarn.api.records.FinalApplicationStatus) PluginDefineException(com.dtstack.taier.pluginapi.exception.PluginDefineException) YarnApplicationState(org.apache.hadoop.yarn.api.records.YarnApplicationState) IOException(java.io.IOException) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) PluginDefineException(com.dtstack.taier.pluginapi.exception.PluginDefineException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) MalformedURLException(java.net.MalformedURLException)

Example 49 with PluginDefineException

use of com.dtstack.taier.pluginapi.exception.PluginDefineException in project Taier by DTStack.

the class FlinkClient method grammarCheck.

/**
 * Performs a grammar check of a Flink SQL job by actually building its JobGraph.
 * Any exception during preparation or graph construction is converted into a
 * failed CheckResult rather than propagated.
 *
 * @param jobClient job to validate
 * @return success, or a CheckResult carrying the error message
 */
@Override
public CheckResult grammarCheck(JobClient jobClient) {
    CheckResult checkResult = CheckResult.success();
    String taskId = jobClient.getJobId();
    try {
        // 1. before download jar
        beforeSubmitFunc(jobClient);
        // 2. flink sql args
        String taskWorkspace = FlinkUtil.getTaskWorkspace(jobClient.getJobId());
        List<String> args = sqlPluginInfo.buildExeArgs(jobClient);
        List<String> attachJarLists = cacheFile.get(taskWorkspace);
        List<URL> attachJarUrls = Lists.newArrayList();
        if (!CollectionUtils.isEmpty(attachJarLists)) {
            args.add("-addjar");
            String attachJarStr = PublicUtil.objToString(attachJarLists);
            args.add(URLEncoder.encode(attachJarStr, Charsets.UTF_8.name()));
            attachJarUrls = attachJarLists.stream().map(k -> {
                try {
                    // File.toURL() is deprecated and does not escape illegal URL
                    // characters; toURI().toURL() is the documented replacement.
                    return new File(k).toURI().toURL();
                } catch (MalformedURLException e) {
                    throw new PluginDefineException(e);
                }
            }).collect(Collectors.toList());
        }
        JarFileInfo coreJarInfo = sqlPluginInfo.createCoreJarInfo();
        jobClient.setCoreJarInfo(coreJarInfo);
        // 3. build jobGraph
        String[] programArgs = args.toArray(new String[0]);
        Configuration flinkConfig = flinkClientBuilder.getFlinkConfiguration();
        PackagedProgram program = PackagedProgram.newBuilder().setJarFile(new File(coreJarInfo.getJarPath())).setUserClassPaths(attachJarUrls).setConfiguration(flinkConfig).setArguments(programArgs).build();
        PackagedProgramUtils.createJobGraph(program, flinkConfig, 1, false);
        logger.info("TaskId: {}, GrammarCheck success!", taskId);
    } catch (Exception e) {
        logger.error("TaskId: {}, GrammarCheck error: ", taskId, e);
        checkResult = CheckResult.exception(ExceptionUtil.getErrorMessage(e));
    } finally {
        try {
            afterSubmitFunc(jobClient);
        } catch (Exception e) {
            // Best-effort cleanup: the check result must not change, but the
            // failure should not be silently swallowed either.
            logger.error("TaskId: {}, afterSubmitFunc error: ", taskId, e);
        }
    }
    return checkResult;
}
Also used : MalformedURLException(java.net.MalformedURLException) JarFileInfo(com.dtstack.taier.pluginapi.JarFileInfo) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.flink.configuration.Configuration) URL(java.net.URL) IOException(java.io.IOException) PluginDefineException(com.dtstack.taier.pluginapi.exception.PluginDefineException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) MalformedURLException(java.net.MalformedURLException) PackagedProgram(org.apache.flink.client.program.PackagedProgram) PluginDefineException(com.dtstack.taier.pluginapi.exception.PluginDefineException) CheckResult(com.dtstack.taier.pluginapi.pojo.CheckResult) File(java.io.File)

Example 50 with PluginDefineException

use of com.dtstack.taier.pluginapi.exception.PluginDefineException in project Taier by DTStack.

the class FlinkClientBuilder method initFlinkGlobalConfiguration.

/**
 * Initializes the global Flink configuration: akka timeouts, YARN queue,
 * serialized Hadoop/YARN configs, user-supplied extra properties and the
 * Kerberos flag, then initializes Flink's FileSystem with the result.
 *
 * @param extProp optional extra properties to merge in; engine-reserved Flink
 *                keys and blank values are skipped. May be {@code null}.
 * @throws PluginDefineException when FileSystem initialization fails
 */
public void initFlinkGlobalConfiguration(Properties extProp) {
    Configuration config = new Configuration();
    config.setString("akka.client.timeout", ConfigConstrant.AKKA_CLIENT_TIMEOUT);
    config.setString("akka.ask.timeout", ConfigConstrant.AKKA_ASK_TIMEOUT);
    config.setString("akka.tcp.timeout", ConfigConstrant.AKKA_TCP_TIMEOUT);
    // yarn queue
    config.setString(YarnConfigOptions.APPLICATION_QUEUE, flinkConfig.getQueue());
    config.setBytes(ConfigConstrant.HADOOP_CONF_BYTES_KEY, HadoopConfTool.serializeHadoopConf(hadoopConf));
    config.setBytes(ConfigConstrant.YARN_CONF_BYTES_KEY, HadoopConfTool.serializeHadoopConf(yarnConf));
    if (extProp != null) {
        for (Object rawKey : extProp.keySet()) {
            String propKey = (String) rawKey;
            String propValue = extProp.getProperty(propKey);
            // Skip blank values and keys reserved for the engine's own Flink config.
            if (StringUtils.isEmpty(propValue) || FlinkConfig.getEngineFlinkConfigs().contains(propKey)) {
                continue;
            }
            config.setString(propKey, propValue);
        }
    }
    config.setBoolean(ConfigConstrant.OPEN_KERBEROS_KEY, flinkConfig.isOpenKerberos());
    try {
        FileSystem.initialize(config);
    } catch (Exception e) {
        LOG.error("", e);
        throw new PluginDefineException(e);
    }
    flinkConfiguration = config;
}
Also used : Configuration(org.apache.flink.configuration.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) PluginDefineException(com.dtstack.taier.pluginapi.exception.PluginDefineException) PluginDefineException(com.dtstack.taier.pluginapi.exception.PluginDefineException)

Aggregations

PluginDefineException (com.dtstack.taier.pluginapi.exception.PluginDefineException)58 IOException (java.io.IOException)30 File (java.io.File)13 MalformedURLException (java.net.MalformedURLException)13 YarnClient (org.apache.hadoop.yarn.client.api.YarnClient)11 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)10 JarFileInfo (com.dtstack.taier.pluginapi.JarFileInfo)8 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)8 YarnException (org.apache.hadoop.yarn.exceptions.YarnException)8 ApplicationReport (org.apache.hadoop.yarn.api.records.ApplicationReport)7 YarnApplicationState (org.apache.hadoop.yarn.api.records.YarnApplicationState)7 ClusterClient (org.apache.flink.client.program.ClusterClient)6 JSONObject (com.alibaba.fastjson.JSONObject)5 Configuration (org.apache.flink.configuration.Configuration)5 Path (org.apache.hadoop.fs.Path)5 KerberosUtils (com.dtstack.taier.base.util.KerberosUtils)4 FlinkConfig (com.dtstack.taier.flink.FlinkConfig)4 ConfigConstant (com.dtstack.taier.pluginapi.constrant.ConfigConstant)4 URL (java.net.URL)4 Matcher (java.util.regex.Matcher)4