Search in sources :

Example 1 with ParagraphInfo

use of org.apache.zeppelin.interpreter.thrift.ParagraphInfo in project zeppelin by apache.

the class PySubmarineInterpreter method interpret.

@Override
public InterpreterResult interpret(String st, InterpreterContext context) throws InterpreterException {
    setParagraphConfig(context);
    // The algorithm and checkpoint paths support a ${username} placeholder; replace it with the real user name
    String algorithmPath = properties.getProperty(SubmarineConstants.SUBMARINE_ALGORITHM_HDFS_PATH, "");
    if (algorithmPath.contains(SubmarineConstants.USERNAME_SYMBOL)) {
        algorithmPath = algorithmPath.replace(SubmarineConstants.USERNAME_SYMBOL, userName);
        properties.setProperty(SubmarineConstants.SUBMARINE_ALGORITHM_HDFS_PATH, algorithmPath);
    }
    String checkpointPath = properties.getProperty(SubmarineConstants.TF_CHECKPOINT_PATH, "");
    if (checkpointPath.contains(SubmarineConstants.USERNAME_SYMBOL)) {
        checkpointPath = checkpointPath.replace(SubmarineConstants.USERNAME_SYMBOL, userName);
        properties.setProperty(SubmarineConstants.TF_CHECKPOINT_PATH, checkpointPath);
    }
    if (null == submarineInterpreter) {
        submarineInterpreter = getInterpreterInTheSameSessionByClassName(SubmarineInterpreter.class);
        if (null != submarineInterpreter) {
            submarineInterpreter.setPythonWorkDir(context.getNoteId(), getPythonWorkDir());
        }
    }
    SubmarineJob submarineJob = submarineContext.addOrGetSubmarineJob(this.properties, context);
    if (null != submarineJob && null != submarineJob.getHdfsClient()) {
        try {
            String noteId = context.getNoteId();
            List<ParagraphInfo> paragraphInfos = context.getIntpEventClient().getParagraphList(userName, noteId);
            submarineJob.getHdfsClient().saveParagraphToFiles(noteId, paragraphInfos, getPythonWorkDir().getAbsolutePath(), properties);
        } catch (Exception e) {
            LOGGER.error(e.getMessage(), e);
        }
    }
    return super.interpret(st, context);
}
Also used : SubmarineJob(org.apache.zeppelin.submarine.job.SubmarineJob) InterpreterException(org.apache.zeppelin.interpreter.InterpreterException) ParagraphInfo(org.apache.zeppelin.interpreter.thrift.ParagraphInfo)
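
The placeholder substitution at the top of interpret() is easy to isolate. The sketch below is a minimal, self-contained illustration, assuming SubmarineConstants.USERNAME_SYMBOL is the literal string "${username}"; the class name and property key are hypothetical, shortened for illustration.

import java.util.Properties;

public class UsernamePlaceholderSketch {
    // hypothetical stand-in for SubmarineConstants.USERNAME_SYMBOL
    static final String USERNAME_SYMBOL = "${username}";

    // Resolve a property value, replacing the placeholder and writing the
    // resolved value back, exactly as interpret() does above.
    static String resolve(Properties props, String key, String userName) {
        String value = props.getProperty(key, "");
        if (value.contains(USERNAME_SYMBOL)) {
            value = value.replace(USERNAME_SYMBOL, userName);
            props.setProperty(key, value);
        }
        return value;
    }

    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("submarine.algorithm.hdfs.path", "hdfs://nn/user/${username}/algorithm");
        // prints hdfs://nn/user/alice/algorithm
        System.out.println(resolve(props, "submarine.algorithm.hdfs.path", "alice"));
    }
}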

Example 2 with ParagraphInfo

use of org.apache.zeppelin.interpreter.thrift.ParagraphInfo in project zeppelin by apache.

the class ClusterEventTest method testClusterAuthEvent.

@Test
public void testClusterAuthEvent() throws IOException {
    String noteId = null;
    try {
        noteId = notebook.createNote("note1", anonymous);
        notebook.processNote(noteId, note -> {
            Paragraph p1 = note.addNewParagraph(anonymous);
            p1.setText("%md start remote interpreter process");
            p1.setAuthenticationInfo(anonymous);
            notebookServer.getNotebook().saveNote(note, anonymous);
            return null;
        });
        String user1Id = "user1", user2Id = "user2";
        // test user1 can get anonymous's note
        List<ParagraphInfo> paragraphList0 = null;
        try {
            paragraphList0 = notebookServer.getParagraphList(user1Id, noteId);
        } catch (ServiceException e) {
            LOGGER.error(e.getMessage(), e);
        } catch (TException e) {
            LOGGER.error(e.getMessage(), e);
        }
        assertNotNull(user1Id + " can get anonymous's note", paragraphList0);
        // test user1 cannot get user2's note
        authorizationService.setOwners(noteId, new HashSet<>(Arrays.asList(user2Id)));
        // wait cluster sync event
        Thread.sleep(1000);
        checkClusterAuthEventListener();
        authorizationService.setReaders(noteId, new HashSet<>(Arrays.asList(user2Id)));
        // wait cluster sync event
        Thread.sleep(1000);
        checkClusterAuthEventListener();
        authorizationService.setRunners(noteId, new HashSet<>(Arrays.asList(user2Id)));
        // wait cluster sync event
        Thread.sleep(1000);
        checkClusterAuthEventListener();
        authorizationService.setWriters(noteId, new HashSet<>(Arrays.asList(user2Id)));
        // wait cluster sync event
        Thread.sleep(1000);
        checkClusterAuthEventListener();
        Set<String> roles = new HashSet<>(Arrays.asList("admin"));
        // grant the admin role to user2
        authorizationService.setRoles(user2Id, roles);
        // wait cluster sync event
        Thread.sleep(1000);
        checkClusterAuthEventListener();
        authorizationService.clearPermission(noteId);
        // wait cluster sync event
        Thread.sleep(1000);
        checkClusterAuthEventListener();
    } catch (InterruptedException e) {
        LOGGER.error(e.getMessage(), e);
    } finally {
        if (null != noteId) {
            notebook.removeNote(noteId, anonymous);
        }
    }
}
Also used : TException(org.apache.thrift.TException) ServiceException(org.apache.zeppelin.interpreter.thrift.ServiceException) Paragraph(org.apache.zeppelin.notebook.Paragraph) ParagraphInfo(org.apache.zeppelin.interpreter.thrift.ParagraphInfo) HashSet(java.util.HashSet) Test(org.junit.Test)
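
The test above only verifies that each permission change propagates to the cluster event listener; it never re-invokes getParagraphList after restricting the note. Below is a hedged sketch of that missing negative check, reusing the same notebook, notebookServer, and authorizationService fixtures and relying on the ServiceException contract shown in Example 3; the test name and the one-second sync wait are our assumptions.

@Test
public void testGetParagraphListRejectsNonReader() throws IOException, InterruptedException {
    String noteId = notebook.createNote("note2", anonymous);
    try {
        // restrict reading to user2, then wait for the auth event to sync
        authorizationService.setReaders(noteId, new HashSet<>(Arrays.asList("user2")));
        Thread.sleep(1000);
        try {
            notebookServer.getParagraphList("user1", noteId);
            fail("user1 should not be able to read a note restricted to user2");
        } catch (ServiceException expected) {
            // getParagraphList signals a failed READER check with ServiceException
        } catch (TException e) {
            fail("unexpected thrift failure: " + e.getMessage());
        }
    } finally {
        notebook.removeNote(noteId, anonymous);
    }
}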

Example 3 with ParagraphInfo

use of org.apache.zeppelin.interpreter.thrift.ParagraphInfo in project zeppelin by apache.

the class NotebookServer method getParagraphList.

@Override
public List<ParagraphInfo> getParagraphList(String user, String noteId) throws IOException, TException, ServiceException {
    // Check READER permission
    Set<String> userAndRoles = new HashSet<>();
    userAndRoles.add(user);
    boolean isAllowed = authorizationService.isReader(noteId, userAndRoles);
    Set<String> allowed = authorizationService.getReaders(noteId);
    if (!isAllowed) {
        String errorMsg = "Insufficient privileges to read the note. Allowed users or roles: " + allowed;
        throw new ServiceException(errorMsg);
    }
    return getNotebook().processNote(noteId, note -> {
        if (null == note) {
            throw new IOException("Note not found: " + noteId);
        }
        // Convert Paragraph to ParagraphInfo
        List<ParagraphInfo> paragraphInfos = new ArrayList<>();
        List<Paragraph> paragraphs = note.getParagraphs();
        for (Paragraph paragraph : paragraphs) {
            ParagraphInfo paraInfo = new ParagraphInfo();
            paraInfo.setNoteId(noteId);
            paraInfo.setParagraphId(paragraph.getId());
            paraInfo.setParagraphTitle(paragraph.getTitle());
            paraInfo.setParagraphText(paragraph.getText());
            paragraphInfos.add(paraInfo);
        }
        return paragraphInfos;
    });
}
Also used : ServiceException(org.apache.zeppelin.interpreter.thrift.ServiceException) ArrayList(java.util.ArrayList) IOException(java.io.IOException) HashSet(java.util.HashSet) ParagraphInfo(org.apache.zeppelin.interpreter.thrift.ParagraphInfo) Paragraph(org.apache.zeppelin.notebook.Paragraph)
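
The loop above is a field-by-field copy; extracting it into a helper makes the conversion reusable and testable. A sketch follows, where the helper name toParagraphInfo is ours, not Zeppelin's.

// Hypothetical helper: copy one notebook Paragraph into the
// thrift-generated ParagraphInfo struct.
private static ParagraphInfo toParagraphInfo(String noteId, Paragraph paragraph) {
    ParagraphInfo info = new ParagraphInfo();
    info.setNoteId(noteId);
    info.setParagraphId(paragraph.getId());
    info.setParagraphTitle(paragraph.getTitle());
    info.setParagraphText(paragraph.getText());
    return info;
}

With it, the conversion loop collapses to note.getParagraphs().stream().map(p -> toParagraphInfo(noteId, p)).collect(Collectors.toList()).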

Example 4 with ParagraphInfo

use of org.apache.zeppelin.interpreter.thrift.ParagraphInfo in project zeppelin by apache.

the class HdfsClient method saveParagraphToFiles.

public String saveParagraphToFiles(String noteId, List<ParagraphInfo> paragraphInfos, String dirName, Properties properties) throws Exception {
    StringBuffer outputMsg = new StringBuffer();
    String hdfsUploadPath = properties.getProperty(SubmarineConstants.SUBMARINE_ALGORITHM_HDFS_PATH, "");
    HashMap<String, StringBuffer> mapParagraph = new HashMap<>();
    for (int i = 0; i < paragraphInfos.size(); i++) {
        ParagraphInfo paragraph = paragraphInfos.get(i);
        String paragraphTitle = paragraph.getParagraphTitle();
        if (org.apache.commons.lang3.StringUtils.isEmpty(paragraphTitle)) {
            String message = "WARN: The title of the [" + i + "] paragraph is empty and was not submitted to HDFS.\n";
            LOGGER.warn(message);
            outputMsg.append(message);
            continue;
        }
        if (!mapParagraph.containsKey(paragraphTitle)) {
            StringBuffer mergeScript = new StringBuffer();
            mapParagraph.put(paragraphTitle, mergeScript);
        }
        StringBuffer mergeScript = mapParagraph.get(paragraphTitle);
        String paragraphText = paragraph.getParagraphText();
        String text = parseText(paragraphText);
        mergeScript.append(text + "\n\n");
    }
    // Clear the local noteId directory; File.delete() fails on non-empty
    // directories, so remove the contained files first
    if (!org.apache.commons.lang3.StringUtils.isEmpty(dirName)) {
        String noteDir = dirName + "/" + noteId;
        File fileNoteDir = new File(noteDir);
        if (fileNoteDir.exists()) {
            File[] oldFiles = fileNoteDir.listFiles();
            if (null != oldFiles) {
                for (File oldFile : oldFiles) {
                    oldFile.delete();
                }
            }
            fileNoteDir.delete();
        }
        fileNoteDir.mkdirs();
    }
    // Clear all files in the noteid directory in HDFS
    if (!org.apache.commons.lang3.StringUtils.isEmpty(hdfsUploadPath)) {
        Path hdfsPath = new Path(hdfsUploadPath + "/" + noteId);
        try {
            if (exists(hdfsPath)) {
                delete(hdfsPath);
            }
            // recreate the (now empty) noteId directory unconditionally
            tryMkDir(hdfsPath);
        } catch (IOException e) {
            LOGGER.error(e.getMessage(), e);
            throw new Exception(e);
        }
    }
    for (Map.Entry<String, StringBuffer> entry : mapParagraph.entrySet()) {
        try {
            String fileName = entry.getKey();
            String fileContent = entry.getValue().toString();
            String paragraphFile = dirName + "/" + noteId + "/" + fileName;
            // save to a local file
            if (!StringUtils.isEmpty(dirName)) {
                File fileParagraph = new File(paragraphFile);
                if (!fileParagraph.exists()) {
                    fileParagraph.createNewFile();
                }
                // try-with-resources closes the writer even if write() throws
                try (FileWriter writer = new FileWriter(paragraphFile)) {
                    writer.write(fileContent);
                }
            }
            // save to HDFS
            if (!StringUtils.isEmpty(hdfsUploadPath)) {
                String fileDir = hdfsUploadPath + "/" + noteId + "/" + fileName;
                // upload the algorithm file
                LOGGER.info("Commit algorithm to HDFS: {}", fileDir);
                Path filePath = new Path(fileDir);
                writeFile(fileContent, filePath);
            }
        } catch (IOException e) {
            LOGGER.error(e.getMessage(), e);
            throw new Exception(e);
        }
    }
    return outputMsg.toString();
}
Also used : Path(org.apache.hadoop.fs.Path) HashMap(java.util.HashMap) FileWriter(java.io.FileWriter) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) ParagraphInfo(org.apache.zeppelin.interpreter.thrift.ParagraphInfo) File(java.io.File) Map(java.util.Map)
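
The first half of saveParagraphToFiles groups paragraph text by title, concatenating paragraphs that share a title into one script. That grouping is worth seeing in isolation; the sketch below is our simplification, skipping the parseText step and the local/HDFS I/O, and using computeIfAbsent instead of the containsKey/put pair.

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.apache.zeppelin.interpreter.thrift.ParagraphInfo;

public class ParagraphGrouping {
    // Group paragraph text by title; untitled paragraphs are skipped,
    // mirroring the warning branch in saveParagraphToFiles.
    static Map<String, StringBuilder> groupByTitle(List<ParagraphInfo> paragraphs) {
        Map<String, StringBuilder> scripts = new LinkedHashMap<>();
        for (ParagraphInfo p : paragraphs) {
            String title = p.getParagraphTitle();
            if (StringUtils.isEmpty(title)) {
                continue;
            }
            scripts.computeIfAbsent(title, k -> new StringBuilder())
                   .append(p.getParagraphText()).append("\n\n");
        }
        return scripts;
    }
}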

Example 5 with ParagraphInfo

use of org.apache.zeppelin.interpreter.thrift.ParagraphInfo in project zeppelin by apache.

the class JobRunThread method run.

public void run() {
    if (!lockRunning.tryLock()) {
        LOGGER.warn("Cannot acquire the JobRunThread lockRunning lock!");
        return;
    }
    SubmarineUI submarineUI = submarineJob.getSubmarineUI();
    try {
        InterpreterContext intpContext = submarineJob.getIntpContext();
        String noteId = intpContext.getNoteId();
        String userName = intpContext.getAuthenticationInfo().getUser();
        String jobName = SubmarineUtils.getJobName(userName, noteId);
        if (running.get()) {
            String message = String.format("Job %s already running.", jobName);
            submarineUI.outputLog("WARN", message);
            LOGGER.warn(message);
            return;
        }
        running.set(true);
        Properties properties = submarineJob.getProperties();
        HdfsClient hdfsClient = submarineJob.getHdfsClient();
        File pythonWorkDir = submarineJob.getPythonWorkDir();
        submarineJob.setCurrentJobState(EXECUTE_SUBMARINE);
        String algorithmPath = properties.getProperty(SubmarineConstants.SUBMARINE_ALGORITHM_HDFS_PATH, "");
        if (!algorithmPath.startsWith("hdfs://")) {
            String message = "Algorithm file upload HDFS path, " + "Must be `hdfs://` prefix. now setting " + algorithmPath;
            submarineUI.outputLog("Configuration error", message);
            return;
        }
        List<ParagraphInfo> paragraphInfos = intpContext.getIntpEventClient().getParagraphList(userName, noteId);
        String outputMsg = hdfsClient.saveParagraphToFiles(noteId, paragraphInfos, pythonWorkDir == null ? "" : pythonWorkDir.getAbsolutePath(), properties);
        if (!StringUtils.isEmpty(outputMsg)) {
            submarineUI.outputLog("Save algorithm file", outputMsg);
        }
        HashMap<String, Object> jinjaParams = SubmarineUtils.propertiesToJinjaParams(properties, submarineJob, true);
        URL urlTemplate = Resources.getResource(SubmarineJob.SUBMARINE_JOBRUN_TF_JINJA);
        String template = Resources.toString(urlTemplate, Charsets.UTF_8);
        Jinjava jinjava = new Jinjava();
        String submarineCmd = jinjava.render(template, jinjaParams);
        // If the rendered command starts with a newline, strip it
        if (submarineCmd.startsWith("\n")) {
            submarineCmd = submarineCmd.replaceFirst("\n", "");
        }
        submarineUI.outputLog("Submarine submit command", submarineCmd);
        long timeout = Long.parseLong(properties.getProperty(SubmarineJob.TIMEOUT_PROPERTY, SubmarineJob.defaultTimeout));
        CommandLine cmdLine = CommandLine.parse(SubmarineJob.shell);
        cmdLine.addArgument(submarineCmd, false);
        DefaultExecutor executor = new DefaultExecutor();
        ExecuteWatchdog watchDog = new ExecuteWatchdog(timeout);
        executor.setWatchdog(watchDog);
        StringBuffer sbLogOutput = new StringBuffer();
        executor.setStreamHandler(new PumpStreamHandler(new LogOutputStream() {

            @Override
            protected void processLine(String line, int level) {
                line = line.trim();
                if (!StringUtils.isEmpty(line)) {
                    sbLogOutput.append(line + "\n");
                }
            }
        }));
        if (Boolean.valueOf(properties.getProperty(SubmarineJob.DIRECTORY_USER_HOME))) {
            executor.setWorkingDirectory(new File(System.getProperty("user.home")));
        }
        Map<String, String> env = new HashMap<>();
        String launchMode = (String) jinjaParams.get(SubmarineConstants.INTERPRETER_LAUNCH_MODE);
        if (StringUtils.equals(launchMode, "yarn")) {
            // Set environment variables in the submarine interpreter container run on yarn
            String javaHome, hadoopHome, hadoopConf;
            javaHome = (String) jinjaParams.get(SubmarineConstants.DOCKER_JAVA_HOME);
            hadoopHome = (String) jinjaParams.get(SubmarineConstants.DOCKER_HADOOP_HDFS_HOME);
            hadoopConf = (String) jinjaParams.get(SubmarineConstants.SUBMARINE_HADOOP_CONF_DIR);
            env.put("JAVA_HOME", javaHome);
            env.put("HADOOP_HOME", hadoopHome);
            env.put("HADOOP_HDFS_HOME", hadoopHome);
            env.put("HADOOP_CONF_DIR", hadoopConf);
            env.put("YARN_CONF_DIR", hadoopConf);
            env.put("CLASSPATH", "`$HADOOP_HDFS_HOME/bin/hadoop classpath --glob`");
            env.put("ZEPPELIN_FORCE_STOP", "true");
        }
        LOGGER.info("Execute EVN: {}, Command: {} ", env.toString(), submarineCmd);
        AtomicBoolean cmdLineRunning = new AtomicBoolean(true);
        executor.execute(cmdLine, env, new DefaultExecuteResultHandler() {

            @Override
            public void onProcessComplete(int exitValue) {
                String message = String.format("jobName %s ProcessComplete exit value is : %d", jobName, exitValue);
                LOGGER.info(message);
                submarineUI.outputLog("JOR RUN COMPLETE", message);
                cmdLineRunning.set(false);
                submarineJob.setCurrentJobState(EXECUTE_SUBMARINE_FINISHED);
            }

            @Override
            public void onProcessFailed(ExecuteException e) {
                String message = String.format("jobName %s ProcessFailed exit value is : %d, exception is : %s", jobName, e.getExitValue(), e.getMessage());
                LOGGER.error(message);
                submarineUI.outputLog("JOR RUN FAILED", message);
                cmdLineRunning.set(false);
                submarineJob.setCurrentJobState(EXECUTE_SUBMARINE_ERROR);
            }
        });
        int loopCount = 100;
        while ((loopCount-- > 0) && cmdLineRunning.get() && running.get()) {
            Thread.sleep(1000);
        }
        if (watchDog.isWatching()) {
            watchDog.destroyProcess();
            Thread.sleep(1000);
        }
        if (watchDog.isWatching()) {
            // killedProcess() only reports whether the watchdog killed the
            // process; it does not force another kill
            watchDog.killedProcess();
        }
        // Check if it has been submitted to YARN
        Map<String, Object> jobState = submarineJob.getJobStateByYarn(jobName);
        loopCount = 50;
        while ((loopCount-- > 0) && !jobState.containsKey("state") && running.get()) {
            Thread.sleep(3000);
            jobState = submarineJob.getJobStateByYarn(jobName);
        }
        if (!jobState.containsKey("state")) {
            String message = String.format("JOB %s was not submitted to YARN!", jobName);
            LOGGER.error(message);
            submarineUI.outputLog("JOR RUN FAILED", message);
            submarineJob.setCurrentJobState(EXECUTE_SUBMARINE_ERROR);
        }
    } catch (Exception e) {
        LOGGER.error(e.getMessage(), e);
        submarineJob.setCurrentJobState(EXECUTE_SUBMARINE_ERROR);
        submarineUI.outputLog("Exception", e.getMessage());
    } finally {
        running.set(false);
        lockRunning.unlock();
    }
}
Also used : HashMap(java.util.HashMap) Properties(java.util.Properties) URL(java.net.URL) PumpStreamHandler(org.apache.commons.exec.PumpStreamHandler) ExecuteException(org.apache.commons.exec.ExecuteException) InterpreterContext(org.apache.zeppelin.interpreter.InterpreterContext) DefaultExecuteResultHandler(org.apache.commons.exec.DefaultExecuteResultHandler) DefaultExecutor(org.apache.commons.exec.DefaultExecutor) ExecuteWatchdog(org.apache.commons.exec.ExecuteWatchdog) Jinjava(com.hubspot.jinjava.Jinjava) HdfsClient(org.apache.zeppelin.submarine.hadoop.HdfsClient) LogOutputStream(org.apache.commons.exec.LogOutputStream) ParagraphInfo(org.apache.zeppelin.interpreter.thrift.ParagraphInfo) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) CommandLine(org.apache.commons.exec.CommandLine) File(java.io.File) SubmarineUI(org.apache.zeppelin.submarine.commons.SubmarineUI)
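
The core of the submit path is rendering a Jinjava template into a shell command. Here is a minimal sketch of that step, using a hypothetical inline template rather than the real SUBMARINE_JOBRUN_TF_JINJA resource.

import com.hubspot.jinjava.Jinjava;
import java.util.HashMap;
import java.util.Map;

public class JinjavaRenderSketch {
    public static void main(String[] args) {
        Jinjava jinjava = new Jinjava();
        Map<String, Object> params = new HashMap<>();
        params.put("JOB_NAME", "user1-note1");
        // render the template with the parameter map, as JobRunThread does
        String cmd = jinjava.render("submarine job run --name {{ JOB_NAME }}", params);
        System.out.println(cmd); // submarine job run --name user1-note1
    }
}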

Aggregations

ParagraphInfo (org.apache.zeppelin.interpreter.thrift.ParagraphInfo): 6
ServiceException (org.apache.zeppelin.interpreter.thrift.ServiceException): 3
Paragraph (org.apache.zeppelin.notebook.Paragraph): 3
File (java.io.File): 2
IOException (java.io.IOException): 2
HashMap (java.util.HashMap): 2
HashSet (java.util.HashSet): 2
TException (org.apache.thrift.TException): 2
Test (org.junit.Test): 2
Jinjava (com.hubspot.jinjava.Jinjava): 1
FileWriter (java.io.FileWriter): 1
URISyntaxException (java.net.URISyntaxException): 1
URL (java.net.URL): 1
ArrayList (java.util.ArrayList): 1
Map (java.util.Map): 1
Properties (java.util.Properties): 1
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 1
CommandLine (org.apache.commons.exec.CommandLine): 1
DefaultExecuteResultHandler (org.apache.commons.exec.DefaultExecuteResultHandler): 1
DefaultExecutor (org.apache.commons.exec.DefaultExecutor): 1