Search in sources :

Example 1 with StringMap

Use of org.apache.airavata.datalake.orchestrator.workflow.engine.task.types.StringMap in the project airavata-data-lake by Apache.

The method submitDataParsingWorkflow of the class DataParsingWorkflowManager.

/**
 * Builds and submits one parsing workflow for each source resource id carried by the request.
 *
 * <p>For every resource the method:
 * <ol>
 *   <li>fetches the resource's file metadata through the MFT API,</li>
 *   <li>lists the parsing jobs known to the data parser service and keeps those whose every
 *       input selection query evaluates to {@code true} against that metadata,</li>
 *   <li>creates a download task, one parsing task per selected parser and one metadata
 *       persist task per parser output whose job output type is {@code "JSON"},</li>
 *   <li>hands the task graph to the workflow operator, starting at the download task.</li>
 * </ol>
 * A resource for which no parsing job matches is skipped with a warning.
 *
 * @param request invocation request whose message supplies the source resource ids, user name,
 *                tenant id and source credential token
 * @throws IllegalStateException if no "JavaScript" script engine is available, or if a selected
 *                               parser id has no corresponding selected parsing job
 * @throws Exception             if any remote call or the workflow submission fails
 */
public void submitDataParsingWorkflow(WorkflowInvocationRequest request) throws Exception {
    WorkflowMessage workflowMessage = request.getMessage();
    // Engine discovery is loop-invariant, so the manager is created once; engines stay per-input.
    ScriptEngineManager scriptEngineManager = new ScriptEngineManager();
    // NOTE(review): the parser service endpoint is hard-coded here while the parsing tasks below
    // are configured with orchHost/orchPort - confirm whether these should be the same endpoint.
    ManagedChannel channel = ManagedChannelBuilder.forAddress("localhost", 6566).usePlaintext().build();
    try {
        DataParserServiceGrpc.DataParserServiceBlockingStub parserClient = DataParserServiceGrpc.newBlockingStub(channel);
        for (String sourceResourceId : workflowMessage.getSourceResourceIdsList()) {
            logger.info("Processing parsing workflow for resource {}", sourceResourceId);
            FileMetadataResponse metadata;
            try (MFTApiClient mftClient = new MFTApiClient(mftHost, mftPort)) {
                MFTApiServiceGrpc.MFTApiServiceBlockingStub mftClientStub = mftClient.get();
                DelegateAuth delegateAuth = DelegateAuth.newBuilder()
                        .setUserId(workflowMessage.getUsername())
                        .setClientId(mftClientId)
                        .setClientSecret(mftClientSecret)
                        .putProperties("TENANT_ID", workflowMessage.getTenantId())
                        .build();
                metadata = mftClientStub.getFileResourceMetadata(FetchResourceMetadataRequest.newBuilder()
                        .setResourceType("SCP")
                        .setResourceId(sourceResourceId)
                        .setResourceToken(workflowMessage.getSourceCredentialToken())
                        .setMftAuthorizationToken(AuthToken.newBuilder().setDelegateAuth(delegateAuth).build())
                        .build());
            }
            ParsingJobListResponse parsingJobs = parserClient.listParsingJobs(ParsingJobListRequest.newBuilder().build());
            String tempDownloadPath = baseWorkingDir + UUID.randomUUID().toString();

            // Select the parsing jobs whose selection queries all accept this resource's metadata,
            // recording for each selected parser where its inputs will be downloaded to.
            Map<String, StringMap> parserInputMappings = new HashMap<>();
            List<DataParsingJob> selectedPJs = new ArrayList<>();
            for (DataParsingJob pj : parsingJobs.getParsersList()) {
                boolean match = true;
                StringMap stringMap = new StringMap();
                for (DataParsingJobInput pji : pj.getDataParsingJobInputsList()) {
                    ScriptEngine engine = scriptEngineManager.getEngineByName("JavaScript");
                    if (engine == null) {
                        throw new IllegalStateException("No JavaScript script engine is available to evaluate selection queries");
                    }
                    Bindings bindings = engine.getBindings(ScriptContext.ENGINE_SCOPE);
                    // SECURITY(review): selection queries run as scripts with host access and
                    // unrestricted host class lookup; they must only come from trusted sources.
                    bindings.put("polyglot.js.allowHostAccess", true);
                    bindings.put("polyglot.js.allowHostClassLookup", (Predicate<String>) s -> true);
                    bindings.put("metadata", metadata);
                    try {
                        Object result = engine.eval(pji.getSelectionQuery());
                        if (result instanceof Boolean) {
                            stringMap.put(pji.getDataParserInputInterfaceId(), tempDownloadPath);
                            match = match && (Boolean) result;
                        } else {
                            // A null or non-boolean result cannot select the job; reject it
                            // instead of failing the whole request on a cast or unboxing error.
                            logger.error("Selection query of parsing job {} did not evaluate to a boolean", pj.getDataParsingJobId());
                            match = false;
                        }
                    } catch (ScriptException e) {
                        logger.error("Failed to evaluate parsing job {}", pj.getDataParsingJobId(), e);
                        match = false;
                    }
                }
                if (match) {
                    parserInputMappings.put(pj.getParserId(), stringMap);
                    selectedPJs.add(pj);
                }
            }
            if (selectedPJs.isEmpty()) {
                logger.warn("No parsing jobs available for resource {} with path {}. So ignoring the workflow", sourceResourceId, metadata.getResourcePath());
                continue;
            }

            Map<String, AbstractTask> taskMap = new HashMap<>();
            SyncLocalDataDownloadTask downloadTask = new SyncLocalDataDownloadTask();
            downloadTask.setTaskId("SLDT-" + UUID.randomUUID().toString());
            downloadTask.setMftClientId(mftClientId);
            downloadTask.setMftClientSecret(mftClientSecret);
            downloadTask.setUserId(workflowMessage.getUsername());
            downloadTask.setTenantId(workflowMessage.getTenantId());
            downloadTask.setMftHost(mftHost);
            downloadTask.setMftPort(mftPort);
            downloadTask.setSourceResourceId(sourceResourceId);
            downloadTask.setSourceCredToken(workflowMessage.getSourceCredentialToken());
            downloadTask.setDownloadPath(tempDownloadPath);
            taskMap.put(downloadTask.getTaskId(), downloadTask);

            for (Map.Entry<String, StringMap> mapping : parserInputMappings.entrySet()) {
                String parserId = mapping.getKey();
                String parserWorkingDir = baseWorkingDir + UUID.randomUUID();
                GenericDataParsingTask dataParsingTask = new GenericDataParsingTask();
                dataParsingTask.setTaskId("DPT-" + UUID.randomUUID().toString());
                dataParsingTask.setParserId(parserId);
                dataParsingTask.setParserServiceHost(orchHost);
                dataParsingTask.setParserServicePort(orchPort);
                dataParsingTask.setInputMapping(mapping.getValue());
                dataParsingTask.setWorkingDirectory(parserWorkingDir);
                taskMap.put(dataParsingTask.getTaskId(), dataParsingTask);

                // The download task fans out to every parsing task.
                OutPort outPort = new OutPort();
                outPort.setNextTaskId(dataParsingTask.getTaskId());
                downloadTask.addOutPort(outPort);

                DataParsingJob dataParsingJob = selectedPJs.stream()
                        .filter(pj -> pj.getParserId().equals(parserId))
                        .findFirst()
                        .orElseThrow(() -> new IllegalStateException("No selected parsing job for parser " + parserId));
                ParserFetchResponse parser = parserClient.fetchParser(ParserFetchRequest.newBuilder().setParserId(parserId).build());
                for (DataParserOutputInterface dataParserOutputInterface : parser.getParser().getOutputInterfacesList()) {
                    Optional<DataParsingJobOutput> dataParsingJobOutput = dataParsingJob.getDataParsingJobOutputsList().stream()
                            .filter(o -> o.getDataParserOutputInterfaceId().equals(dataParserOutputInterface.getParserOutputInterfaceId()))
                            .findFirst();
                    // Only outputs declared as JSON by the job are persisted as metadata.
                    if (dataParsingJobOutput.isPresent() && dataParsingJobOutput.get().getOutputType().equals("JSON")) {
                        MetadataPersistTask mpt = new MetadataPersistTask();
                        mpt.setTaskId("MPT-" + UUID.randomUUID().toString());
                        mpt.setDrmsHost(drmsHost);
                        mpt.setDrmsPort(drmsPort);
                        mpt.setTenant(workflowMessage.getTenantId());
                        mpt.setUser(workflowMessage.getUsername());
                        mpt.setServiceAccountKey(mftClientId);
                        mpt.setServiceAccountSecret(mftClientSecret);
                        mpt.setResourceId(sourceResourceId);
                        mpt.setJsonFile(parserWorkingDir + File.separator + "outputs" + File.separator + dataParserOutputInterface.getOutputName());
                        OutPort dpOut = new OutPort();
                        dpOut.setNextTaskId(mpt.getTaskId());
                        dataParsingTask.addOutPort(dpOut);
                        taskMap.put(mpt.getTaskId(), mpt);
                    }
                }
            }

            String[] startTaskIds = { downloadTask.getTaskId() };
            String workflowId = workflowOperator.buildAndRunWorkflow(taskMap, startTaskIds);
            logger.info("Submitted workflow {} to parse resource {} with path {}", workflowId, sourceResourceId, metadata.getResourcePath());
        }
    } finally {
        // Release the gRPC channel; previously one channel per resource was created and never closed.
        channel.shutdown();
    }
}
Also used : java.util(java.util) StringMap(org.apache.airavata.datalake.orchestrator.workflow.engine.task.types.StringMap) ManagedChannel(io.grpc.ManagedChannel) AbstractTask(org.apache.airavata.datalake.orchestrator.workflow.engine.task.AbstractTask) GenericDataParsingTask(org.apache.airavata.datalake.orchestrator.workflow.engine.task.impl.GenericDataParsingTask) WorkflowMessage(org.apache.airavata.datalake.orchestrator.workflow.engine.WorkflowMessage) LoggerFactory(org.slf4j.LoggerFactory) Autowired(org.springframework.beans.factory.annotation.Autowired) org.apache.airavata.datalake.data.orchestrator.api.stub.parsing(org.apache.airavata.datalake.data.orchestrator.api.stub.parsing) WorkflowInvocationRequest(org.apache.airavata.datalake.orchestrator.workflow.engine.WorkflowInvocationRequest) FetchResourceMetadataRequest(org.apache.airavata.mft.api.service.FetchResourceMetadataRequest) SyncLocalDataDownloadTask(org.apache.airavata.datalake.orchestrator.workflow.engine.task.impl.SyncLocalDataDownloadTask) CallbackWorkflowStore(org.apache.airavata.datalake.orchestrator.workflow.engine.wm.CallbackWorkflowStore) DelegateAuth(org.apache.airavata.mft.common.DelegateAuth) AuthToken(org.apache.airavata.mft.common.AuthToken) MetadataPersistTask(org.apache.airavata.datalake.orchestrator.workflow.engine.task.impl.MetadataPersistTask) Logger(org.slf4j.Logger) Predicate(java.util.function.Predicate) OutPort(org.apache.airavata.datalake.orchestrator.workflow.engine.task.OutPort) javax.script(javax.script) Collectors(java.util.stream.Collectors) File(java.io.File) MFTApiClient(org.apache.airavata.mft.api.client.MFTApiClient) ManagedChannelBuilder(io.grpc.ManagedChannelBuilder) MFTApiServiceGrpc(org.apache.airavata.mft.api.service.MFTApiServiceGrpc) FileMetadataResponse(org.apache.airavata.mft.api.service.FileMetadataResponse) WorkflowOperator(org.apache.airavata.datalake.orchestrator.workflow.engine.wm.WorkflowOperator) 
StringMap(org.apache.airavata.datalake.orchestrator.workflow.engine.task.types.StringMap) WorkflowMessage(org.apache.airavata.datalake.orchestrator.workflow.engine.WorkflowMessage) AbstractTask(org.apache.airavata.datalake.orchestrator.workflow.engine.task.AbstractTask) SyncLocalDataDownloadTask(org.apache.airavata.datalake.orchestrator.workflow.engine.task.impl.SyncLocalDataDownloadTask) Predicate(java.util.function.Predicate) FileMetadataResponse(org.apache.airavata.mft.api.service.FileMetadataResponse) MFTApiServiceGrpc(org.apache.airavata.mft.api.service.MFTApiServiceGrpc) ManagedChannel(io.grpc.ManagedChannel) MetadataPersistTask(org.apache.airavata.datalake.orchestrator.workflow.engine.task.impl.MetadataPersistTask) MFTApiClient(org.apache.airavata.mft.api.client.MFTApiClient) DelegateAuth(org.apache.airavata.mft.common.DelegateAuth) GenericDataParsingTask(org.apache.airavata.datalake.orchestrator.workflow.engine.task.impl.GenericDataParsingTask) OutPort(org.apache.airavata.datalake.orchestrator.workflow.engine.task.OutPort)

Aggregations

ManagedChannel (io.grpc.ManagedChannel)1 ManagedChannelBuilder (io.grpc.ManagedChannelBuilder)1 File (java.io.File)1 java.util (java.util)1 Predicate (java.util.function.Predicate)1 Collectors (java.util.stream.Collectors)1 javax.script (javax.script)1 org.apache.airavata.datalake.data.orchestrator.api.stub.parsing (org.apache.airavata.datalake.data.orchestrator.api.stub.parsing)1 WorkflowInvocationRequest (org.apache.airavata.datalake.orchestrator.workflow.engine.WorkflowInvocationRequest)1 WorkflowMessage (org.apache.airavata.datalake.orchestrator.workflow.engine.WorkflowMessage)1 AbstractTask (org.apache.airavata.datalake.orchestrator.workflow.engine.task.AbstractTask)1 OutPort (org.apache.airavata.datalake.orchestrator.workflow.engine.task.OutPort)1 GenericDataParsingTask (org.apache.airavata.datalake.orchestrator.workflow.engine.task.impl.GenericDataParsingTask)1 MetadataPersistTask (org.apache.airavata.datalake.orchestrator.workflow.engine.task.impl.MetadataPersistTask)1 SyncLocalDataDownloadTask (org.apache.airavata.datalake.orchestrator.workflow.engine.task.impl.SyncLocalDataDownloadTask)1 StringMap (org.apache.airavata.datalake.orchestrator.workflow.engine.task.types.StringMap)1 CallbackWorkflowStore (org.apache.airavata.datalake.orchestrator.workflow.engine.wm.CallbackWorkflowStore)1 WorkflowOperator (org.apache.airavata.datalake.orchestrator.workflow.engine.wm.WorkflowOperator)1 MFTApiClient (org.apache.airavata.mft.api.client.MFTApiClient)1 FetchResourceMetadataRequest (org.apache.airavata.mft.api.service.FetchResourceMetadataRequest)1