Use of io.hops.hopsworks.common.featurestore.featuregroup.IngestionJob in project hopsworks by logicalclocks.
In class FeaturegroupService, the ingestionJob method:
@POST
@Path("/{featuregroupId}/ingestion")
@Consumes(MediaType.APPLICATION_JSON)
@Produces(MediaType.APPLICATION_JSON)
@AllowedProjectRoles({ AllowedProjectRoles.DATA_SCIENTIST, AllowedProjectRoles.DATA_OWNER })
@JWTRequired(acceptedTokens = { Audience.API, Audience.JOB }, allowedUserRoles = { "HOPS_ADMIN", "HOPS_USER" })
@ApiKeyRequired(acceptedScopes = { ApiScope.FEATURESTORE }, allowedUserRoles = { "HOPS_ADMIN", "HOPS_USER" })
@ApiOperation(value = "Prepares environment for uploading data to ingest into the feature group", response = IngestionJobDTO.class)
public Response ingestionJob(@Context SecurityContext sc, @Context UriInfo uriInfo,
    @ApiParam(value = "Id of the featuregroup", required = true) @PathParam("featuregroupId") Integer featuregroupId,
    IngestionJobConf ingestionJobConf)
    throws DatasetException, HopsSecurityException, FeaturestoreException, JobException {
  Users user = jWTHelper.getUserPrincipal(sc);
  verifyIdProvided(featuregroupId);
  // 'project' and 'featurestore' are fields of FeaturegroupService, resolved from the enclosing resource path.
  Featuregroup featuregroup = featuregroupController.getFeaturegroupById(featurestore, featuregroupId);
  // Flatten the OptionDTO lists from the request body into plain key/value maps.
  Map<String, String> dataOptions = null;
  if (ingestionJobConf.getDataOptions() != null) {
    dataOptions = ingestionJobConf.getDataOptions().stream()
        .collect(Collectors.toMap(OptionDTO::getName, OptionDTO::getValue));
  }
  Map<String, String> writeOptions = null;
  if (ingestionJobConf.getWriteOptions() != null) {
    writeOptions = ingestionJobConf.getWriteOptions().stream()
        .collect(Collectors.toMap(OptionDTO::getName, OptionDTO::getValue));
  }
  // Set up the ingestion job and the HDFS upload location, then return both to the client.
  IngestionJob ingestionJob = fsJobManagerController.setupIngestionJob(project, user, featuregroup,
      ingestionJobConf.getSparkJobConfiguration(), ingestionJobConf.getDataFormat(), writeOptions, dataOptions);
  IngestionJobDTO ingestionJobDTO = ingestionJobBuilder.build(uriInfo, project, featuregroup, ingestionJob);
  return Response.ok().entity(ingestionJobDTO).build();
}
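
For illustration, a minimal JAX-RS client call against this endpoint might look like the sketch below. It is a sketch under assumptions: the base URL, the project/featurestore path prefix, the API key, and the JSON property names of IngestionJobConf (dataFormat, dataOptions, writeOptions) are inferred from the annotations and getters above rather than taken from the Hopsworks client library.

import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
import javax.ws.rs.client.Entity;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;

public class IngestionJobClientSketch {
  public static void main(String[] args) {
    // Hypothetical request body; property names are inferred from IngestionJobConf's getters,
    // and the data format value is illustrative.
    String body = "{"
        + "\"dataFormat\": \"CSV\","
        + "\"dataOptions\": [{\"name\": \"header\", \"value\": \"true\"}]"
        + "}";
    Client client = ClientBuilder.newClient();
    // Hypothetical base URL and resource path; the real prefix depends on the deployment and on
    // the parent resources that mount FeaturegroupService. The project, featurestore, and
    // featuregroup ids shown here are placeholders.
    Response response = client
        .target("https://hopsworks.example.com/hopsworks-api/api")
        .path("project").path("119")
        .path("featurestores").path("67")
        .path("featuregroups").path("13").path("ingestion")
        .request(MediaType.APPLICATION_JSON)
        .header("Authorization", "ApiKey " + System.getenv("HOPSWORKS_API_KEY"))
        .post(Entity.entity(body, MediaType.APPLICATION_JSON));
    // On success the entity is an IngestionJobDTO carrying the HDFS upload path and the prepared job.
    System.out.println(response.getStatus() + ": " + response.readEntity(String.class));
  }
}

Once the upload to the returned data path has finished, the client starts the prepared job itself; this endpoint only prepares the environment.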
Use of io.hops.hopsworks.common.featurestore.featuregroup.IngestionJob in project hopsworks by logicalclocks.
In class FsJobManagerController, the setupIngestionJob method:
public IngestionJob setupIngestionJob(Project project, Users user, Featuregroup featureGroup,
    SparkJobConfiguration sparkJobConfiguration, IngestionDataFormat dataFormat,
    Map<String, String> writeOptions, Map<String, String> dataOptions)
    throws FeaturestoreException, DatasetException, HopsSecurityException, JobException {
  DistributedFileSystemOps udfso = dfs.getDfsOps(hdfsUsersController.getHdfsUserName(project, user));
  try {
    String dataPath = getIngestionPath(project, user, featureGroup, udfso);
    String jobConfigurationPath =
        getJobConfigurationPath(project, featureGroup.getName(), featureGroup.getVersion(), "ingestion");
    // Assemble the configuration the ingestion job reads at runtime.
    Map<String, Object> jobConfiguration = new HashMap<>();
    jobConfiguration.put("feature_store",
        featurestoreController.getOfflineFeaturestoreDbName(featureGroup.getFeaturestore().getProject()));
    jobConfiguration.put("name", featureGroup.getName());
    jobConfiguration.put("version", String.valueOf(featureGroup.getVersion()));
    jobConfiguration.put("data_path", dataPath);
    jobConfiguration.put("data_format", dataFormat.toString());
    jobConfiguration.put("data_options", dataOptions);
    jobConfiguration.put("write_options", writeOptions);
    // Serialize the configuration to JSON and write it to HDFS for the job to pick up.
    String jobConfigurationStr = objectMapper.writeValueAsString(jobConfiguration);
    writeToHDFS(jobConfigurationPath, jobConfigurationStr, udfso);
    Jobs ingestionJob = configureJob(user, project, sparkJobConfiguration,
        getJobName(INSERT_FG_OP, Utils.getFeaturegroupName(featureGroup)),
        getJobArgs(INSERT_FG_OP, jobConfigurationPath));
    // The client will trigger the job once the data upload is done.
    return new IngestionJob(dataPath, ingestionJob);
  } catch (IOException e) {
    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ERROR_JOB_SETUP, Level.SEVERE,
        "Error setting up feature group import job", e.getMessage(), e);
  } finally {
    dfs.closeDfsClient(udfso);
  }
}
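
The IngestionJob returned above is essentially a value holder pairing the HDFS upload path with the configured job entity, so the resource layer can expose both through IngestionJobDTO. A minimal sketch of such a holder, assuming only the two-argument constructor used in the return statement (the actual class in io.hops.hopsworks.common.featurestore.featuregroup may carry more), is shown below.

// Sketch of a value holder matching the constructor used above; field and accessor names are
// assumptions, only the (dataPath, job) pairing comes from the source. Jobs is the Hopsworks
// job entity returned by configureJob().
public class IngestionJob {

  private final String dataPath;
  private final Jobs job;

  public IngestionJob(String dataPath, Jobs job) {
    this.dataPath = dataPath;
    this.job = job;
  }

  // HDFS path the client uploads the data to before triggering the job.
  public String getDataPath() {
    return dataPath;
  }

  // Configured ingestion job; the client starts it once the upload is done.
  public Jobs getJob() {
    return job;
  }
}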