Use of io.hops.hopsworks.common.serving.inference.logger.InferenceLogger in project hopsworks by logicalclocks.
The class InferenceController, method infer.
/**
 * Sends an inference request to a serving of the given project and returns the response body.
 *
 * <p>The serving is looked up by project and name, the request is validated, and the call is
 * delegated to the serving inference controller. Every registered {@link InferenceLogger} is then
 * given the request together with the response status and body; a failing logger never aborts the
 * request. Finally, a response status of 400 or above is turned into an {@link InferenceException}.
 *
 * @param project the project where the serving is running
 * @param username the user on whose behalf the request is made; forwarded to the inference controller
 * @param modelName the name of the serving
 * @param modelVersion the version of the serving; may be {@code null}, must not be negative
 * @param verb the prediction type (predict, regress, or classify); must not be {@code null}
 * @param inferenceRequestJson the user-provided JSON payload for the inference request
 * @param authHeader the authorization header; forwarded to the inference controller
 * @return a string representation of the inference result
 * @throws InferenceException if the serving does not exist, the verb is missing, the model version
 *     is negative, or the serving instance answered with a status of 400 or above
 * @throws ApiKeyException declared by this method; presumably propagated from the inference
 *     controller (not visible here, confirm against its implementation)
 */
public String infer(Project project, String username, String modelName, Integer modelVersion, InferenceVerb verb, String inferenceRequestJson, String authHeader) throws InferenceException, ApiKeyException {
  final Serving target = servingFacade.findByProjectAndName(project, modelName);

  // Request validation: unknown serving, missing verb, negative version.
  if (target == null) {
    throw new InferenceException(RESTCodes.InferenceErrorCode.SERVING_NOT_FOUND, Level.FINE, "name: " + modelName);
  }
  if (verb == null) {
    throw new InferenceException(RESTCodes.InferenceErrorCode.MISSING_VERB, Level.FINE);
  }
  if (modelVersion != null && modelVersion < 0) {
    throw new InferenceException(RESTCodes.InferenceErrorCode.BAD_REQUEST, Level.FINE, "Model version must be " + "positive");
  }

  // The serving inference controller is either the localhost or the kubernetes implementation.
  // The returned pair carries the response status (left) and the response body (right).
  final Pair<Integer, String> response =
      servingInferenceController.infer(username, target, modelVersion, verb, inferenceRequestJson, authHeader);

  // Hand the request/response to every inference logger before inspecting the status,
  // so that failed requests are logged as well.
  for (InferenceLogger sink : inferenceLoggers) {
    try {
      sink.logInferenceRequest(target, inferenceRequestJson, response.getL(), response.getR());
    } catch (Exception e) {
      // Logged at FINE on purpose: inference logging failures must not flood the logs.
      logger.log(Level.FINE, "Error logging inference for logger: " + sink.getClassName(), e);
    }
  }

  final int status = response.getL();
  if (status < 400) {
    return response.getR();
  }

  // Status of 400 or above: report it and surface it to the caller as an exception.
  final String body = response.getR();
  logger.log(Level.FINE, "Request error: " + status + " - " + body);
  final RESTCodes.InferenceErrorCode errorCode = status >= 500
      ? RESTCodes.InferenceErrorCode.SERVING_INSTANCE_INTERNAL
      : RESTCodes.InferenceErrorCode.SERVING_INSTANCE_BAD_REQUEST;
  throw new InferenceException(errorCode, Level.FINE, body);
}
Aggregations