Home

`ping()`

GET: /ping

Endpoint to check if the server is running.

Returns:

Name	Type	Description
`Response`		Response with status 200 if the server is running.

Source code in app.py

@app.get("/ping")
def ping():
    """
    # GET: /ping

    Endpoint to check if the server is running.

    Opens a client to the model server configured in
    `config.grcp_model_server_address` and asks it whether it is live.

    Returns:
        Response: Response with status 200 if the inference server reports
            itself live; status 400 if it does not or cannot be queried.
    """
    client = None
    try:
        client = grpcclient.InferenceServerClient(
            url=config.grcp_model_server_address, verbose=False
        )
        # Building the client object is not a health check on its own;
        # query the server explicitly.
        is_live = client.is_server_live()
    except Exception:
        # Health probe boundary: any failure to reach the server means "not live".
        is_live = False
    finally:
        # Release the gRPC channel so repeated probes do not accumulate connections.
        if client is not None:
            client.close()

    return Response(status_code=200 if is_live else 400)

`predict_image(image=File(...), webhook_url=Header(None), examination_id=Header(None))`

POST: /invocations

Endpoint to process an image and send it to the inference server.

Parameters:

Name	Type	Description	Default
`image`	`UploadFile`	Image file to process (in the request body).	`File(...)`

Headers

- Content-Type: Type of the image. Can be "image/jpeg", "image/png", "image/tiff", "image/bmp", "image/jpg".
- Webhook-Url: URL to send the results of the inference.
- Examination-Id: ID of the examination. Used to track the request.

Returns:

Type	Description
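`JSONResponse`	JSON body with `filename`, `status`, `request_uuid` and `examination_id` when the request was sent; `message`, `status` and `examination_id` on error.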

Raises: Response: Error response if the content type is not supported.

Source code in app.py

@app.post("/invocations")
def predict_image(image: UploadFile = File(...), webhook_url: str = Header(None), examination_id: str = Header(None)):
    """
    # POST: /invocations

    Endpoint to process an image and send it to the inference server.

    The request is validated first (webhook, then content type). Only then is
    the image decoded and submitted asynchronously to the model server. The
    inference result itself is delivered by `result_callback`, not in this
    response.

    Args:
        image (UploadFile): Image file to process (in the request body).
        webhook_url (str): Value of the *Webhook-Url* header.
        examination_id (str): Value of the *Examination-Id* header.

    Headers:
        *Content-Type*: Type of the image. Can be "image/jpeg", "image/png", "image/tiff", "image/bmp", "image/jpg".
        *Webhook-Url*: URL to send the results of the inference.
        *Examination-Id*: ID of the examination. Used to track the request.

    Returns:
        JSONResponse: Status 200 with `filename`, `status` ("sent"),
            `request_uuid` and `examination_id` once the request was submitted;
            status 400 with `message`, `status` ("error") and `examination_id`
            if decoding or submission failed.
        Response: The response produced by `_check_webhook` when its status is
            not 200, or a 415 text response if the content type is not supported.
    """
    # Validate before opening a gRPC channel so rejected requests cost nothing
    # and leave no open connection behind.
    webhook_response = _check_webhook(webhook_url, examination_id)
    if webhook_response.status_code != 200:
        return webhook_response

    if image.content_type not in config.available_content_types:
        return Response(
            status_code=415,
            content="Cannot decode image data. Is content_type correct?",
            media_type="text/plain",
        )

    client = None
    try:
        image_bytes = np.frombuffer(image.file.read(), dtype=np.uint8)
        img = cv2.imdecode(image_bytes, cv2.IMREAD_COLOR)
        if img is None:
            # imdecode signals an undecodable buffer by returning None.
            raise ValueError("Cannot decode image data.")

        client = grpcclient.InferenceServerClient(
            url=config.grcp_model_server_address,
            verbose=False,
            channel_args=(("grpc.lb_policy_name", "round_robin"),),
        )
        model_config = client.get_model_config(
            model_name=config.model_name, model_version=config.model_version, as_json=True
        )["config"]

        run_params = json.dumps(
            {
                "amd_cut_fundus_percentage": config.amd_cut_fundus_percentage,
                "dr_cut_fundus_percentage": config.dr_cut_fundus_percentage,
                "screening_cut_fundus_percentage": config.screening_cut_fundus_percentage,
                "amd_zoom_out_fundus": config.amd_zoom_out_fundus,
                "dr_zoom_out_fundus": config.dr_zoom_out_fundus,
                "screening_zoom_out_fundus": config.screening_zoom_out_fundus,
                "amd_inference_size": config.amd_inference_image_size,
                "dr_inference_size": config.dr_inference_image_size,
                "screening_inference_size": config.screening_inference_image_size,
            }
        )

        # A leading batch dimension of 1 is added to the decoded image.
        inputs = [
            grpcclient.InferInput("IMAGE", [1, img.shape[0], img.shape[1], img.shape[2]], np_to_triton_dtype(img.dtype)),
            grpcclient.InferInput("INPUT_JSON_PARAMS", (1, 1), "BYTES"),
        ]
        inputs[0].set_data_from_numpy(img[np.newaxis, ...])
        inputs[1].set_data_from_numpy(np.array([[run_params]], dtype=np.object_))

        outputs = [
            grpcclient.InferRequestedOutput(output["name"])
            for output in model_config["output"]
        ]

        request_uuid = str(uuid.uuid4())

        # On success the client is handed to the callback, which closes it.
        client.async_infer(
            model_name=config.model_name,
            model_version=config.model_version,
            inputs=inputs,
            outputs=outputs,
            callback=partial(
                result_callback,
                model_config=model_config,
                filename=image.filename,
                client=client,
                request_uuid=request_uuid,
                webhook_url=webhook_url,
                examination_id=examination_id,
            ),
        )
    except Exception as e:
        # The request was never submitted, so no callback will close the client.
        if client is not None:
            client.close()
        return JSONResponse(content={"message": str(e), "status": "error", "examination_id": examination_id}, status_code=400)

    return JSONResponse(
        content={"filename": image.filename, "status": "sent", "request_uuid": request_uuid, "examination_id": examination_id},
        status_code=200,
    )

`result_callback(model_config, filename, request_uuid, result, error, client, webhook_url, examination_id)`

Callback function to process the result of the inference request.

Parameters:

Name	Type	Description	Default
`model_config`	`dict`	Model configuration dictionary.	required
`filename`	`str`	Name of the file that was processed.	required
`examination_id`	`str`	ID of the examination. Included in the log entries.	required
`result`	`list`	List of output tensors.	required
`error`	`Exception`	Error that occurred during the request.	required
`client`	`object`	Triton client object.	required
`request_uuid`	`str`	UUID of the request.	required
`webhook_url`	`str`	URL to send the results of the inference.	required

Source code in app.py

def result_callback(
    model_config: dict,
    filename: str,
    request_uuid: str,
    result: Optional[list],
    error: Optional[Exception],
    client: object,
    webhook_url: str,
    examination_id: str,
) -> None:
    """
    Callback function to process the result of the inference request.

    Posts a COMPLETED or FAILED status message to the webhook, writes a
    matching JSON log entry, and closes the client in every case.

    Args:
        model_config (dict): Model configuration dictionary.
        filename (str): Name of the file that was processed.
        request_uuid (str): UUID of the request.
        result (list): Inference result; only read when `error` is None.
        error (Exception): Error that occurred during the request, or None.
        client (object): Triton client object. Closed before returning.
        webhook_url (str): URL to send the results of the inference.
        examination_id (str): ID of the examination. Included in the log entries.
    """
    # NOTE(review): requests.post is called without a timeout, so an unresponsive
    # webhook blocks this callback indefinitely — consider adding one.
    try:
        if error is None:
            # The first configured output carries the JSON-encoded result.
            output_name = model_config["output"][0]["name"]
            output_data = json.loads(result.as_numpy(output_name)[0])

            status_message = {
                "id": request_uuid,
                "status": "COMPLETED",
                "output": output_data,
                "filename": filename,
            }
            requests.post(webhook_url, json=status_message)

            log_message = json.dumps({
                "model": model_config["name"],
                "examination_id": examination_id,
                "status": "COMPLETED",
                "filename": filename,
                "request_uuid": request_uuid,
                "level": "INFO"
            })
            logger.info(log_message)
        else:
            # `result` must not be touched here: it is not available when the
            # request failed.
            status_message = {
                "id": request_uuid,
                "status": "FAILED",
                "error": str(error),
                "filename": filename,
            }
            requests.post(webhook_url, json=status_message)

            log_message = json.dumps({
                "model": model_config["name"],
                "examination_id": examination_id,
                "status": "FAILED",
                "filename": filename,
                "request_uuid": request_uuid,
                "error": str(error),
                "level": "ERROR"
            })
            logger.error(log_message)
    finally:
        # Close the per-request client even if the webhook call or parsing raised.
        client.close()