Render a chat completion request and return the conversation and engine prompts without generating.
Source code in vllm/entrypoints/openai/chat_completion/api_router.py
```python
@router.post(
    "/v1/chat/completions/render",
    dependencies=[Depends(validate_json_request)],
    response_model=list,
    responses={
        HTTPStatus.BAD_REQUEST.value: {"model": ErrorResponse},
        HTTPStatus.NOT_FOUND.value: {"model": ErrorResponse},
        HTTPStatus.INTERNAL_SERVER_ERROR.value: {"model": ErrorResponse},
    },
)
async def render_chat_completion(request: ChatCompletionRequest, raw_request: Request):
    """Render chat completion request and return conversation and engine
    prompts without generating."""
    handler = chat(raw_request)
    if handler is None:
        base_server = raw_request.app.state.openai_serving_tokenization
        return base_server.create_error_response(
            message="The model does not support Chat Completions API"
        )
    try:
        result = await handler.render_chat_request(request)
    except Exception as e:
        return handler.create_error_response(e)
    if isinstance(result, ErrorResponse):
        return JSONResponse(content=result.model_dump(), status_code=result.error.code)
    return JSONResponse(content=result)
```
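A minimal sketch of calling this endpoint from a client, assuming a vLLM OpenAI-compatible server is running at `http://localhost:8000` and serving a model named `meta-llama/Llama-3.1-8B-Instruct` (both the base URL and the model name are assumptions for illustration):

```python
# Sketch: POST a chat request to the render endpoint to inspect the
# conversation and engine prompts without triggering generation.
# Assumes a local vLLM server and the model name below (hypothetical).
import requests

response = requests.post(
    "http://localhost:8000/v1/chat/completions/render",
    json={
        "model": "meta-llama/Llama-3.1-8B-Instruct",  # assumed served model
        "messages": [
            {"role": "user", "content": "What is the capital of France?"},
        ],
    },
)
response.raise_for_status()

# The response body contains the rendered prompts; no tokens are generated.
print(response.json())
```

Because the endpoint returns the rendered prompts rather than a completion, it is useful for debugging chat templates: you can verify how the chat messages are expanded into the final engine prompt before spending any compute on generation.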