vllm.entrypoints.pooling.score.utils

ScoreMultiModalParam

Bases: TypedDict

A specialized parameter type for scoring multimodal content

The reasons why we don't reuse CustomChatCompletionMessageParam directly:

1. Score tasks don't need the 'role' field (user/assistant/system) that's required in chat completions.
2. Including chat-specific fields would confuse users about their purpose in scoring.
3. This is a more focused interface that exposes only what's needed for scoring.

Source code in vllm/entrypoints/pooling/score/utils.py
class ScoreMultiModalParam(TypedDict, total=False):
    """
    A specialized parameter type for scoring multimodal content

    The reasons why we don't reuse `CustomChatCompletionMessageParam` directly:
    1. Score tasks don't need the 'role' field (user/assistant/system) that's required in chat completions
    2. Including chat-specific fields would confuse users about their purpose in scoring
    3. This is a more focused interface that only exposes what's needed for scoring
    """  # noqa: E501

    content: Required[list[ScoreContentPartParam]]
    """The multimodal contents"""

content instance-attribute

content: Required[list[ScoreContentPartParam]]

The multimodal contents
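
For illustration, a minimal sketch of constructing a ScoreMultiModalParam. The content parts below follow the OpenAI-style "type"-tagged convention; their exact schema is an assumption here, not defined by this module.

from vllm.entrypoints.pooling.score.utils import ScoreMultiModalParam

# The part dicts are illustrative; ScoreContentPartParam may constrain
# them differently in practice.
param: ScoreMultiModalParam = {
    "content": [
        {"type": "text", "text": "a photo of a cat"},
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}},
    ]
}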

compress_token_type_ids

compress_token_type_ids(token_type_ids: list[int]) -> int

Return the position of the first 1, or the length of the list if no 1 is found.

Source code in vllm/entrypoints/pooling/score/utils.py
def compress_token_type_ids(token_type_ids: list[int]) -> int:
    """
    Return position of the first 1 or the length of the list
    if not found.
    """
    first_one = len(token_type_ids)
    err_msg = (
        "Token type ids are expected to be a sequence"
        " of zeros followed by a sequence of ones"
    )
    for i, type_id in enumerate(token_type_ids):
        if type_id == 0 and first_one < i:
            raise ValueError(err_msg)
        elif type_id == 1 and first_one > i:
            first_one = i
        elif type_id > 1:
            raise ValueError(err_msg)

    return first_one
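
For illustration, a few usage examples assuming the zeros-then-ones layout of cross-encoder token type ids described by the error message above:

from vllm.entrypoints.pooling.score.utils import compress_token_type_ids

# Zeros (first segment) followed by ones (second segment):
assert compress_token_type_ids([0, 0, 0, 1, 1]) == 3

# No 1 present: the full length is returned.
assert compress_token_type_ids([0, 0, 0]) == 3

# A 0 after a 1 violates the expected layout and raises ValueError.
try:
    compress_token_type_ids([0, 1, 0, 1])
except ValueError:
    pass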

compute_maxsim_score

compute_maxsim_score(
    q_emb: Tensor, d_emb: Tensor
) -> Tensor

Compute ColBERT MaxSim score.

Parameters:

| Name  | Type   | Description                              | Default  |
| ----- | ------ | ---------------------------------------- | -------- |
| q_emb | Tensor | Query token embeddings [query_len, dim]  | required |
| d_emb | Tensor | Document token embeddings [doc_len, dim] | required |

Returns:

| Type   | Description                                                              |
| ------ | ------------------------------------------------------------------------ |
| Tensor | MaxSim score (sum over query tokens of max similarity to any doc token)  |

Source code in vllm/entrypoints/pooling/score/utils.py
def compute_maxsim_score(q_emb: torch.Tensor, d_emb: torch.Tensor) -> torch.Tensor:
    """
    Compute ColBERT MaxSim score.

    Args:
        q_emb: Query token embeddings [query_len, dim]
        d_emb: Document token embeddings [doc_len, dim]

    Returns:
        MaxSim score (sum over query tokens of max similarity to any doc token)
    """
    # [query_len, doc_len]
    token_scores = torch.matmul(q_emb, d_emb.T)
    # Max over document tokens, sum over query tokens
    return token_scores.amax(dim=-1).sum()
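
A small usage sketch. The per-token L2 normalization is an assumption about how ColBERT-style embeddings are typically prepared; the function itself does not enforce it.

import torch

from vllm.entrypoints.pooling.score.utils import compute_maxsim_score

# Toy embeddings standing in for ColBERT query/document token vectors.
q_emb = torch.nn.functional.normalize(torch.randn(4, 128), dim=-1)
d_emb = torch.nn.functional.normalize(torch.randn(20, 128), dim=-1)

# Each of the 4 query tokens contributes its best match among the
# 20 document tokens, so the score is bounded by 4 for unit vectors.
score = compute_maxsim_score(q_emb, d_emb)
print(float(score))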

post_process_tokens

post_process_tokens(
    model_config: ModelConfig, prompt: TokensPrompt
) -> None

Perform architecture-specific manipulations on the input tokens.

Note

This is an in-place operation.

Source code in vllm/entrypoints/pooling/score/utils.py
def post_process_tokens(
    model_config: ModelConfig,
    prompt: TokensPrompt,
) -> None:
    """
    Perform architecture-specific manipulations on the input tokens.

    Note:
        This is an in-place operation.
    """
    # NOTE(Simon): lazy import to avoid bringing in all dependencies (e.g. gguf)
    from vllm.model_executor.model_loader import get_model_cls

    model = get_model_cls(model_config)
    if supports_score_template(model):
        model.post_process_tokens(prompt)
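
A minimal sketch of the hook this dispatch relies on. Only the classmethod name mirrors the call above; the model class, the marker token ids, and the plain-dict prompt are illustrative assumptions.

class MyScoringModel:
    # Hypothetical model exposing the hook that post_process_tokens
    # invokes when supports_score_template(model) is True.
    @classmethod
    def post_process_tokens(cls, prompt: dict) -> None:
        # Wrap the input in architecture-specific marker tokens
        # (ids 101 and 102 are placeholders, not real vocab entries).
        prompt["prompt_token_ids"] = [101, *prompt["prompt_token_ids"], 102]

prompt = {"prompt_token_ids": [5, 6, 7]}
MyScoringModel.post_process_tokens(prompt)
print(prompt["prompt_token_ids"])  # [101, 5, 6, 7, 102]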