LLM Configuration API

llm_ie.engines.BasicLLMConfig

BasicLLMConfig(
    max_new_tokens: int = 2048,
    temperature: float = 0.0,
    **kwargs
)

Bases: LLMConfig

The basic LLM configuration for most non-reasoning models.

Source code in package/llm-ie/src/llm_ie/engines.py
def __init__(self, max_new_tokens:int=2048, temperature:float=0.0, **kwargs):
    """
    The basic LLM configuration for most non-reasoning models.
    """
    super().__init__(**kwargs)
    self.max_new_tokens = max_new_tokens
    self.temperature = temperature
    self.params["max_new_tokens"] = self.max_new_tokens
    self.params["temperature"] = self.temperature

preprocess_messages

preprocess_messages(
    messages: List[Dict[str, str]],
) -> List[Dict[str, str]]

This method preprocesses the input messages before passing them to the LLM.

Parameters:

messages : List[Dict[str, str]]
    A list of dicts with "role" and "content" keys. "role" must be one of {"system", "user", "assistant"}.

Returns:

messages : List[Dict[str, str]]
    A list of dicts with "role" and "content" keys. "role" must be one of {"system", "user", "assistant"}.

Source code in package/llm-ie/src/llm_ie/engines.py
def preprocess_messages(self, messages:List[Dict[str,str]]) -> List[Dict[str,str]]:
    """
    This method preprocesses the input messages before passing them to the LLM.

    Parameters:
    ----------
    messages : List[Dict[str,str]]
        a list of dict with role and content. role must be one of {"system", "user", "assistant"}

    Returns:
    -------
    messages : List[Dict[str,str]]
        a list of dict with role and content. role must be one of {"system", "user", "assistant"}
    """
    return messages
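
For the basic configuration this is a pass-through; a short sketch with hypothetical message contents:

from llm_ie.engines import BasicLLMConfig

messages = [
    {"role": "system", "content": "You extract structured information from clinical notes."},
    {"role": "user", "content": "List all medications in the note."},
]

# BasicLLMConfig does not modify the messages.
assert BasicLLMConfig().preprocess_messages(messages) == messages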

postprocess_response

postprocess_response(
    response: Union[str, Generator[str, None, None]],
) -> Union[str, Generator[Dict[str, str], None, None]]

This method postprocesses the LLM response after it is generated.

Parameters:

response : Union[str, Generator[str, None, None]]
    The LLM response. Can be a string or a generator.

Returns:

response : Union[str, Generator[Dict[str, str], None, None]]
    The postprocessed LLM response. If the input is a generator, the output is a generator yielding {"type": "response", "data": <content>}.

Source code in package/llm-ie/src/llm_ie/engines.py
def postprocess_response(self, response:Union[str, Generator[str, None, None]]) -> Union[str, Generator[Dict[str, str], None, None]]:
    """
    This method postprocesses the LLM response after it is generated.

    Parameters:
    ----------
    response : Union[str, Generator[str, None, None]]
        the LLM response. Can be a string or a generator.

    Returns: Union[str, Generator[Dict[str, str], None, None]]
        the postprocessed LLM response. 
        if input is a generator, the output will be a generator {"data": <content>}.
    """
    if isinstance(response, str):
        return response

    def _process_stream():
        for chunk in response:
            yield {"type": "response", "data": chunk}

    return _process_stream()
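
A sketch of both branches, using a plain generator expression to stand in for the engine's streaming output:

from llm_ie.engines import BasicLLMConfig

config = BasicLLMConfig()

# A string response is returned unchanged.
print(config.postprocess_response("final answer"))  # final answer

# A streaming response is wrapped chunk by chunk.
stream = (chunk for chunk in ["Hel", "lo"])
for item in config.postprocess_response(stream):
    print(item)  # {'type': 'response', 'data': 'Hel'}, then {'type': 'response', 'data': 'lo'}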

llm_ie.engines.OpenAIReasoningLLMConfig

OpenAIReasoningLLMConfig(
    reasoning_effort: str = "low", **kwargs
)

Bases: LLMConfig

The OpenAI "o" series configuration. 1. The reasoning effort is set to "low" by default. 2. The temperature parameter is not supported and will be ignored. 3. The system prompt is not supported and will be concatenated to the next user prompt.

Parameters:

reasoning_effort : str, Optional
    The reasoning effort. Must be one of {"low", "medium", "high"}. Default is "low".

Source code in package/llm-ie/src/llm_ie/engines.py
def __init__(self, reasoning_effort:str="low", **kwargs):
    """
    The OpenAI "o" series configuration.
    1. The reasoning effort is set to "low" by default.
    2. The temperature parameter is not supported and will be ignored.
    3. The system prompt is not supported and will be concatenated to the next user prompt.

    Parameters:
    ----------
    reasoning_effort : str, Optional
        the reasoning effort. Must be one of {"low", "medium", "high"}. Default is "low".
    """
    super().__init__(**kwargs)
    if reasoning_effort not in ["low", "medium", "high"]:
        raise ValueError("reasoning_effort must be one of {'low', 'medium', 'high'}.")

    self.reasoning_effort = reasoning_effort
    self.params["reasoning_effort"] = self.reasoning_effort

    if "temperature" in self.params:
        warnings.warn("Reasoning models do not support temperature parameter. Will be ignored.", UserWarning)
        self.params.pop("temperature")

preprocess_messages

preprocess_messages(
    messages: List[Dict[str, str]],
) -> List[Dict[str, str]]

Concatenate system prompts to the next user prompt.

Parameters:

messages : List[Dict[str, str]]
    A list of dicts with "role" and "content" keys. "role" must be one of {"system", "user", "assistant"}.

Returns:

messages : List[Dict[str, str]]
    A list of dicts with "role" and "content" keys. "role" must be one of {"system", "user", "assistant"}.

Source code in package/llm-ie/src/llm_ie/engines.py
def preprocess_messages(self, messages:List[Dict[str,str]]) -> List[Dict[str,str]]:
    """
    Concatenate system prompts to the next user prompt.

    Parameters:
    ----------
    messages : List[Dict[str,str]]
        a list of dict with role and content. role must be one of {"system", "user", "assistant"}

    Returns:
    -------
    messages : List[Dict[str,str]]
        a list of dict with role and content. role must be one of {"system", "user", "assistant"}
    """
    system_prompt_holder = ""
    new_messages = []
    for i, message in enumerate(messages):
        # if system prompt, store it in system_prompt_holder
        if message['role'] == 'system':
            system_prompt_holder = message['content']
        # if user prompt, concatenate it with system_prompt_holder
        elif message['role'] == 'user':
            if system_prompt_holder:
                new_message = {'role': message['role'], 'content': f"{system_prompt_holder} {message['content']}"}
                system_prompt_holder = ""
            else:
                new_message = {'role': message['role'], 'content': message['content']}

            new_messages.append(new_message)
        # if assistant/other prompt, do nothing
        else:
            new_message = {'role': message['role'], 'content': message['content']}
            new_messages.append(new_message)

    return new_messages
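
A sketch showing how a system prompt is folded into the next user message; the message contents are hypothetical.

from llm_ie.engines import OpenAIReasoningLLMConfig

messages = [
    {"role": "system", "content": "You are a careful extractor."},
    {"role": "user", "content": "List the diagnoses."},
]

config = OpenAIReasoningLLMConfig()
print(config.preprocess_messages(messages))
# [{'role': 'user', 'content': 'You are a careful extractor. List the diagnoses.'}]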

postprocess_response

postprocess_response(
    response: Union[str, Generator[str, None, None]],
) -> Union[str, Generator[Dict[str, str], None, None]]

This method postprocesses the LLM response after it is generated.

Parameters:

response : Union[str, Generator[str, None, None]]
    The LLM response. Can be a string or a generator.

Returns:

response : Union[str, Generator[Dict[str, str], None, None]]
    The postprocessed LLM response. If the input is a generator, the output is a generator yielding {"type": "response", "data": <content>}.

Source code in package/llm-ie/src/llm_ie/engines.py
def postprocess_response(self, response:Union[str, Generator[str, None, None]]) -> Union[str, Generator[Dict[str, str], None, None]]:
    """
    This method postprocesses the LLM response after it is generated.

    Parameters:
    ----------
    response : Union[str, Generator[str, None, None]]
        the LLM response. Can be a string or a generator.

    Returns: Union[str, Generator[Dict[str, str], None, None]]
        the postprocessed LLM response. 
        if input is a generator, the output will be a generator {"type": "response", "data": <content>}.
    """
    if isinstance(response, str):
        return response

    def _process_stream():
        for chunk in response:
            yield {"type": "response", "data": chunk}

    return _process_stream()

llm_ie.engines.Qwen3LLMConfig

Qwen3LLMConfig(thinking_mode: bool = True, **kwargs)

Bases: LLMConfig

The Qwen3 LLM configuration for reasoning models.

Parameters:

thinking_mode : bool, Optional
    If True, a special token "/think" will be placed after each system and user prompt. Otherwise, "/no_think" will be placed.

Source code in package/llm-ie/src/llm_ie/engines.py
def __init__(self, thinking_mode:bool=True, **kwargs):
    """
    The Qwen3 LLM configuration for reasoning models.

    Parameters:
    ----------
    thinking_mode : bool, Optional
        if True, a special token "/think" will be placed after each system and user prompt. Otherwise, "/no_think" will be placed.
    """
    super().__init__(**kwargs)
    self.thinking_mode = thinking_mode
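
A minimal construction sketch:

from llm_ie.engines import Qwen3LLMConfig

# thinking_mode=True (the default) appends "/think" to prompts; False appends "/no_think".
config = Qwen3LLMConfig(thinking_mode=False)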

preprocess_messages

preprocess_messages(
    messages: List[Dict[str, str]],
) -> List[Dict[str, str]]

Append a special token to the system and user prompts. The token is "/think" if thinking_mode is True, otherwise "/no_think".

Parameters:

messages : List[Dict[str, str]]
    A list of dicts with "role" and "content" keys. "role" must be one of {"system", "user", "assistant"}.

Returns:

messages : List[Dict[str, str]]
    A list of dicts with "role" and "content" keys. "role" must be one of {"system", "user", "assistant"}.

Source code in package/llm-ie/src/llm_ie/engines.py
def preprocess_messages(self, messages:List[Dict[str,str]]) -> List[Dict[str,str]]:
    """
    Append a special token to the system and user prompts.
    The token is "/think" if thinking_mode is True, otherwise "/no_think".

    Parameters:
    ----------
    messages : List[Dict[str,str]]
        a list of dict with role and content. role must be one of {"system", "user", "assistant"}

    Returns:
    -------
    messages : List[Dict[str,str]]
        a list of dict with role and content. role must be one of {"system", "user", "assistant"}
    """
    thinking_token = "/think" if self.thinking_mode else "/no_think"
    new_messages = []
    for message in messages:
        if message['role'] in ['system', 'user']:
            new_message = {'role': message['role'], 'content': f"{message['content']} {thinking_token}"}
        else:
            new_message = {'role': message['role'], 'content': message['content']}

        new_messages.append(new_message)

    return new_messages
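
A sketch of the token placement, with hypothetical message contents:

from llm_ie.engines import Qwen3LLMConfig

messages = [
    {"role": "system", "content": "You extract entities."},
    {"role": "user", "content": "Find all dates."},
]

config = Qwen3LLMConfig(thinking_mode=True)
print(config.preprocess_messages(messages))
# [{'role': 'system', 'content': 'You extract entities. /think'},
#  {'role': 'user', 'content': 'Find all dates. /think'}]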

postprocess_response

postprocess_response(
    response: Union[str, Generator[str, None, None]],
) -> Union[str, Generator[Dict[str, str], None, None]]

If the input is a generator, tag contents between <think> and </think> as {"type": "reasoning", "data": <content>} and the rest as {"type": "response", "data": <content>}. If the input is a string, drop contents between <think> and </think>.

Parameters:

response : Union[str, Generator[str, None, None]]
    The LLM response. Can be a string or a generator.

Returns:

response : Union[str, Generator[Dict[str, str], None, None]]
    The postprocessed LLM response. If the input is a generator, the output is a generator yielding {"type": <"reasoning" or "response">, "data": <content>}.

Source code in package/llm-ie/src/llm_ie/engines.py
def postprocess_response(self, response:Union[str, Generator[str, None, None]]) -> Union[str, Generator[Dict[str,str], None, None]]:
    """
    If input is a generator, tag contents in <think> and </think> as {"type": "reasoning", "data": <content>},
    and the rest as {"type": "response", "data": <content>}.
    If input is a string, drop contents in <think> and </think>.

    Parameters:
    ----------
    response : Union[str, Generator[str, None, None]]
        the LLM response. Can be a string or a generator.

    Returns:
    -------
    response : Union[str, Generator[str, None, None]]
        the postprocessed LLM response.
        if input is a generator, the output will be a generator {"type": <reasoning or response>, "data": <content>}.
    """
    if isinstance(response, str):
        return re.sub(r"<think>.*?</think>\s*", "", response, flags=re.DOTALL).strip()

    if isinstance(response, Generator):
        def _process_stream():
            think_flag = False
            buffer = ""
            for chunk in response:
                if isinstance(chunk, str):
                    buffer += chunk
                    # switch between reasoning and response
                    if "<think>" in buffer:
                        think_flag = True
                        buffer = buffer.replace("<think>", "")
                    elif "</think>" in buffer:
                        think_flag = False
                        buffer = buffer.replace("</think>", "")

                    # if chunk is in thinking block, tag it as reasoning; else tag it as response
                    if chunk not in ["<think>", "</think>"]:
                        if think_flag:
                            yield {"type": "reasoning", "data": chunk}
                        else:
                            yield {"type": "response", "data": chunk}

        return _process_stream()
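
A sketch of the string branch, which strips the thinking block; streamed chunks are instead tagged as reasoning or response as in the source above.

from llm_ie.engines import Qwen3LLMConfig

config = Qwen3LLMConfig()

raw = "<think>Check the date format first.</think>\nThe admission date is 2021-03-05."
print(config.postprocess_response(raw))  # The admission date is 2021-03-05.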