
vision_agent.agent

vision_agent.agent.agent.Agent

Bases: ABC

log_progress abstractmethod

log_progress(data)

Log the progress of the agent. This is a hook intended for reporting the agent's progress.

Source code in vision_agent/agent/agent.py
@abstractmethod
def log_progress(self, data: Dict[str, Any]) -> None:
    """Log the progress of the agent.
    This is a hook that is intended for reporting the progress of the agent.
    """
    pass
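
For example, a minimal sketch of a custom agent implementing this hook (LoggingAgent is a hypothetical name, and the Agent base class may declare other abstract members not shown on this page):

from typing import Any, Dict

from vision_agent.agent.agent import Agent


class LoggingAgent(Agent):
    """Hypothetical agent that echoes progress payloads as they arrive."""

    def log_progress(self, data: Dict[str, Any]) -> None:
        # data is an arbitrary progress payload emitted by the agent loop
        print(f"[progress] {data}")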

vision_agent.agent.vision_agent_v2.VisionAgentV2

VisionAgentV2(
    agent=None,
    coder=None,
    hil=False,
    verbose=False,
    code_sandbox_runtime=None,
    update_callback=lambda x: None,
)

Bases: Agent

VisionAgentV2 is a conversational agent that makes it easier to use a coder agent, such as VisionAgentCoderV2, to write vision code for you.
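
For example, a minimal sketch of constructing the agent with a callback that collects intermediate messages (the payloads passed to the callback are plain dicts; their exact contents depend on the configured models):

from vision_agent.agent.vision_agent_v2 import VisionAgentV2

intermediate_messages = []

agent = VisionAgentV2(
    verbose=True,
    update_callback=intermediate_messages.append,  # called with a dict per update
)

The chat method below is the main entry point for talking to the agent.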

Initialize the VisionAgentV2.

PARAMETER DESCRIPTION
agent

The language model to use for the agent. If None, a default AnthropicLMM will be used.

TYPE: Optional[LMM] DEFAULT: None

coder

The coder agent to use for generating vision code. If None, a default VisionAgentCoderV2 will be used.

TYPE: Optional[AgentCoder] DEFAULT: None

hil

Whether to use human-in-the-loop mode.

TYPE: bool DEFAULT: False

verbose

Whether to print out debug information.

TYPE: bool DEFAULT: False

code_sandbox_runtime

The code sandbox runtime to use; can be either None or "local". If None, the value is read from the environment variable CODE_SANDBOX_RUNTIME.

TYPE: Optional[str] DEFAULT: None

update_callback

The callback function that will send back intermediate conversation messages.

TYPE: Callable[[Dict[str, Any]], None] DEFAULT: lambda x: None

Source code in vision_agent/agent/vision_agent_v2.py
def __init__(
    self,
    agent: Optional[LMM] = None,
    coder: Optional[AgentCoder] = None,
    hil: bool = False,
    verbose: bool = False,
    code_sandbox_runtime: Optional[str] = None,
    update_callback: Callable[[Dict[str, Any]], None] = lambda x: None,
) -> None:
    """Initialize the VisionAgentV2.

    Parameters:
        agent (Optional[LMM]): The language model to use for the agent. If None, a
            default AnthropicLMM will be used.
        coder (Optional[AgentCoder]): The coder agent to use for generating vision
            code. If None, a default VisionAgentCoderV2 will be used.
        hil (bool): Whether to use human-in-the-loop mode.
        verbose (bool): Whether to print out debug information.
        code_sandbox_runtime (Optional[str]): The code sandbox runtime to use, can
            be one of: None or "local". If None, it will read from the
            environment variable CODE_SANDBOX_RUNTIME.
        update_callback (Callable[[Dict[str, Any]], None]): The callback function
            that will send back intermediate conversation messages.
    """

    self.agent = agent if agent is not None else CONFIG.create_agent()
    self.coder = (
        coder
        if coder is not None
        else VisionAgentCoderV2(
            verbose=verbose, update_callback=update_callback, hil=hil
        )
    )

    self.verbose = verbose
    self.code_sandbox_runtime = code_sandbox_runtime
    self.update_callback = update_callback

    # force coder to use the same update_callback
    if hasattr(self.coder, "update_callback"):
        self.coder.update_callback = update_callback

agent instance-attribute

agent = agent if agent is not None else create_agent()

coder instance-attribute

coder = (
    coder
    if coder is not None
    else VisionAgentCoderV2(
        verbose=verbose,
        update_callback=update_callback,
        hil=hil,
    )
)

verbose instance-attribute

verbose = verbose

code_sandbox_runtime instance-attribute

code_sandbox_runtime = code_sandbox_runtime

update_callback instance-attribute

update_callback = update_callback

chat

chat(chat, code_interpreter=None)

Conversational interface to the agent. This is the main method for interacting with the agent: it takes a list of messages and returns the agent's response as a list of messages.

PARAMETER DESCRIPTION
chat

The input to the agent. This should be a list of AgentMessage objects.

TYPE: List[AgentMessage]

code_interpreter

The code interpreter to use.

TYPE: Optional[CodeInterpreter] DEFAULT: None

RETURNS DESCRIPTION
List[AgentMessage]

List[AgentMessage]: The agent's response as a list of AgentMessage objects.

Source code in vision_agent/agent/vision_agent_v2.py
def chat(
    self,
    chat: List[AgentMessage],
    code_interpreter: Optional[CodeInterpreter] = None,
) -> List[AgentMessage]:
    """Conversational interface to the agent. This is the main method to use to
    interact with the agent. It takes in a list of messages and returns the agent's
    response as a list of messages.

    Parameters:
        chat (List[AgentMessage]): The input to the agent. This should be a list of
            AgentMessage objects.
        code_interpreter (Optional[CodeInterpreter]): The code interpreter to use.

    Returns:
        List[AgentMessage]: The agent's response as a list of AgentMessage objects.
    """

    chat = copy.deepcopy(chat)
    if not chat or chat[-1].role not in {"user", "interaction_response"}:
        raise ValueError(
            f"Last chat message must be from the user or interaction_response, got {chat[-1].role}."
        )

    return_chat = []
    with (
        CodeInterpreterFactory.new_instance(self.code_sandbox_runtime)
        if code_interpreter is None
        else code_interpreter
    ) as code_interpreter:
        int_chat, _, _ = add_media_to_chat(chat, code_interpreter)

        # if we had an interaction and then received an observation from the user
        # go back into the same action to finish it.
        action = None
        if check_for_interaction(int_chat):
            action = "generate_or_edit_vision_code"
        else:
            response_context = run_conversation(self.agent, int_chat)
            return_chat.append(
                AgentMessage(role="conversation", content=response_context)
            )
            self.update_callback(return_chat[-1].model_dump())
            action = extract_tag(response_context, "action")

        updated_chat = maybe_run_action(
            self.coder, action, int_chat, code_interpreter=code_interpreter
        )

        # return an interaction early to get users feedback
        if updated_chat is not None and updated_chat[-1].role == "interaction":
            return_chat.extend(updated_chat)
        elif updated_chat is not None and updated_chat[-1].role != "interaction":
            # do not append updated_chat to return_chat because the observation
            # from running the action will have already been added via the callbacks
            obs_response_context = run_conversation(
                self.agent, int_chat + return_chat + updated_chat
            )
            return_chat.append(
                AgentMessage(role="conversation", content=obs_response_context)
            )
            self.update_callback(return_chat[-1].model_dump())

    return return_chat
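
A sketch of a single conversational turn. AgentMessage is assumed to be importable from vision_agent.agent.types (the import path may differ between versions), and the file path in the prompt is a placeholder:

from vision_agent.agent.vision_agent_v2 import VisionAgentV2
from vision_agent.agent.types import AgentMessage  # assumed import path

agent = VisionAgentV2(verbose=True)

conversation = [
    AgentMessage(role="user", content="Count the people in people.jpg"),
]
response = agent.chat(conversation)

# The last message is either a normal "conversation" reply or an
# "interaction" request; in the latter case, append an
# "interaction_response" message and call chat() again.
print(response[-1].role, response[-1].content)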

log_progress

log_progress(data)
Source code in vision_agent/agent/vision_agent_v2.py
def log_progress(self, data: Dict[str, Any]) -> None:
    pass

vision_agent.agent.vision_agent_coder_v2.VisionAgentCoderV2

VisionAgentCoderV2(
    planner=None,
    coder=None,
    tester=None,
    debugger=None,
    tool_recommender=None,
    hil=False,
    verbose=False,
    code_sandbox_runtime=None,
    update_callback=lambda _: None,
)

Bases: AgentCoder

VisionAgentCoderV2 is an agent that will write vision code for you.
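
As a sketch, the sub-agents can be overridden at construction time; here the coder and tester models are replaced with AnthropicLMM instances (AnthropicLMM is assumed to be importable from vision_agent.lmm and to pick a default model when none is given):

from vision_agent.agent.vision_agent_coder_v2 import VisionAgentCoderV2
from vision_agent.lmm import AnthropicLMM  # assumed import path

coder_agent = VisionAgentCoderV2(
    coder=AnthropicLMM(),   # LMM that writes the vision code
    tester=AnthropicLMM(),  # LMM that writes the test code
    verbose=True,
)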

Initialize the VisionAgentCoderV2.

PARAMETER DESCRIPTION
planner

The planner agent to use for generating vision plans. If None, a default VisionAgentPlannerV2 will be used.

TYPE: Optional[AgentPlanner] DEFAULT: None

coder

The language model to use for the coder agent. If None, a default AnthropicLMM will be used.

TYPE: Optional[LMM] DEFAULT: None

tester

The language model to use for the tester agent. If None, a default AnthropicLMM will be used.

TYPE: Optional[LMM] DEFAULT: None

debugger

The language model to use for the debugger agent.

TYPE: Optional[LMM] DEFAULT: None

tool_recommender

The tool recommender to use.

TYPE: Optional[Union[str, Sim]] DEFAULT: None

hil

Whether to use human-in-the-loop mode.

TYPE: bool DEFAULT: False

verbose

Whether to print out debug information.

TYPE: bool DEFAULT: False

code_sandbox_runtime

The code sandbox runtime to use; can be either None or "local". If None, the value is read from the environment variable CODE_SANDBOX_RUNTIME.

TYPE: Optional[str] DEFAULT: None

update_callback

The callback function that will send back intermediate conversation messages.

TYPE: Callable[[Dict[str, Any]], None] DEFAULT: lambda _: None

Source code in vision_agent/agent/vision_agent_coder_v2.py
def __init__(
    self,
    planner: Optional[AgentPlanner] = None,
    coder: Optional[LMM] = None,
    tester: Optional[LMM] = None,
    debugger: Optional[LMM] = None,
    tool_recommender: Optional[Union[str, Sim]] = None,
    hil: bool = False,
    verbose: bool = False,
    code_sandbox_runtime: Optional[str] = None,
    update_callback: Callable[[Dict[str, Any]], None] = lambda _: None,
) -> None:
    """Initialize the VisionAgentCoderV2.

    Parameters:
        planner (Optional[AgentPlanner]): The planner agent to use for generating
            vision plans. If None, a default VisionAgentPlannerV2 will be used.
        coder (Optional[LMM]): The language model to use for the coder agent. If
            None, a default AnthropicLMM will be used.
        tester (Optional[LMM]): The language model to use for the tester agent. If
            None, a default AnthropicLMM will be used.
        debugger (Optional[LMM]): The language model to use for the debugger agent.
        tool_recommender (Optional[Union[str, Sim]]): The tool recommender to use.
        hil (bool): Whether to use human-in-the-loop mode.
        verbose (bool): Whether to print out debug information.
        code_sandbox_runtime (Optional[str]): The code sandbox runtime to use, can
            be one of: None or "local". If None, it will read from the
            environment variable CODE_SANDBOX_RUNTIME.
        update_callback (Callable[[Dict[str, Any]], None]): The callback function
            that will send back intermediate conversation messages.
    """

    self.planner = (
        planner
        if planner is not None
        else VisionAgentPlannerV2(
            verbose=verbose, update_callback=update_callback, hil=hil
        )
    )

    self.coder = coder if coder is not None else CONFIG.create_coder()
    self.tester = tester if tester is not None else CONFIG.create_tester()
    self.debugger = debugger if debugger is not None else CONFIG.create_debugger()
    if tool_recommender is not None:
        if isinstance(tool_recommender, str):
            self.tool_recommender = Sim.load(tool_recommender)
        elif isinstance(tool_recommender, Sim):
            self.tool_recommender = tool_recommender
    else:
        self.tool_recommender = get_tool_recommender()

    self.verbose = verbose
    self.code_sandbox_runtime = code_sandbox_runtime
    self.update_callback = update_callback
    if hasattr(self.planner, "update_callback"):
        self.planner.update_callback = update_callback

planner instance-attribute

planner = (
    planner
    if planner is not None
    else VisionAgentPlannerV2(
        verbose=verbose,
        update_callback=update_callback,
        hil=hil,
    )
)

coder instance-attribute

coder = coder if coder is not None else create_coder()

tester instance-attribute

tester = tester if tester is not None else create_tester()

debugger instance-attribute

debugger = (
    debugger if debugger is not None else create_debugger()
)

tool_recommender instance-attribute

tool_recommender = load(tool_recommender)

verbose instance-attribute

verbose = verbose

code_sandbox_runtime instance-attribute

code_sandbox_runtime = code_sandbox_runtime

update_callback instance-attribute

update_callback = update_callback

generate_code

generate_code(chat, max_steps=None, code_interpreter=None)

Generate vision code from a conversation.

PARAMETER DESCRIPTION
chat

The input to the agent. This should be a list of AgentMessage objects.

TYPE: List[AgentMessage]

code_interpreter

The code interpreter to use.

TYPE: Optional[CodeInterpreter] DEFAULT: None

RETURNS DESCRIPTION
CodeContext

The generated code as a CodeContext object, which includes the code, test code, whether or not it was executed successfully, and the execution result. If the planner needs user input, an InteractionContext is returned instead; if planning fails, an ErrorContext is returned.

TYPE: Union[CodeContext, InteractionContext, ErrorContext]

Source code in vision_agent/agent/vision_agent_coder_v2.py
def generate_code(
    self,
    chat: List[AgentMessage],
    max_steps: Optional[int] = None,
    code_interpreter: Optional[CodeInterpreter] = None,
) -> Union[CodeContext, InteractionContext, ErrorContext]:
    """Generate vision code from a conversation.

    Parameters:
        chat (List[AgentMessage]): The input to the agent. This should be a list of
            AgentMessage objects.
        code_interpreter (Optional[CodeInterpreter]): The code interpreter to use.

    Returns:
        CodeContext: The generated code as a CodeContext object which includes the
            code, test code, whether or not it was executed successfully, and the
            execution result.
    """

    chat = copy.deepcopy(chat)
    if not chat or chat[-1].role not in {"user", "interaction_response"}:
        raise ValueError(
            f"Last chat message must be from the user or interaction_response, got {chat[-1].role}."
        )

    with (
        CodeInterpreterFactory.new_instance(self.code_sandbox_runtime)
        if code_interpreter is None
        else code_interpreter
    ) as code_interpreter:
        int_chat, orig_chat, _ = add_media_to_chat(chat, code_interpreter)
        plan_context = self.planner.generate_plan(
            int_chat, max_steps=max_steps, code_interpreter=code_interpreter
        )
        # the planner needs an interaction, so return before generating code
        if isinstance(plan_context, InteractionContext):
            return plan_context
        elif isinstance(plan_context, ErrorContext):
            return plan_context

        code_context = self.generate_code_from_plan(
            orig_chat,
            plan_context,
            code_interpreter,
        )
    return code_context
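
A sketch of calling generate_code directly and branching on the returned context type (the context classes are assumed to be importable from vision_agent.agent.types, and the CodeContext field names follow the return description above; verify both against your installed version):

from vision_agent.agent.vision_agent_coder_v2 import VisionAgentCoderV2
from vision_agent.agent.types import (  # assumed import path
    AgentMessage,
    CodeContext,
    InteractionContext,
)

coder = VisionAgentCoderV2(verbose=True)
result = coder.generate_code(
    [AgentMessage(role="user", content="Detect the dogs in dogs.mp4")]
)

if isinstance(result, CodeContext):
    print(result.code)  # the generated vision code
elif isinstance(result, InteractionContext):
    print("The planner needs user input before code can be generated.")
else:
    print("Planning failed:", result)  # ErrorContext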

generate_code_from_plan

generate_code_from_plan(
    chat, plan_context, code_interpreter=None
)

Generate vision code from a conversation and a previously made plan. This will skip the planning step and go straight to generating code.

PARAMETER DESCRIPTION
chat

The input to the agent. This should be a list of AgentMessage objects.

TYPE: List[AgentMessage]

plan_context

The plan context that was previously generated. If plan_context.code is not provided, then the code will be generated from the chat messages.

TYPE: PlanContext

code_interpreter

The code interpreter to use.

TYPE: Optional[CodeInterpreter] DEFAULT: None

RETURNS DESCRIPTION
CodeContext

The generated code as a CodeContext object, which includes the code, test code, whether or not it was executed successfully, and the execution result.

TYPE: CodeContext

Source code in vision_agent/agent/vision_agent_coder_v2.py
def generate_code_from_plan(
    self,
    chat: List[AgentMessage],
    plan_context: PlanContext,
    code_interpreter: Optional[CodeInterpreter] = None,
) -> CodeContext:
    """Generate vision code from a conversation and a previously made plan. This
    will skip the planning step and go straight to generating code.

    Parameters:
        chat (List[AgentMessage]): The input to the agent. This should be a list of
            AgentMessage objects.
        plan_context (PlanContext): The plan context that was previously generated.
            If plan_context.code is not provided, then the code will be generated
            from the chat messages.
        code_interpreter (Optional[CodeInterpreter]): The code interpreter to use.

    Returns:
        CodeContext: The generated code as a CodeContext object which includes the
            code, test code, whether or not it was executed successfully, and the
            execution result.
    """

    chat = copy.deepcopy(chat)
    if not chat or chat[-1].role not in {"user", "interaction_response"}:
        raise ValueError(
            f"Last chat message must be from the user or interaction_response, got {chat[-1].role}."
        )

    # we don't need the user_interaction response for generating code since it's
    # already in the plan context
    while len(chat) > 0 and chat[-1].role != "user":
        chat.pop()

    if not chat:
        raise ValueError("Chat must have at least one user message.")

    with (
        CodeInterpreterFactory.new_instance(self.code_sandbox_runtime)
        if code_interpreter is None
        else code_interpreter
    ) as code_interpreter:
        int_chat, _, media_list = add_media_to_chat(chat, code_interpreter)
        tool_docs = retrieve_tools(plan_context.instructions, self.tool_recommender)

        # If code is not provided from the plan_context then generate it, else use
        # the provided code and start with testing
        if not plan_context.code.strip():
            code = write_code(
                coder=self.coder,
                chat=int_chat,
                tool_docs=tool_docs,
                plan=format_plan_v2(plan_context),
            )
        else:
            code = plan_context.code

        code_context = test_code(
            tester=self.tester,
            debugger=self.debugger,
            chat=int_chat,
            plan=format_plan_v2(plan_context),
            code=code,
            tool_docs=tool_docs,
            code_interpreter=code_interpreter,
            media_list=media_list,
            verbose=self.verbose,
        )

    self.update_callback(
        {
            "role": "coder",
            "content": format_code_context(code_context),
            "media": capture_media_from_exec(code_context.test_result),
        }
    )
    self.update_callback(
        {
            "role": "observation",
            "content": code_context.test_result.text(),
        }
    )
    return code_context
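
A sketch of skipping the planning step by supplying a PlanContext directly. The PlanContext fields used here (plan, instructions, code) are assumptions based on how they are referenced on this page; check the class definition in your installed version:

from vision_agent.agent.vision_agent_coder_v2 import VisionAgentCoderV2
from vision_agent.agent.types import AgentMessage, PlanContext  # assumed import path

coder = VisionAgentCoderV2()

# Hypothetical hand-written plan; normally this comes from
# VisionAgentPlannerV2.generate_plan.
plan = PlanContext(
    plan="Load the image and count the people with an object detector.",
    instructions=[
        "Load people.jpg",
        "Run a person detector and count the detections",
    ],
    code="",  # left empty so the coder writes the code itself
)

code_context = coder.generate_code_from_plan(
    [AgentMessage(role="user", content="Count the people in people.jpg")],
    plan,
)
print(code_context.code)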

log_progress

log_progress(data)
Source code in vision_agent/agent/vision_agent_coder_v2.py
def log_progress(self, data: Dict[str, Any]) -> None:
    pass

vision_agent.agent.vision_agent_planner_v2.VisionAgentPlannerV2

VisionAgentPlannerV2(
    planner=None,
    summarizer=None,
    critic=None,
    max_steps=10,
    use_multi_trial_planning=False,
    critique_steps=11,
    hil=False,
    verbose=False,
    code_sandbox_runtime=None,
    update_callback=lambda _: None,
)

Bases: AgentPlanner

VisionAgentPlannerV2 is a class that generates a plan to solve a vision task.

Initialize the VisionAgentPlannerV2.

PARAMETER DESCRIPTION
planner

The language model to use for planning. If None, a default AnthropicLMM will be used.

TYPE: Optional[LMM] DEFAULT: None

critic

The language model to use for critiquing the plan. If None, a default AnthropicLMM will be used.

TYPE: Optional[LMM] DEFAULT: None

max_steps

The maximum number of steps to plan.

TYPE: int DEFAULT: 10

use_multi_trial_planning

Whether to use multi-trial planning.

TYPE: bool DEFAULT: False

critique_steps

The number of steps between critiques. If critique_steps is larger than max_steps, no critiques will be made.

TYPE: int DEFAULT: 11

hil

Whether to use human-in-the-loop mode.

TYPE: bool DEFAULT: False

verbose

Whether to print out debug information.

TYPE: bool DEFAULT: False

code_sandbox_runtime

The code sandbox runtime to use; can be either None or "local". If None, the value is read from the environment variable CODE_SANDBOX_RUNTIME.

TYPE: Optional[str] DEFAULT: None

update_callback

The callback function that will send back intermediate conversation messages.

TYPE: Callable[[Dict[str, Any]], None] DEFAULT: lambda _: None

Source code in vision_agent/agent/vision_agent_planner_v2.py
def __init__(
    self,
    planner: Optional[LMM] = None,
    summarizer: Optional[LMM] = None,
    critic: Optional[LMM] = None,
    max_steps: int = 10,
    use_multi_trial_planning: bool = False,
    critique_steps: int = 11,
    hil: bool = False,
    verbose: bool = False,
    code_sandbox_runtime: Optional[str] = None,
    update_callback: Callable[[Dict[str, Any]], None] = lambda _: None,
) -> None:
    """Initialize the VisionAgentPlannerV2.

    Parameters:
        planner (Optional[LMM]): The language model to use for planning. If None, a
            default AnthropicLMM will be used.
        critic (Optional[LMM]): The language model to use for critiquing the plan.
            If None, a default AnthropicLMM will be used.
        max_steps (int): The maximum number of steps to plan.
        use_multi_trial_planning (bool): Whether to use multi-trial planning.
        critique_steps (int): The number of steps between critiques. If critique_steps
            is larger than max_steps, no critiques will be made.
        hil (bool): Whether to use human-in-the-loop mode.
        verbose (bool): Whether to print out debug information.
        code_sandbox_runtime (Optional[str]): The code sandbox runtime to use, can
            be one of: None or "local". If None, it will read from the
            environment variable CODE_SANDBOX_RUNTIME.
        update_callback (Callable[[Dict[str, Any]], None]): The callback function
            that will send back intermediate conversation messages.
    """

    self.planner = planner if planner is not None else CONFIG.create_planner()
    self.summarizer = (
        summarizer if summarizer is not None else CONFIG.create_summarizer()
    )
    self.critic = critic if critic is not None else CONFIG.create_critic()
    self.max_steps = max_steps
    self.use_multi_trial_planning = use_multi_trial_planning
    self.critique_steps = critique_steps

    self.hil = hil
    if self.hil:
        DefaultPlanningImports.imports.append(
            "from vision_agent.tools.planner_tools import get_tool_for_task_human_reviewer as get_tool_for_task"
        )
    self.verbose = verbose
    self.code_sandbox_runtime = code_sandbox_runtime
    self.update_callback = update_callback

planner instance-attribute

planner = (
    planner if planner is not None else create_planner()
)

summarizer instance-attribute

summarizer = (
    summarizer
    if summarizer is not None
    else create_summarizer()
)

critic instance-attribute

critic = critic if critic is not None else create_critic()

max_steps instance-attribute

max_steps = max_steps

use_multi_trial_planning instance-attribute

use_multi_trial_planning = use_multi_trial_planning

critique_steps instance-attribute

critique_steps = critique_steps

hil instance-attribute

hil = hil

verbose instance-attribute

verbose = verbose

code_sandbox_runtime instance-attribute

code_sandbox_runtime = code_sandbox_runtime

update_callback instance-attribute

update_callback = update_callback

generate_plan

generate_plan(chat, max_steps=None, code_interpreter=None)

Generate a plan to solve a vision task.

PARAMETER DESCRIPTION
chat

The conversation messages to generate a plan for.

TYPE: List[AgentMessage]

max_steps

The maximum number of steps to plan.

TYPE: Optional[int] DEFAULT: None

code_interpreter

The code interpreter to use.

TYPE: Optional[CodeInterpreter] DEFAULT: None

RETURNS DESCRIPTION
PlanContext

The generated plan, including the instructions and code snippets needed to solve the task. If the planner needs user input, an InteractionContext is returned instead; on failure, an ErrorContext is returned.

TYPE: Union[PlanContext, InteractionContext, ErrorContext]

Source code in vision_agent/agent/vision_agent_planner_v2.py
def generate_plan(
    self,
    chat: List[AgentMessage],
    max_steps: Optional[int] = None,
    code_interpreter: Optional[CodeInterpreter] = None,
) -> Union[PlanContext, InteractionContext, ErrorContext]:
    """Generate a plan to solve a vision task.

    Parameters:
        chat (List[AgentMessage]): The conversation messages to generate a plan for.
        max_steps (Optional[int]): The maximum number of steps to plan.
        code_interpreter (Optional[CodeInterpreter]): The code interpreter to use.

    Returns:
        PlanContext: The generated plan including the instructions and code snippets
            needed to solve the task.
    """

    if not chat or chat[-1].role not in {"user", "interaction_response"}:
        raise ValueError(
            f"Last chat message must be from the user or interaction_response, got {chat[-1].role}."
        )

    chat = copy.deepcopy(chat)
    max_steps = max_steps or self.max_steps

    with (
        CodeInterpreterFactory.new_instance(self.code_sandbox_runtime)
        if code_interpreter is None
        else code_interpreter
    ) as code_interpreter:
        critque_steps = 1
        finished = False
        interaction = False
        int_chat, _, media_list = add_media_to_chat(chat, code_interpreter)
        int_chat = replace_interaction_with_obs(int_chat)

        step = get_steps(int_chat, max_steps)
        if "<count>" not in int_chat[-1].content and step == max_steps:
            int_chat[-1].content += f"\n<count>{step}</count>\n"

        while step > 0 and not finished and not interaction:
            if self.use_multi_trial_planning:
                response = run_multi_trial_planning(
                    int_chat, media_list, self.planner
                )
            else:
                response = run_planning(int_chat, media_list, self.planner)

            response = response_safeguards(response)
            thinking = extract_tag(response, "thinking")
            code = extract_tag(
                response, "execute_python", extract_markdown="python"
            )
            finalize_plan = extract_tag(response, "finalize_plan")
            finished = finalize_plan is not None
            self.update_callback({"role": "planner_update", "content": response})

            if self.verbose:
                _CONSOLE.print(
                    f"[bold cyan]Step {step}:[/bold cyan] [green]{thinking}[/green]"
                )
                if finalize_plan is not None:
                    _CONSOLE.print(
                        f"[bold cyan]Finalizing Plan:[/bold cyan] [magenta]{finalize_plan}[/magenta]"
                    )

            updated_chat = maybe_run_code(
                code,
                response,
                int_chat,
                media_list,
                self.planner,
                code_interpreter,
                hil=self.hil,
                verbose=self.verbose,
            )
            interaction = updated_chat[-1].role == "interaction"

            if critque_steps % self.critique_steps == 0:
                critique = run_critic(int_chat, media_list, self.critic)
                if critique is not None and int_chat[-1].role == "observation":
                    _CONSOLE.print(
                        f"[bold cyan]Critique:[/bold cyan] [red]{critique}[/red]"
                    )
                    critique_str = f"\n[critique]\n{critique}\n[end of critique]"
                    updated_chat[-1].content += critique_str
                    # if plan was critiqued, ensure we don't finish so we can
                    # respond to the critique
                    finished = False

            critque_steps += 1
            step -= 1
            updated_chat[-1].content += f"\n<count>{step}</count>\n"
            int_chat.extend(updated_chat)
            for chat_elt in updated_chat:
                self.update_callback(chat_elt.model_dump())

        context: Union[PlanContext, InteractionContext, ErrorContext]
        if interaction:
            context = InteractionContext(chat=int_chat)
        else:
            updated_chat, context = create_finalize_plan(
                self.summarizer, int_chat, self.verbose
            )
            int_chat.extend(updated_chat)
            for chat_elt in updated_chat:
                self.update_callback(chat_elt.model_dump())

    return context
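
A sketch of running the planner on its own and inspecting the result (assuming PlanContext exposes the instructions and code fields referenced by VisionAgentCoderV2 above, and that the types are importable from vision_agent.agent.types):

from vision_agent.agent.vision_agent_planner_v2 import VisionAgentPlannerV2
from vision_agent.agent.types import AgentMessage, PlanContext  # assumed import path

planner = VisionAgentPlannerV2(verbose=True)
context = planner.generate_plan(
    [AgentMessage(role="user", content="Track the ball in soccer.mp4")],
    max_steps=5,
)

if isinstance(context, PlanContext):
    for instruction in context.instructions:
        print("-", instruction)
else:
    # InteractionContext (needs user input) or ErrorContext (planning failed)
    print(type(context).__name__)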

log_progress

log_progress(data)
Source code in vision_agent/agent/vision_agent_planner_v2.py
def log_progress(self, data: Dict[str, Any]) -> None:
    pass