FlorenceQA#
This example demonstrates using the Florence2-QA tool to answer questions about images.
NOTE: The FlorenceQA model can only be used in GPU environments.
from vision_agent_tools.models.florence2_qa import FlorenceQA
# (replace this path with your own!)
test_image = "path/to/your/image.jpg"
# Load the image (Image is the Pillow class; requires `from PIL import Image`) and initialize the FlorenceQA model
image = Image.open(test_image)
run_florence_qa = FlorenceQA()
# Time to put FlorenceQA to work! Let's pose a question about the image
answer = run_florence_qa(image, question="Is there a dog in the image?")
# Print the output answer
print(answer)
FlorenceQA
#
Bases: BaseMLModel
FlorenceQA is a tool that combines the Florence-2 and Roberta QA models to answer questions about images.
NOTE: The Florence-2 model can only be used in GPU environments.
__call__(image, question)
#
FlorenceQA model answers questions about images.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
image | Image | The image to be analyzed. | required |
question | str | The question to be answered. | required |
Returns:
Name | Type | Description |
---|---|---|
str | dict[str, Any] | The answer to the question. |
__init__()
#
Initializes the FlorenceQA model.