Florence-2#
This example demonstrates how to use the Florence2 tool, which interprets simple text prompts to perform tasks such as captioning, object detection, and segmentation.
# Image is needed below to load the input file; without this import the
# example fails with a NameError on Image.open().
from PIL import Image

from vision_agent_tools.shared_types import PromptTask
from vision_agent_tools.models.florence2 import Florence2

# Path to the image to analyze (replace this path with your own!)
test_image = "path/to/your/image.jpg"

# Choose the task that you are planning to use
task_prompt = PromptTask.CAPTION

# Load the image and initialize the Florence2 model
image = Image.open(test_image)
model = Florence2()

# Time to put Florence2 to work! Let's see what it finds...
# Images are always passed as a list, even when there is only one.
results = model(images=[image], task=task_prompt)

# Print the output result
print(f"The image contains: {results[0]}")
Florence2
#
Bases: BaseMLModel
Florence2 model. It supports both zero-shot and fine-tuned settings. For zero-shot inference it uses Florence-2-large; for fine-tuning it uses Florence-2-base-ft. This model can interpret simple text prompts to perform tasks like captioning, object detection, and segmentation.
__call__(task, prompt='', images=None, video=None, *, batch_size=5, nms_threshold=0.3, chunk_length_frames=None)
#
Performs inference on the Florence-2 model based on the provided task, images or video, and prompt.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
task |
PromptTask
|
The task to be performed on the images or video. |
required |
prompt |
Optional[str]
|
The text input that complements the prompt task. |
''
|
images |
list[Image] | None
|
A list of images for the model to process. None if using video. |
None
|
video |
VideoNumpy | None
|
A NumPy representation of the video for inference. None if using images. |
None
|
batch_size |
int
|
The batch size used for processing multiple images or video frames. |
5
|
nms_threshold |
float
|
The IoU threshold value used to apply a simple class-agnostic Non-Maximum Suppression (NMS). |
0.3
|
chunk_length_frames |
int | None
|
The number of frames for each chunk of video to analyze. The last chunk may have fewer frames. |
None
|
Returns:
Name | Type | Description |
---|---|---|
Florence2ResponseType |
Florence2ResponseType
|
The output of the Florence-2 model based on the task and prompt. |
__init__(model_config=Florence2Config())
#
Initializes the Florence2 model.
fine_tune(checkpoint)
#
Load the fine-tuned Florence-2 model.
load_base()
#
Load the base Florence-2 model.