Skip to content


Module for making predictions on LandingLens models.


Bases: Predictor

EdgePredictor runs local inference by connecting to an edge inference service (e.g. LandingEdge)

Source code in landingai/
class EdgePredictor(Predictor):
    """`EdgePredictor` runs local inference by connecting to an edge inference service (e.g. LandingEdge)"""

    def __init__(
        host: str = "localhost",
        port: int = 8000,
        check_server_ready: bool = True,
    ) -> None:
        """By default the inference service runs on `localhost:8000`

        host : str, optional
            Hostname or IP, by default "localhost"
        port : int, optional
            Port, by default 8000
        check_server_ready : bool, optional
            Check if the inference server is running, by default True
        self._url = f"http://{host}:{port}/images"
        # Check if the inference server is reachable
        if check_server_ready and not self._check_connectivity(host=(host, port)):
            raise ConnectionError(
                f"Failed to connect to the model server. Please check if the server is running and the connection url ({self._url})."
        self._session = _create_session(
                "contentType": "multipart/form-data"
            },  # No retries for the inference service

    def predict(
        image: Union[np.ndarray, PIL.Image.Image],
        metadata: Optional[InferenceMetadata] = None,
        reuse_session: bool = True,
        **kwargs: Any,
    ) -> List[Prediction]:
        """Run Edge inference on the input image and return the prediction result.

            The input image to be predicted
            The (optional) metadata associated with this inference/image.
            Metadata is helpful for attaching additional information to the inference result so you can later filter the historical inference results by your custom values in LandingLens.
            Note: The metadata is not reported back to LandingLens by default unless the edge inference server (i.e. ModelRunner) enables the feature of reporting historical inference results.

            See `landingai.common.InferenceMetadata` for more details.
            Whether to reuse the HTTPS session for sending multiple inference requests. By default, the session is reused to improve the performance on high latency networks (e.g. fewer SSL negotiations). If you are sending requests from multiple threads, set this to False.
            A list of prediction result.
        buffer_bytes = serialize_image(image)
        files = {"file": buffer_bytes}
        data = {"metadata": metadata.json()} if metadata else None
        if reuse_session:
            session = self._session
            session = _create_session(
                    "contentType": "multipart/form-data"
                },  # No retries for the inference service
        (preds, self._performance_metrics) = _do_inference(
            session, self._url, files, {}, _EdgeExtractor, data=data
        return preds

__init__(host='localhost', port=8000, check_server_ready=True)

By default the inference service runs on localhost:8000


host : str, optional — Hostname or IP, by default "localhost"
port : int, optional — Port, by default 8000
check_server_ready : bool, optional — Check if the inference server is running, by default True

Source code in landingai/
def __init__(
    host: str = "localhost",
    port: int = 8000,
    check_server_ready: bool = True,
) -> None:
    """By default the inference service runs on `localhost:8000`

    host : str, optional
        Hostname or IP, by default "localhost"
    port : int, optional
        Port, by default 8000
    check_server_ready : bool, optional
        Check if the inference server is running, by default True
    self._url = f"http://{host}:{port}/images"
    # Check if the inference server is reachable
    if check_server_ready and not self._check_connectivity(host=(host, port)):
        raise ConnectionError(
            f"Failed to connect to the model server. Please check if the server is running and the connection url ({self._url})."
    self._session = _create_session(
            "contentType": "multipart/form-data"
        },  # No retries for the inference service

predict(image, metadata=None, reuse_session=True, **kwargs)

Run Edge inference on the input image and return the prediction result.


image: The input image to be predicted.
metadata: The (optional) metadata associated with this inference/image. Metadata is helpful for attaching additional information to the inference result so you can later filter the historical inference results by your custom values in LandingLens. Note: The metadata is not reported back to LandingLens by default unless the edge inference server (i.e. ModelRunner) enables the feature of reporting historical inference results.

See `landingai.common.InferenceMetadata` for more details.

reuse_session: Whether to reuse the HTTPS session for sending multiple inference requests. By default, the session is reused to improve the performance on high latency networks (e.g. fewer SSL negotiations). If you are sending requests from multiple threads, set this to False.

Returns

List[Prediction]: A list of prediction results.

Source code in landingai/
def predict(
    image: Union[np.ndarray, PIL.Image.Image],
    metadata: Optional[InferenceMetadata] = None,
    reuse_session: bool = True,
    **kwargs: Any,
) -> List[Prediction]:
    """Run Edge inference on the input image and return the prediction result.

        The input image to be predicted
        The (optional) metadata associated with this inference/image.
        Metadata is helpful for attaching additional information to the inference result so you can later filter the historical inference results by your custom values in LandingLens.
        Note: The metadata is not reported back to LandingLens by default unless the edge inference server (i.e. ModelRunner) enables the feature of reporting historical inference results.

        See `landingai.common.InferenceMetadata` for more details.
        Whether to reuse the HTTPS session for sending multiple inference requests. By default, the session is reused to improve the performance on high latency networks (e.g. fewer SSL negotiations). If you are sending requests from multiple threads, set this to False.
        A list of prediction result.
    buffer_bytes = serialize_image(image)
    files = {"file": buffer_bytes}
    data = {"metadata": metadata.json()} if metadata else None
    if reuse_session:
        session = self._session
        session = _create_session(
                "contentType": "multipart/form-data"
            },  # No retries for the inference service
    (preds, self._performance_metrics) = _do_inference(
        session, self._url, files, {}, _EdgeExtractor, data=data
    return preds


Bases: Predictor

A class that calls your OCR inference endpoint on the LandingLens platform.

Source code in landingai/
class OcrPredictor(Predictor):
    """A class that calls your OCR inference endpoint on the LandingLens platform."""

    _url: str = ""

    def __init__(
        threshold: float = 0.5,
        language: Literal["en", "ch"] = "ch",
        api_key: Optional[str] = None,
    ) -> None:
        """OCR Predictor constructor

            The minimum confidence threshold of the prediction to keep, by default 0.5
            The API Key of your LandingLens organization.
            If not provided, it will try to load from the environment variable
            LANDINGAI_API_KEY or from the .env file.
            Specifies the character set to use. Can either be `"en"` for English
            or `"ch"` for Chinese and English (default).
        self._threshold = threshold
        self._language = language
        self._api_credential = load_api_credential(api_key)
        extra_x_event = {
            "model_type": "ocr",
        headers = self._build_default_headers(self._api_credential, extra_x_event)
        self._session = _create_session(Predictor._url, self._num_retry, headers)

        # All customers have a quota of images per minute. If the server return a 429, then we will wait 60 seconds and retry
        before_sleep=before_sleep_log(_LOGGER, logging.WARNING),
    def predict(  # type: ignore
        self, image: Union[np.ndarray, PIL.Image.Image], **kwargs: Any
    ) -> List[Prediction]:
        """Run OCR on the input image and return the prediction result.

            The input image to be predicted
            The mode of this prediction. It can be either "multi-text" (default) or "single-text".
            In "multi-text" mode, the predictor will detect multiple lines of text in the image.
            In "single-text" mode, the predictor will detect a single line of text in the image.
            A list of region of interest boxes/quadrilateral. Each quadrilateral is a list of 4 points (x, y).
            In "single-text" mode, the caller must provide a list of quadrilateral(s) that cover the text in the image.
            Each quadrilateral is a list of 4 points (x, y), and it should cover a single line of text in the image.
            In "multi-text" mode, regions_of_interest is not required. If it is None, the whole image will be used as the region of interest.

            A list of OCR prediction result.

        buffer_bytes = serialize_image(image)
        files = {"images": buffer_bytes}
        mode: str = kwargs.get("mode", "multi-text")
        if mode not in ["multi-text", "single-text"]:
            raise ValueError(
                f"mode must be either 'multi-text' or 'single-text', but got: {mode}"
        if mode == "single-text" and "regions_of_interest" not in kwargs:
            raise ValueError(
                "regions_of_interest parameter must be provided in single-text mode."
        data: Dict[str, Any]
        data = {"language": self._language}
        if rois := kwargs.get("regions_of_interest", []):
            data["rois"] = serialize_rois(rois, mode)

        (preds, self._performance_metrics) = _do_inference(
        return [pred for pred in preds if pred.score >= self._threshold]

__init__(threshold=0.5, *, language='ch', api_key=None)

OCR Predictor constructor


threshold: The minimum confidence threshold of the prediction to keep, by default 0.5.
api_key: The API Key of your LandingLens organization. If not provided, it will try to load from the environment variable LANDINGAI_API_KEY or from the .env file.
language: Specifies the character set to use. Can either be "en" for English or "ch" for Chinese and English (default).

Source code in landingai/
def __init__(
    threshold: float = 0.5,
    language: Literal["en", "ch"] = "ch",
    api_key: Optional[str] = None,
) -> None:
    """OCR Predictor constructor

        The minimum confidence threshold of the prediction to keep, by default 0.5
        The API Key of your LandingLens organization.
        If not provided, it will try to load from the environment variable
        LANDINGAI_API_KEY or from the .env file.
        Specifies the character set to use. Can either be `"en"` for English
        or `"ch"` for Chinese and English (default).
    self._threshold = threshold
    self._language = language
    self._api_credential = load_api_credential(api_key)
    extra_x_event = {
        "model_type": "ocr",
    headers = self._build_default_headers(self._api_credential, extra_x_event)
    self._session = _create_session(Predictor._url, self._num_retry, headers)

predict(image, **kwargs)

Run OCR on the input image and return the prediction result.


image: The input image to be predicted.
mode: The mode of this prediction. It can be either "multi-text" (default) or "single-text". In "multi-text" mode, the predictor will detect multiple lines of text in the image. In "single-text" mode, the predictor will detect a single line of text in the image.
regions_of_interest: A list of region-of-interest boxes/quadrilaterals. Each quadrilateral is a list of 4 points (x, y). In "single-text" mode, the caller must provide a list of quadrilateral(s) that cover the text in the image; each quadrilateral should cover a single line of text. In "multi-text" mode, regions_of_interest is not required. If it is None, the whole image will be used as the region of interest.


List[OcrPrediction]: A list of OCR prediction results.

Source code in landingai/
    # All customers have a quota of images per minute. If the server return a 429, then we will wait 60 seconds and retry
    before_sleep=before_sleep_log(_LOGGER, logging.WARNING),
def predict(  # type: ignore
    self, image: Union[np.ndarray, PIL.Image.Image], **kwargs: Any
) -> List[Prediction]:
    """Run OCR on the input image and return the prediction result.

        The input image to be predicted
        The mode of this prediction. It can be either "multi-text" (default) or "single-text".
        In "multi-text" mode, the predictor will detect multiple lines of text in the image.
        In "single-text" mode, the predictor will detect a single line of text in the image.
        A list of region of interest boxes/quadrilateral. Each quadrilateral is a list of 4 points (x, y).
        In "single-text" mode, the caller must provide a list of quadrilateral(s) that cover the text in the image.
        Each quadrilateral is a list of 4 points (x, y), and it should cover a single line of text in the image.
        In "multi-text" mode, regions_of_interest is not required. If it is None, the whole image will be used as the region of interest.

        A list of OCR prediction result.

    buffer_bytes = serialize_image(image)
    files = {"images": buffer_bytes}
    mode: str = kwargs.get("mode", "multi-text")
    if mode not in ["multi-text", "single-text"]:
        raise ValueError(
            f"mode must be either 'multi-text' or 'single-text', but got: {mode}"
    if mode == "single-text" and "regions_of_interest" not in kwargs:
        raise ValueError(
            "regions_of_interest parameter must be provided in single-text mode."
    data: Dict[str, Any]
    data = {"language": self._language}
    if rois := kwargs.get("regions_of_interest", []):
        data["rois"] = serialize_rois(rois, mode)

    (preds, self._performance_metrics) = _do_inference(
    return [pred for pred in preds if pred.score >= self._threshold]


A class that calls your inference endpoint on the LandingLens platform.

Source code in landingai/
class Predictor:
    """A class that calls your inference endpoint on the LandingLens platform."""

    _url: str = ""
    _num_retry: int = 3

    def __init__(
        endpoint_id: str,
        api_key: Optional[str] = None,
        check_server_ready: bool = True,
    ) -> None:
        """Predictor constructor

            A unique string that identifies your inference endpoint.
            This string can be found in the URL of your inference endpoint.
            Example: "9f237028-e630-4576-8826-f35ab9000abc" is the endpoint id in this URL:
            The API Key of your LandingLens organization.
            If not provided, it will try to load from the environment variable
            LANDINGAI_API_KEY or from the .env file.
        check_server_ready : bool, optional
            Check if the cloud inference service is reachable, by default True
        # Check if the cloud inference service is reachable
        if check_server_ready and not self._check_connectivity(url=Predictor._url):
            raise ConnectionError(
                f"Failed to connect to the cloud inference service. Check that {Predictor._url} is accesible from this device"

        self._endpoint_id = endpoint_id
        self._api_credential = load_api_credential(api_key)
        extra_x_event = {
            "endpoint_id": self._endpoint_id,
            "model_type": "fast_and_easy",
        headers = self._build_default_headers(self._api_credential, extra_x_event)
        self._session = _create_session(Predictor._url, self._num_retry, headers)
        # performance_metrics keeps performance metrics for the last call to _do_inference()
        self._performance_metrics: Dict[str, int] = {}

    def _check_connectivity(
        self, url: Optional[str] = None, host: Optional[Tuple[str, int]] = None
    ) -> bool:
        """Return whether a TCP connection to the target can be opened.

        The target is given either as ``url`` (its hostname and port are
        extracted) or directly as a ``(hostname, port)`` tuple in ``host``.
        When ``url`` is provided it takes precedence over ``host``.

        Parameters
        ----------
        url : str, optional
            URL of the service. If it carries no explicit port, 443 is used
            for ``https``, 80 for ``http``, and the system services database
            is consulted for any other scheme.
        host : Tuple[str, int], optional
            ``(hostname, port)`` pair to connect to when ``url`` is not given.

        Returns
        -------
        bool
            True if the connection attempt succeeded, False otherwise.

        Raises
        ------
        OSError
            If ``url`` has no port and its scheme is not a known service name.
        """
        if url:
            parsed_url = urlparse(url)
            if parsed_url.port:
                port = parsed_url.port
            elif parsed_url.scheme == "https":
                port = 443
            elif parsed_url.scheme == "http":
                port = 80
            else:
                # Unknown scheme: look its port up instead of leaving `port` unbound.
                port = socket.getservbyname(parsed_url.scheme)
            host = (parsed_url.hostname, port)  # type: ignore

        # The context manager closes the probe socket on every path, so repeated
        # checks do not leak file descriptors.
        # NOTE(review): no timeout is set here, so an unresponsive host blocks
        # for the platform's default connect timeout.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            # connect_ex reports failure as a non-zero error code rather than raising.
            result = sock.connect_ex(host)  # type: ignore
        return result == 0

    def _build_default_headers(
        self, api_key: APIKey, extra_x_event: Optional[Dict[str, str]] = None
    ) -> Dict[str, str]:
        """Build the HTTP headers for the request to the Cloud inference endpoint(s)."""
        tracked_properties = get_runtime_environment_info()
        if extra_x_event:
        tracking_data = {
            "event": "inference",
            "action": "POST",
            "properties": tracked_properties,
        return {
            "contentType": "multipart/form-data",
            "apikey": api_key.api_key,
            "X-event": json.dumps(tracking_data),

        # All customers have a quota of images per minute. If the server return a 429, then we will wait 60 seconds and retry. Note that we will retry forever on 429s which is ok since the rate limiter will eventually allow the request to go through.
        before_sleep=before_sleep_log(_LOGGER, logging.WARNING),
    def predict(
        image: Union[np.ndarray, PIL.Image.Image],
        metadata: Optional[InferenceMetadata] = None,
        **kwargs: Any,
    ) -> List[Prediction]:
        """Call the inference endpoint and return the prediction result.

            The input image to be predicted. The image should be in the RGB format if it has three channels.
            The (optional) metadata associated with this inference/image.
            Metadata is helpful for attaching additional information to the inference result so you can later filter the historical inference results by your custom values in LandingLens.

            See `landingai.common.InferenceMetadata` for more information.

        The inference result in a list of dictionary
            Each dictionary is a prediction result.
            The inference result has been filtered by the confidence threshold set in LandingLens and sorted by confidence score in descending order.
        buffer_bytes = serialize_image(image)
        files = {"file": buffer_bytes}
        query_params = {
            "endpoint_id": self._endpoint_id,
        data = {"metadata": metadata.json()} if metadata else None
        (preds, self._performance_metrics) = _do_inference(
        return preds

    def get_metrics(self) -> Dict[str, int]:
        """Return the performance metrics for the last inference call.

        Returns
        -------
        Dict[str, int]
            A dictionary containing the performance metrics.
            Example:
            {
                "decoding_s": 0.0084266,
                "infer_s": 3.3537345,
                "postprocess_s": 0.0255059,
                "preprocess_s": 0.0124037,
                "waiting_s": 0.0001487
            }
        """
        # NOTE(review): the example values are floats although the annotation
        # says Dict[str, int] -- confirm which one is intended.
        return self._performance_metrics

__init__(endpoint_id, *, api_key=None, check_server_ready=True)

Predictor constructor


endpoint_id: A unique string that identifies your inference endpoint. This string can be found in the URL of your inference endpoint. Example: "9f237028-e630-4576-8826-f35ab9000abc" (the endpoint id as it appears in the endpoint URL).
api_key: The API Key of your LandingLens organization. If not provided, it will try to load from the environment variable LANDINGAI_API_KEY or from the .env file.
check_server_ready : bool, optional — Check if the cloud inference service is reachable, by default True.

Source code in landingai/
def __init__(
    endpoint_id: str,
    api_key: Optional[str] = None,
    check_server_ready: bool = True,
) -> None:
    """Predictor constructor

        A unique string that identifies your inference endpoint.
        This string can be found in the URL of your inference endpoint.
        Example: "9f237028-e630-4576-8826-f35ab9000abc" is the endpoint id in this URL:
        The API Key of your LandingLens organization.
        If not provided, it will try to load from the environment variable
        LANDINGAI_API_KEY or from the .env file.
    check_server_ready : bool, optional
        Check if the cloud inference service is reachable, by default True
    # Check if the cloud inference service is reachable
    if check_server_ready and not self._check_connectivity(url=Predictor._url):
        raise ConnectionError(
            f"Failed to connect to the cloud inference service. Check that {Predictor._url} is accesible from this device"

    self._endpoint_id = endpoint_id
    self._api_credential = load_api_credential(api_key)
    extra_x_event = {
        "endpoint_id": self._endpoint_id,
        "model_type": "fast_and_easy",
    headers = self._build_default_headers(self._api_credential, extra_x_event)
    self._session = _create_session(Predictor._url, self._num_retry, headers)
    # performance_metrics keeps performance metrics for the last call to _do_inference()
    self._performance_metrics: Dict[str, int] = {}


Return the performance metrics for the last inference call.


Returns

Dict[str, int]: A dictionary containing the performance metrics.

Example:

{ "decoding_s": 0.0084266, "infer_s": 3.3537345, "postprocess_s": 0.0255059, "preprocess_s": 0.0124037, "waiting_s": 0.0001487 }


Source code in landingai/
def get_metrics(self) -> Dict[str, int]:
    """Return the performance metrics for the last inference call.

    Returns
    -------
    Dict[str, int]
        A dictionary containing the performance metrics.
        Example:
        {
            "decoding_s": 0.0084266,
            "infer_s": 3.3537345,
            "postprocess_s": 0.0255059,
            "preprocess_s": 0.0124037,
            "waiting_s": 0.0001487
        }
    """
    # NOTE(review): the example values are floats although the annotation
    # says Dict[str, int] -- confirm which one is intended.
    return self._performance_metrics

predict(image, metadata=None, **kwargs)

Call the inference endpoint and return the prediction result.


image: The input image to be predicted. The image should be in the RGB format if it has three channels.
metadata: The (optional) metadata associated with this inference/image. Metadata is helpful for attaching additional information to the inference result so you can later filter the historical inference results by your custom values in LandingLens.

See `landingai.common.InferenceMetadata` for more information.

Returns: The inference result as a list of dictionaries. Each dictionary is a prediction result. The inference result has been filtered by the confidence threshold set in LandingLens and sorted by confidence score in descending order.

Source code in landingai/
    # All customers have a quota of images per minute. If the server return a 429, then we will wait 60 seconds and retry. Note that we will retry forever on 429s which is ok since the rate limiter will eventually allow the request to go through.
    before_sleep=before_sleep_log(_LOGGER, logging.WARNING),
def predict(
    image: Union[np.ndarray, PIL.Image.Image],
    metadata: Optional[InferenceMetadata] = None,
    **kwargs: Any,
) -> List[Prediction]:
    """Call the inference endpoint and return the prediction result.

        The input image to be predicted. The image should be in the RGB format if it has three channels.
        The (optional) metadata associated with this inference/image.
        Metadata is helpful for attaching additional information to the inference result so you can later filter the historical inference results by your custom values in LandingLens.

        See `landingai.common.InferenceMetadata` for more information.

    The inference result in a list of dictionary
        Each dictionary is a prediction result.
        The inference result has been filtered by the confidence threshold set in LandingLens and sorted by confidence score in descending order.
    buffer_bytes = serialize_image(image)
    files = {"file": buffer_bytes}
    query_params = {
        "endpoint_id": self._endpoint_id,
    data = {"metadata": metadata.json()} if metadata else None
    (preds, self._performance_metrics) = _do_inference(
    return preds

serialize_rois(rois, mode)

Serialize the regions of interest into a JSON string.

Source code in landingai/
def serialize_rois(rois: List[List[Tuple[int, int]]], mode: str) -> str:
    """Serialize the regions of interest into a JSON string.

    Parameters
    ----------
    rois : List[List[Tuple[int, int]]]
        The regions of interest; each region is a list of ``(x, y)`` points.
    mode : str
        The prediction mode, stored verbatim on every serialized region.

    Returns
    -------
    str
        JSON text of a one-element list that wraps the list of regions. Each
        region is an object with a ``"location"`` list of ``{"x", "y"}``
        points and a ``"mode"`` entry.
    """
    rois_payload = [
        {
            "location": [{"x": coord[0], "y": coord[1]} for coord in roi],
            "mode": mode,
        }
        for roi in rois
    ]
    # The payload is nested in one more list before encoding.
    # NOTE(review): presumably one entry per submitted image -- confirm
    # against the OCR endpoint contract.
    return json.dumps([rois_payload])