import requests
import base64
from PIL import Image
from io import BytesIO
import time

class VLMessageClient:
    """Client that sends one image plus a text prompt to a chat-completions API.

    The image is downloaded, re-encoded as an RGB JPEG, embedded in the
    request as a base64 ``data:`` URL, and posted to
    ``{api_url}/v1/chat/completions``.

    The instance owns a ``requests.Session`` for the API calls. Call
    :meth:`close` (or use the client in a ``with`` statement) to release it.
    """

    DEFAULT_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"

    def __init__(self, api_url, model=DEFAULT_MODEL, max_retries=3,
                 download_timeout=30):
        """Create a client.

        Args:
            api_url: Base URL of the server, with or without a trailing slash.
            model: Model name sent in the request payload.
            max_retries: Total number of POST attempts made by
                :meth:`process_item` before giving up.
            download_timeout: Timeout in seconds for fetching the image.
        """
        self.api_url = api_url
        self.model = model
        self.max_retries = max_retries
        self.download_timeout = download_timeout
        self.session = requests.Session()

    def close(self):
        """Close the underlying HTTP session."""
        self.session.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def _encode_image(self, image_url):
        """Download ``image_url`` and return it as a base64-encoded JPEG string.

        Returns:
            The base64 text, or ``None`` if the download or the re-encoding
            failed (the error is printed).
        """
        try:
            # Deliberately not using self.session here: headers or cookies a
            # caller configured for the API must not be sent to image hosts.
            # The timeout keeps a stalled image host from blocking forever.
            response = requests.get(image_url, timeout=self.download_timeout)
            response.raise_for_status()
            img = Image.open(BytesIO(response.content))
            # JPEG has no alpha channel / palette mode, so normalize to RGB.
            img = img.convert("RGB")
            buffered = BytesIO()
            img.save(buffered, format="JPEG", quality=95)
            return base64.b64encode(buffered.getvalue()).decode("utf-8")
        except Exception as e:
            # Best-effort boundary: any network or decoding failure is
            # reported and turned into None for the caller.
            print(f"图像编码出错: {e}")
            return None

    def build_messages(self, image_url, text_prompt):
        """Build the ``messages`` list for a single user turn.

        Returns:
            A one-element list holding a user message whose content is the
            image (as a JPEG data URL) followed by ``text_prompt``, or
            ``None`` if the image could not be fetched or encoded.
        """
        base64_image = self._encode_image(image_url)
        if base64_image is None:
            return None
        return [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
                    },
                    {
                        "type": "text",
                        "text": text_prompt
                    }
                ]
            }
        ]

    def process_item(self, image_url, text_prompt):
        """Ask the model about one image and return the generated text.

        The image is fetched and encoded once; only the POST is retried, up
        to ``max_retries`` attempts, with a growing timeout
        (``30 + attempt * 5`` seconds) and a capped exponential pause between
        attempts.

        Returns:
            The content of the first choice in the response, or ``None`` if
            the image could not be prepared or every attempt failed.
        """
        # The messages do not depend on the attempt number, so build them
        # once instead of re-downloading the image on every retry.
        messages = self.build_messages(image_url, text_prompt)
        if messages is None:
            return None

        payload = {
            "model": self.model,
            "messages": messages,
            "temperature": 0.8,
            "max_tokens": 500,
            "stop": ["<|EndOfText|>"]
        }
        # Tolerate a trailing slash in api_url to avoid a "//v1/..." path.
        endpoint = self.api_url.rstrip("/") + "/v1/chat/completions"

        for attempt in range(1, self.max_retries + 1):
            try:
                response = self.session.post(
                    endpoint,
                    json=payload,
                    timeout=30 + attempt * 5,
                    headers={"Content-Type": "application/json"}
                )
                response.raise_for_status()
                return response.json()["choices"][0]["message"]["content"]
            except Exception as e:
                # Covers transport errors, HTTP error statuses and responses
                # that lack the expected choices/message/content structure.
                if attempt == self.max_retries:
                    print(f"请求失败(已达最大重试次数): {str(e)}")
                else:
                    time.sleep(min(2 ** attempt, 10))

        return None


if __name__ == "__main__":
    # Demo run: describe a sample image using a server on the local network.
    demo_client = VLMessageClient("http://192.168.30.226:8000")
    answer = demo_client.process_item(
        "https://pic.52112.com/180425/180425_178/hI0jtndlwe_small.jpg",
        "这张图片中展示了什么场景?请详细描述。",
    )

    # process_item yields None (or possibly empty text) when nothing usable
    # came back; both are reported as "no valid result".
    if not answer:
        print("未得到有效结果")
    else:
        print("生成结果:")
        print(answer)
Logo

欢迎加入 MCP 技术社区!与志同道合者携手前行,一同解锁 MCP 技术的无限可能!

更多推荐