from transformers import AutoTokenizer
from PIL import Image  # Needed to load the input image below
from vllm import LLM, SamplingParams
# List of image file paths
IMAGES = [
    "/root/ld/ld_project/MiniCPM-V/assets/airplane.jpeg",  # Local image path
]
# Change this to your quantized AWQ model path
MODEL_NAME = "/root/ld/ld_model_pretrained/Minicpmv2_6" # AWQ model path
# Open and convert the image
image = Image.open(IMAGES[0]).convert("RGB")
# Initialize the tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
# Initialize the language model
llm = LLM(model=MODEL_NAME,
          trust_remote_code=True,
          gpu_memory_utilization=1,  # Fraction of GPU memory to use (1 = all)
          max_model_len=2048)  # Adjust this value according to memory availability
# Build the conversation message
messages = [{'role': 'user', 'content': '(<image>./</image>)\n' + 'Please describe this picture'}]
# Apply the conversation template to the messages
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# Stop tokens for MiniCPM-V 2.6; earlier model versions use different ids, e.g.:
# stop_token_ids = [tokenizer.eos_id]                    # MiniCPM-V 2.0
# stop_token_ids = [tokenizer.eos_id, tokenizer.eot_id]  # MiniCPM-Llama3-V 2.5
stop_tokens = ['<|im_end|>', '<|endoftext|>']
stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens]
sampling_params = SamplingParams(
    stop_token_ids=stop_token_ids,
    temperature=0.7,  # Example decoding settings; tune these for your use case
    max_tokens=1024,
)
# Run inference: vLLM accepts the prompt and the image together as one multimodal input
outputs = llm.generate({
    "prompt": prompt,
    "multi_modal_data": {"image": image},
}, sampling_params=sampling_params)
print(outputs[0].outputs[0].text)