Skip to content

Commit 2e29214

Browse files
authored
Decode bytes input to utf-8 string before passing to vllm engine (#57)
1 parent 512ba53 commit 2e29214

1 file changed

Lines changed: 3 additions & 1 deletion

File tree

  • Quick_Deploy/vLLM/model_repository/vllm/1

Quick_Deploy/vLLM/model_repository/vllm/1/model.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -167,6 +167,8 @@ async def generate(self, request):
167 167           try:
168 168               request_id = random_uuid()
169 169               prompt = pb_utils.get_input_tensor_by_name(request, "PROMPT").as_numpy()[0]
    170 +             if isinstance(prompt, bytes):
    171 +                 prompt = prompt.decode("utf-8")
170 172               stream = pb_utils.get_input_tensor_by_name(request, "STREAM").as_numpy()[0]
171 173
172 174               # Request parameters are not yet supported via
@@ -184,7 +186,7 @@ async def generate(self, request):
184 186
185 187               last_output = None
186 188               async for output in self.llm_engine.generate(
187     -                 str(prompt), sampling_params, request_id
    189 +                 prompt, sampling_params, request_id
188 190               ):
189 191                   if stream:
190 192                       response_sender.send(self.create_response(output))

0 commit comments

Comments
 (0)