We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 512ba53 · commit 2e29214 — Copy full SHA for 2e29214
1 file changed
Quick_Deploy/vLLM/model_repository/vllm/1/model.py
@@ -167,6 +167,8 @@ async def generate(self, request):
167
try:
168
request_id = random_uuid()
169
prompt = pb_utils.get_input_tensor_by_name(request, "PROMPT").as_numpy()[0]
170
+ if isinstance(prompt, bytes):
171
+ prompt = prompt.decode("utf-8")
172
stream = pb_utils.get_input_tensor_by_name(request, "STREAM").as_numpy()[0]
173
174
# Request parameters are not yet supported via
@@ -184,7 +186,7 @@ async def generate(self, request):
184
186
185
187
last_output = None
188
async for output in self.llm_engine.generate(
- str(prompt), sampling_params, request_id
189
+ prompt, sampling_params, request_id
190
):
191
if stream:
192
response_sender.send(self.create_response(output))
0 commit comments