File tree Expand file tree Collapse file tree
Quick_Deploy/vLLM/model_repository/vllm @@ -166,15 +166,20 @@ async def generate(self, request):
166166 self .ongoing_request_count += 1
167167 try :
168168 request_id = random_uuid ()
169+
169170 prompt = pb_utils .get_input_tensor_by_name (request , "PROMPT" ).as_numpy ()[0 ]
170171 if isinstance (prompt , bytes ):
171172 prompt = prompt .decode ("utf-8" )
172- stream = pb_utils .get_input_tensor_by_name (request , "STREAM" ).as_numpy ()[0 ]
173+
174+ # stream is an optional input
175+ stream = False
176+ stream_input_tensor = pb_utils .get_input_tensor_by_name (request , "STREAM" )
177+ if stream_input_tensor :
178+ stream = stream_input_tensor .as_numpy ()[0 ]
173179
174180 # Request parameters are not yet supported via
175181 # BLS. Provide an optional mechanism to receive serialized
176182 # parameters as an input tensor until support is added
177-
178183 parameters_input_tensor = pb_utils .get_input_tensor_by_name (request , "SAMPLING_PARAMETERS" )
179184 if parameters_input_tensor :
180185 parameters = parameters_input_tensor .as_numpy ()[0 ].decode ("utf-8" )
@@ -49,6 +49,7 @@ input [
4949 name: "STREAM"
5050 data_type: TYPE_BOOL
5151 dims: [ 1 ]
52+ optional: true
5253 },
5354 {
5455 name: "SAMPLING_PARAMETERS"
You can’t perform that action at this time.
0 commit comments