Add more parameters to vLLM tutorial (#53)

the-david-oy · web-flow · commit e835cf4a41f2 · 2023-10-01T20:49:48.000-07:00
* Remove unused UserData class

* Add more vLLM parameters

* Fix params, add others
diff --git a/Quick_Deploy/vLLM/client.py b/Quick_Deploy/vLLM/client.py
@@ -36,10 +36,6 @@
 from tritonclient.utils import *
 
 
-class UserData:
-    def __init__(self):
-        self._completed_requests = queue.Queue()
-
 
 def create_request(prompt, stream, request_id, sampling_parameters, model_name, send_parameters_as_tensor=True):
     inputs = []
diff --git a/Quick_Deploy/vLLM/model_repository/vllm/1/model.py b/Quick_Deploy/vLLM/model_repository/vllm/1/model.py
@@ -127,12 +127,21 @@ def get_sampling_params_dict(self, params_json):
         params_dict = json.loads(params_json)
 
         # Special parsing for the supported sampling parameters
-        # TODO: Add more parameters if needed
-        float_keys = ["temperature", "top_p"]
+        bool_keys = ["ignore_eos", "skip_special_tokens", "use_beam_search"]
+        for k in bool_keys:
+            if k in params_dict:
+                params_dict[k] = bool(params_dict[k])
+
+        float_keys = ["frequency_penalty", "length_penalty", "presence_penalty", "temperature", "top_p"]
         for k in float_keys:
             if k in params_dict:
                 params_dict[k] = float(params_dict[k])
 
+        int_keys = ["best_of", "max_tokens", "n", "top_k"]
+        for k in int_keys:
+            if k in params_dict:
+                params_dict[k] = int(params_dict[k])
+
         return params_dict
 
     def create_response(self, vllm_output):