From 86b18c840b3735289d7d9f85441b5bd42339345d Mon Sep 17 00:00:00 2001
From: Montana Low
Date: Fri, 16 Jun 2023 01:19:39 -0400
Subject: [PATCH 1/4] ggml compatibility

---
 pgml-extension/requirements.txt             |  1 +
 pgml-extension/src/bindings/transformers.py | 29 ++++++++++++++-------
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/pgml-extension/requirements.txt b/pgml-extension/requirements.txt
index d458507bf..6ec217773 100644
--- a/pgml-extension/requirements.txt
+++ b/pgml-extension/requirements.txt
@@ -1,4 +1,5 @@
 accelerate==0.19.0
+ctransformers==0.2.8
 datasets==2.12.0
 deepspeed==0.9.2
 huggingface-hub==0.14.1
diff --git a/pgml-extension/src/bindings/transformers.py b/pgml-extension/src/bindings/transformers.py
index 70eb35aee..e3f03ae0c 100644
--- a/pgml-extension/src/bindings/transformers.py
+++ b/pgml-extension/src/bindings/transformers.py
@@ -3,6 +3,7 @@
 import shutil
 import time
 
+import ctransformers
 import datasets
 from InstructorEmbedding import INSTRUCTOR
 import numpy
@@ -90,20 +91,25 @@ def transform(task, args, inputs):
     if key not in __cache_transform_pipeline_by_task:
         ensure_device(task)
         convert_dtype(task)
-        pipe = transformers.pipeline(**task)
-        if pipe.tokenizer is None:
+        model_name = task.get("model", None)
+        if model_name and model_name.endswith("ggml"):
+            pipe = ctransformers.AutoModelForCausalLM.from_pretrained(model_name)
+        else:
+            pipe = transformers.pipeline(**task)
+
+        if hasattr(pipe, "tokenizer") and pipe.tokenizer is None:
             pipe.tokenizer = AutoTokenizer.from_pretrained(pipe.model.name_or_path)
         __cache_transform_pipeline_by_task[key] = pipe
 
     pipe = __cache_transform_pipeline_by_task[key]
 
-    if pipe.task == "question-answering":
-        inputs = [orjson.loads(input) for input in inputs]
-
-    convert_eos_token(pipe.tokenizer, args)
-
-    results = pipe(inputs, **args)
-
+    if type(pipe) is ctransformers.llm.LLM:
+        results = pipe(inputs[0])
+    else:
+        if pipe.task == "question-answering":
+            inputs = [orjson.loads(input) for input in inputs]
+        convert_eos_token(pipe.tokenizer, args)
+        results = pipe(inputs, **args)
     return orjson.dumps(results, default=orjson_default).decode()
 
 
@@ -465,7 +471,10 @@ def tune(task, hyperparams, path, x_train, x_test, y_train, y_test):
     elif task == "text-generation":
         max_length = hyperparams.pop("max_length", None)
         tokenizer.pad_token = tokenizer.eos_token
-        model = AutoModelForCausalLM.from_pretrained(model_name)
+        if model_name.endswith("ggml"):
+            model = ctransformers.AutoModelForCausalLM.from_pretrained(model_name)
+        else:
+            model = AutoModelForCausalLM.from_pretrained(model_name)
         model.resize_token_embeddings(len(tokenizer))
         train = tokenize_text_generation(tokenizer, max_length, y_train)
         test = tokenize_text_generation(tokenizer, max_length, y_test)

From 9a99bcac2e88f0ade339b351bbb512a2e124298d Mon Sep 17 00:00:00 2001
From: Montana Low
Date: Fri, 16 Jun 2023 01:32:12 -0400
Subject: [PATCH 2/4] support args for ctransformers

---
 pgml-extension/src/bindings/transformers.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/pgml-extension/src/bindings/transformers.py b/pgml-extension/src/bindings/transformers.py
index e3f03ae0c..0071c475f 100644
--- a/pgml-extension/src/bindings/transformers.py
+++ b/pgml-extension/src/bindings/transformers.py
@@ -92,19 +92,19 @@ def transform(task, args, inputs):
         ensure_device(task)
         convert_dtype(task)
         model_name = task.get("model", None)
-        if model_name and model_name.endswith("ggml"):
+        if model_name and model_name.endswith("-ggml"):
             pipe = ctransformers.AutoModelForCausalLM.from_pretrained(model_name)
         else:
             pipe = transformers.pipeline(**task)
-
-        if hasattr(pipe, "tokenizer") and pipe.tokenizer is None:
-            pipe.tokenizer = AutoTokenizer.from_pretrained(pipe.model.name_or_path)
+            if pipe.tokenizer is None:
+                pipe.tokenizer = AutoTokenizer.from_pretrained(pipe.model.name_or_path)
         __cache_transform_pipeline_by_task[key] = pipe
 
     pipe = __cache_transform_pipeline_by_task[key]
 
     if type(pipe) is ctransformers.llm.LLM:
-        results = pipe(inputs[0])
+        # ctransformers doesn't support batch inference
+        results = pipe(inputs[0], **args)
     else:
         if pipe.task == "question-answering":
             inputs = [orjson.loads(input) for input in inputs]
@@ -471,10 +471,7 @@ def tune(task, hyperparams, path, x_train, x_test, y_train, y_test):
     elif task == "text-generation":
         max_length = hyperparams.pop("max_length", None)
         tokenizer.pad_token = tokenizer.eos_token
-        if model_name.endswith("ggml"):
-            model = ctransformers.AutoModelForCausalLM.from_pretrained(model_name)
-        else:
-            model = AutoModelForCausalLM.from_pretrained(model_name)
+        model = AutoModelForCausalLM.from_pretrained(model_name)
         model.resize_token_embeddings(len(tokenizer))
         train = tokenize_text_generation(tokenizer, max_length, y_train)
         test = tokenize_text_generation(tokenizer, max_length, y_test)

From 2d6455e71527f3f5f5fb3a6999a892e64813ecfe Mon Sep 17 00:00:00 2001
From: Montana Low
Date: Fri, 16 Jun 2023 01:42:38 -0400
Subject: [PATCH 3/4] wrap output for consistency with non-ggml

---
 pgml-extension/src/bindings/transformers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pgml-extension/src/bindings/transformers.py b/pgml-extension/src/bindings/transformers.py
index 0071c475f..9208b775e 100644
--- a/pgml-extension/src/bindings/transformers.py
+++ b/pgml-extension/src/bindings/transformers.py
@@ -104,7 +104,7 @@ def transform(task, args, inputs):
 
     if type(pipe) is ctransformers.llm.LLM:
         # ctransformers doesn't support batch inference
-        results = pipe(inputs[0], **args)
+        results = [pipe(inputs[0], **args)]
     else:
         if pipe.task == "question-answering":
             inputs = [orjson.loads(input) for input in inputs]

From 16b2945afccb1bb52c838d65c5fc86829502931e Mon Sep 17 00:00:00 2001
From: Montana Low
Date: Sat, 17 Jun 2023 11:39:52 -0700
Subject: [PATCH 4/4] add gptq support

---
 pgml-extension/requirements.txt             |  1 +
 pgml-extension/src/bindings/transformers.py | 63 +++++++++++++++++----
 2 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/pgml-extension/requirements.txt b/pgml-extension/requirements.txt
index 6ec217773..4ade8bf6a 100644
--- a/pgml-extension/requirements.txt
+++ b/pgml-extension/requirements.txt
@@ -1,4 +1,5 @@
 accelerate==0.19.0
+auto-gptq==0.2.2
 ctransformers==0.2.8
 datasets==2.12.0
 deepspeed==0.9.2
diff --git a/pgml-extension/src/bindings/transformers.py b/pgml-extension/src/bindings/transformers.py
index 9208b775e..b7d0af968 100644
--- a/pgml-extension/src/bindings/transformers.py
+++ b/pgml-extension/src/bindings/transformers.py
@@ -3,7 +3,6 @@
 import shutil
 import time
 
-import ctransformers
 import datasets
 from InstructorEmbedding import INSTRUCTOR
 import numpy
@@ -82,6 +81,47 @@ def ensure_device(kwargs):
     else:
         kwargs["device"] = "cpu"
 
+
+class GPTQPipeline(object):
+    def __init__(self, model_name, **task):
+        import auto_gptq
+        from huggingface_hub import snapshot_download
+        model_path = snapshot_download(model_name)
+
+        self.model = auto_gptq.AutoGPTQForCausalLM.from_quantized(model_path, **task)
+        if "use_fast_tokenizer" in task:
+            self.tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=task.pop("use_fast_tokenizer"))
+        else:
+            self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+        self.task = "text-generation"
+
+    def __call__(self, inputs, **kwargs):
+        outputs = []
+        for input in inputs:
+            tokens = self.tokenizer(input, return_tensors="pt").to(self.model.device).input_ids
+            token_ids = self.model.generate(input_ids=tokens, **kwargs)[0]
+            outputs.append(self.tokenizer.decode(token_ids))
+        return outputs
+
+
+class GGMLPipeline(object):
+    def __init__(self, model_name, **task):
+        import ctransformers
+
+        task.pop("model")
+        task.pop("task")
+        task.pop("device")
+        self.model = ctransformers.AutoModelForCausalLM.from_pretrained(model_name, **task)
+        self.tokenizer = None
+        self.task = "text-generation"
+
+    def __call__(self, inputs, **kwargs):
+        outputs = []
+        for input in inputs:
+            outputs.append(self.model(input, **kwargs))
+        return outputs
+
+
 def transform(task, args, inputs):
     task = orjson.loads(task)
     args = orjson.loads(args)
@@ -92,8 +132,11 @@ def transform(task, args, inputs):
         ensure_device(task)
         convert_dtype(task)
         model_name = task.get("model", None)
-        if model_name and model_name.endswith("-ggml"):
-            pipe = ctransformers.AutoModelForCausalLM.from_pretrained(model_name)
+        model_name = model_name.lower() if model_name else None
+        if model_name and "-ggml" in model_name:
+            pipe = GGMLPipeline(model_name, **task)
+        elif model_name and "-gptq" in model_name:
+            pipe = GPTQPipeline(model_name, **task)
         else:
             pipe = transformers.pipeline(**task)
             if pipe.tokenizer is None:
@@ -102,15 +145,11 @@ def transform(task, args, inputs):
 
     pipe = __cache_transform_pipeline_by_task[key]
 
-    if type(pipe) is ctransformers.llm.LLM:
-        # ctransformers doesn't support batch inference
-        results = [pipe(inputs[0], **args)]
-    else:
-        if pipe.task == "question-answering":
-            inputs = [orjson.loads(input) for input in inputs]
-        convert_eos_token(pipe.tokenizer, args)
-        results = pipe(inputs, **args)
-    return orjson.dumps(results, default=orjson_default).decode()
+    if pipe.task == "question-answering":
+        inputs = [orjson.loads(input) for input in inputs]
+    convert_eos_token(pipe.tokenizer, args)
+
+    return orjson.dumps(pipe(inputs, **args), default=orjson_default).decode()
 
 
 def embed(transformer, inputs, kwargs):
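
Usage sketch: how the dispatch added in PATCH 4 could be exercised once the
series is applied. This is a minimal sketch run outside Postgres for
illustration; the import alias and the "example-org/some-model-GGML"
checkpoint name are hypothetical placeholders, not part of the patches.
transform() expects the task and args as JSON strings plus a list of input
strings:

    import orjson
    from bindings import transformers as pgml  # hypothetical import of src/bindings/transformers.py

    # "-ggml" in the lowercased model name routes to GGMLPipeline;
    # "-gptq" routes to GPTQPipeline; anything else falls through to
    # transformers.pipeline(**task).
    task = orjson.dumps({
        "task": "text-generation",
        "model": "example-org/some-model-GGML",  # hypothetical checkpoint name
    }).decode()
    args = orjson.dumps({"max_new_tokens": 32}).decode()

    # GGMLPipeline generates one completion per input string
    print(pgml.transform(task, args, ["PostgresML is"]))

Note that GGMLPipeline pops "model", "task", and "device" from the task dict,
so any remaining task keys are forwarded to
ctransformers.AutoModelForCausalLM.from_pretrained().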
