2 changes: 2 additions & 0 deletions pgml-extension/requirements.txt
@@ -1,4 +1,6 @@
 accelerate==0.19.0
+auto-gptq==0.2.2
+ctransformers==0.2.8
 datasets==2.12.0
 deepspeed==0.9.2
 huggingface-hub==0.14.1
59 changes: 52 additions & 7 deletions pgml-extension/src/bindings/transformers.py
@@ -81,6 +81,47 @@ def ensure_device(kwargs):
     else:
         kwargs["device"] = "cpu"
 
+
+class GPTQPipeline(object):
+    def __init__(self, model_name, **task):
+        import auto_gptq
+        from huggingface_hub import snapshot_download
+        model_path = snapshot_download(model_name)
+
+        self.model = auto_gptq.AutoGPTQForCausalLM.from_quantized(model_path, **task)
+        if "use_fast_tokenizer" in task:
+            self.tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=task.pop("use_fast_tokenizer"))
+        else:
+            self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+        self.task = "text-generation"
+
+    def __call__(self, inputs, **kwargs):
+        outputs = []
+        for input in inputs:
+            tokens = self.tokenizer(input, return_tensors="pt").to(self.model.device).input_ids
+            token_ids = self.model.generate(input_ids=tokens, **kwargs)[0]
+            outputs.append(self.tokenizer.decode(token_ids))
+        return outputs
+
+
+class GGMLPipeline(object):
+    def __init__(self, model_name, **task):
+        import ctransformers
+
+        task.pop("model")
+        task.pop("task")
+        task.pop("device")
+        self.model = ctransformers.AutoModelForCausalLM.from_pretrained(model_name, **task)
+        self.tokenizer = None
+        self.task = "text-generation"
+
+    def __call__(self, inputs, **kwargs):
+        outputs = []
+        for input in inputs:
+            outputs.append(self.model(input, **kwargs))
+        return outputs
+
+
 def transform(task, args, inputs):
     task = orjson.loads(task)
     args = orjson.loads(args)
@@ -90,21 +131,25 @@ def transform(task, args, inputs):
     if key not in __cache_transform_pipeline_by_task:
         ensure_device(task)
         convert_dtype(task)
-        pipe = transformers.pipeline(**task)
-        if pipe.tokenizer is None:
-            pipe.tokenizer = AutoTokenizer.from_pretrained(pipe.model.name_or_path)
+        model_name = task.get("model", None)
+        model_name = model_name.lower() if model_name else None
+        if model_name and "-ggml" in model_name:
+            pipe = GGMLPipeline(model_name, **task)
+        elif model_name and "-gptq" in model_name:
+            pipe = GPTQPipeline(model_name, **task)
+        else:
+            pipe = transformers.pipeline(**task)
+            if pipe.tokenizer is None:
+                pipe.tokenizer = AutoTokenizer.from_pretrained(pipe.model.name_or_path)
         __cache_transform_pipeline_by_task[key] = pipe
 
     pipe = __cache_transform_pipeline_by_task[key]
 
     if pipe.task == "question-answering":
         inputs = [orjson.loads(input) for input in inputs]
 
     convert_eos_token(pipe.tokenizer, args)
-
-    results = pipe(inputs, **args)
-
-    return orjson.dumps(results, default=orjson_default).decode()
+    return orjson.dumps(pipe(inputs, **args), default=orjson_default).decode()
 
 
 def embed(transformer, inputs, kwargs):
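For reviewers who want to exercise the new GGML path outside of Postgres, here is a minimal sketch. It assumes `ctransformers==0.2.8` is installed and uses `marella/gpt-2-ggml` purely as an illustrative small GGML repo; any model whose name contains `-ggml` routes the same way. Note that `GGMLPipeline.__init__` pops the `model`, `task`, and `device` keys before forwarding the remaining kwargs to `ctransformers`, so those keys must be present, just as they are in the JSON task blob `transform()` receives:

```python
# Minimal sketch, not part of this PR. Assumes GGMLPipeline from the diff
# above is in scope and ctransformers is installed.
# "marella/gpt-2-ggml" is only an illustrative repo name.
pipe = GGMLPipeline(
    "marella/gpt-2-ggml",
    model="marella/gpt-2-ggml",  # popped in __init__
    task="text-generation",      # popped in __init__
    device="cpu",                # popped in __init__
)
print(pipe(["PostgresML is"], max_new_tokens=16))
```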
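The GPTQ path can be exercised the same way. A minimal sketch, assuming `auto-gptq==0.2.2` is installed and using a placeholder repo name (anything with `-gptq` in its name routes here); the remaining keyword arguments are forwarded verbatim to `AutoGPTQForCausalLM.from_quantized`:

```python
# Minimal sketch, not part of this PR. Assumes GPTQPipeline from the diff
# above is in scope; the repo name is a placeholder for any GPTQ-quantized
# model. Weights are fetched via huggingface_hub.snapshot_download first.
pipe = GPTQPipeline("TheBloke/vicuna-7B-v1.3-GPTQ", use_safetensors=True)
print(pipe(["The capital of France is"], max_new_tokens=8))
```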