Skip to content

dependencies for starcoder #648

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions pgml-extension/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
accelerate==0.19.0
datasets==2.10.1
datasets==2.12.0
deepspeed==0.8.1
huggingface-hub==0.14.1
InstructorEmbedding
lightgbm
pandas==1.5.3
Expand All @@ -14,6 +15,6 @@ sentence-transformers==2.2.2
torch==1.13.1
torchaudio==0.13.1
torchvision==0.14.1
tqdm==4.64.1
transformers==4.28.1
tqdm==4.65.0
transformers==4.29.1
xgboost
52 changes: 42 additions & 10 deletions pgml-extension/src/bindings/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,44 @@
__cache_transform_pipeline_by_task = {}


# Map from the user-facing dtype names accepted in JSON task arguments to the
# corresponding torch.dtype objects.  Every key is exactly the attribute name
# on the torch module, so the mapping is built by attribute lookup.
DTYPE_MAP = {
    name: getattr(torch, name)
    for name in (
        "uint8",
        "int8",
        "int16",
        "int32",
        "int64",
        "bfloat16",
        "float16",
        "float32",
        "float64",
        "complex64",
        "complex128",
        "bool",
    )
}


def convert_dtype(kwargs):
    """Convert a string "torch_dtype" entry in *kwargs* to a torch.dtype.

    The SQL-facing API passes dtypes as JSON strings (e.g. "float16"); the
    transformers pipeline wants actual ``torch.dtype`` objects.  Mutates
    *kwargs* in place; a missing "torch_dtype" key is a no-op.

    Raises:
        ValueError: if the given dtype name is not one of the supported
            names in ``DTYPE_MAP`` (previously this surfaced as an opaque
            ``KeyError`` with no hint about the valid choices).
    """
    if "torch_dtype" in kwargs:
        name = kwargs["torch_dtype"]
        try:
            kwargs["torch_dtype"] = DTYPE_MAP[name]
        except KeyError:
            raise ValueError(
                f"unsupported torch_dtype {name!r}; expected one of: "
                + ", ".join(sorted(DTYPE_MAP))
            ) from None


def convert_eos_token(tokenizer, args):
    """Normalize generation *args* to carry an "eos_token_id".

    When the caller supplied an "eos_token" (a token string), it is removed
    from *args* and translated to its id through the tokenizer; otherwise the
    tokenizer's default ``eos_token_id`` is used.  Mutates *args* in place.
    """
    caller_supplied = "eos_token" in args
    args["eos_token_id"] = (
        tokenizer.convert_tokens_to_ids(args.pop("eos_token"))
        if caller_supplied
        else tokenizer.eos_token_id
    )


def ensure_device(kwargs):
    """Fill in a default "device" entry for a transformers pipeline call.

    Any caller-provided "device" or "device_map" is respected untouched.
    Otherwise, when CUDA is available a GPU is chosen by spreading worker
    processes across the visible devices (pid modulo device count); with no
    CUDA the device falls back to "cpu".  Mutates *kwargs* in place.
    """
    if kwargs.get("device") is not None or kwargs.get("device_map") is not None:
        return  # caller already chose a placement
    if torch.cuda.is_available():
        # Distribute concurrent backend processes across GPUs by pid.
        kwargs["device"] = f"cuda:{os.getpid() % torch.cuda.device_count()}"
    else:
        kwargs["device"] = "cpu"


class NumpyJSONEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, np.float32):
Expand All @@ -55,16 +93,19 @@ def transform(task, args, inputs):
args = json.loads(args)
inputs = json.loads(inputs)

key = ",".join([f"{key}:{val}" for (key, val) in sorted(task.items())])
ensure_device(task)
convert_dtype(task)

key = ",".join([f"{key}:{val}" for (key, val) in sorted(task.items())])
if key not in __cache_transform_pipeline_by_task:
__cache_transform_pipeline_by_task[key] = transformers.pipeline(**task)
pipe = __cache_transform_pipeline_by_task[key]

if pipe.task == "question-answering":
inputs = [json.loads(input) for input in inputs]

convert_eos_token(pipe.tokenizer, args)

return json.dumps(pipe(inputs, **args), cls=NumpyJSONEncoder)


Expand Down Expand Up @@ -540,12 +581,3 @@ def generate(model_id, data, config):
return all_preds


# NOTE(review): this is the deletion side of the diff — the pre-refactor copy
# of ensure_device that the PR moves to the top of the module.  Kept verbatim.
def ensure_device(kwargs):
    """Default the "device" kwarg when neither "device" nor "device_map" is set.

    Picks a CUDA device by pid modulo the device count when CUDA is
    available, otherwise "cpu".  Mutates *kwargs* in place.
    """
    device = kwargs.get("device")
    device_map = kwargs.get("device_map")
    if device is None and device_map is None:
        if torch.cuda.is_available():
            # Spread concurrent processes across the visible GPUs.
            kwargs["device"] = "cuda:" + str(os.getpid() % torch.cuda.device_count())
        else:
            kwargs["device"] = "cpu"

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy