Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions fms_mo/dq.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,16 +88,15 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
config_kwargs = {
"cache_dir": model_args.cache_dir,
"revision": model_args.model_revision,
"use_auth_token": True if model_args.use_auth_token else None,
"torchscript": True,
"token": True if model_args.use_auth_token else None,
"attn_implementation": attn_implementation,
}
config = AutoConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs)
tokenizer_kwargs = {
"cache_dir": model_args.cache_dir,
"use_fast": model_args.use_fast_tokenizer,
"revision": model_args.model_revision,
"use_auth_token": True if model_args.use_auth_token else None,
"token": True if model_args.use_auth_token else None,
}
tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path, **tokenizer_kwargs
Expand All @@ -121,7 +120,7 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
config=config,
cache_dir=model_args.cache_dir,
revision="main",
use_auth_token=True if model_args.use_auth_token else None,
token=True if model_args.use_auth_token else None,
torch_dtype=torch_dtype,
device_map=model_args.device_map,
low_cpu_mem_usage=bool(model_args.device_map),
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ dynamic = ["version"]
dependencies = [
"numpy>=1.26.4,<2.3.0",
"accelerate>=0.20.3,!=0.34,<1.11",
"transformers>=4.45,<4.58",
"transformers>4.45,<5.9",
"torch>=2.2.0,<2.11.0",
"tqdm>=4.66.2,<5.0",
"datasets>=3.0.0,<5.0",
Expand All @@ -36,6 +36,7 @@ dependencies = [
[project.optional-dependencies]
examples = ["ninja>=1.11.1.1,<2.0", "evaluate", "huggingface_hub"]
fp8 = ["llmcompressor", "torchao==0.11"] # FP8 matmul on CPU needs a fix before advancing torchao > 0.11
fp8-infer = ["torchao==0.11"]
gptq = ["Cython", "gptqmodel>=1.7.3"]
mx = ["microxcaling>=1.1"]
opt = ["fms-model-optimizer[fp8, gptq, mx]"]
Expand Down
12 changes: 9 additions & 3 deletions tests/build/test_launch_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
# Third Party
import pytest
import torch
import transformers

# First Party
from build.accelerate_launch import main
Expand Down Expand Up @@ -241,16 +242,21 @@ def _validate_termination_files_when_quantization_succeeds(base_dir):
"""Check whether the termination log and .complete files exist"""
assert os.path.exists(os.path.join(base_dir, "/termination-log")) is False
assert os.path.exists(os.path.join(base_dir, ".complete")) is True
# assert os.path.exists(os.path.join(base_dir, training_logs_filename)) is True


def _validate_quantization_output(base_dir, quant_method):
"""Check whether the tokenizer and quantized model artifacts exist"""
# Check tokenizer files exist
assert os.path.exists(os.path.join(base_dir, "tokenizer.json")) is True
assert os.path.exists(os.path.join(base_dir, "special_tokens_map.json")) is True

# special_tokens_map.json is optional in transformers 5.0+ for some tokenizers
transformers_version = tuple(
int(x) for x in transformers.__version__.split(".")[:2]
)
if transformers_version < (5, 0):
assert os.path.exists(os.path.join(base_dir, "special_tokens_map.json")) is True

assert os.path.exists(os.path.join(base_dir, "tokenizer_config.json")) is True
# assert os.path.exists(os.path.join(base_dir, "tokenizer.model")) is True

# Check quantized model files exist
if quant_method == "gptq":
Expand Down
15 changes: 15 additions & 0 deletions tests/models/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import pytest
import torch
import torch.nn.functional as F
import transformers

# Local
# fms_mo imports
Expand Down Expand Up @@ -1302,6 +1303,12 @@ def model_bert():
Returns:
transformers.models.bert.modeling_bert.BertModel: BERT model
"""
# torchscript parameter removed in transformers 5.0
transformers_version = tuple(
int(x) for x in transformers.__version__.split(".")[:2]
)
if transformers_version >= (5, 0):
return BertModel.from_pretrained("google-bert/bert-base-uncased")
return BertModel.from_pretrained("google-bert/bert-base-uncased", torchscript=True)


Expand All @@ -1313,6 +1320,14 @@ def model_bert_eager():
Returns:
transformers.models.bert.modeling_bert.BertModel: BERT model
"""
# torchscript parameter removed in transformers 5.0
transformers_version = tuple(
int(x) for x in transformers.__version__.split(".")[:2]
)
if transformers_version >= (5, 0):
return BertModel.from_pretrained(
"google-bert/bert-base-uncased", attn_implementation="eager"
)
return BertModel.from_pretrained(
"google-bert/bert-base-uncased", torchscript=True, attn_implementation="eager"
)
Expand Down
8 changes: 8 additions & 0 deletions tests/models/test_qmodelprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,10 @@ def test_vit_dynamo(
qmodule_error(model_vit, 2, 36)


@pytest.mark.skipif(
not available_packages["torchvision"],
reason="Requires torchvision",
)
def test_resnet18(
model_resnet18,
batch_resnet18,
Expand All @@ -290,6 +294,10 @@ def test_resnet18(
qmodule_error(model_resnet18, 4, 17)


@pytest.mark.skipif(
not available_packages["torchvision"],
reason="Requires torchvision",
)
def test_vit_base(
model_vit_base,
batch_vit_base,
Expand Down
Loading