
[Official Tutorial] ChatGLM2-6B Deployment and Fine-tuning

2023-07-21 09:48 · Author: 菁英小学生
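What follows is a console transcript from a Windows deployment attempt, covering three stages: a CUDA kernel error while quantizing chatglm2-6b on the fly, a path-separator mistake when switching to the pre-quantized int4 checkpoint, and a short PyTorch REPL session confirming that CUDA itself works.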

Microsoft Windows [Version 10.0.19045.3086]

(c) Microsoft Corporation. All rights reserved.


C:\Users\Administrator>conda env list

# conda environments:

#

base           D:\Develop\anaconda3

             H:\OpenAI\ChatGLM2-6B\ENV
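Note that the second environment is listed without a name: it was created with a path prefix (conda create --prefix) rather than a name, so it must be activated by its full path, as done below.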



C:\Users\Administrator>cd /d H:\OpenAI\ChatGLM2-6B


H:\OpenAI\ChatGLM2-6B>conda activate H:\OpenAI\ChatGLM2-6B\ENV


(H:\OpenAI\ChatGLM2-6B\ENV) H:\OpenAI\ChatGLM2-6B>python cli_demo.py

You are using a model of type chatglm to instantiate a model of type . This is not supported for all configurations of models and can yield errors.

Loading checkpoint shards: 100%|█████████████████████████████████████████████| 7/7 [37:52<00:00, 324.60s/it]

╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮

│ H:\OpenAI\ChatGLM2-6B\cli_demo.py:8 in <module>                         │

│                                                 │

│  5 import readline                                       │

│  6                                               │

│  7 tokenizer = AutoTokenizer.from_pretrained("THUDM\chatglm2-6b", trust_remote_code=True)   │

│ ❱ 8 model = AutoModel.from_pretrained("THUDM\chatglm2-6b", trust_remote_code=True).quantize(  │

│  9 model = model.eval()                                    │

│  10                                               │

│  11 os_name = platform.system()                                 │

│                                                 │

│ C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\chatglm2-6b\modeling_chat │

│ glm.py:1109 in quantize                                     │

│                                                 │

│  1106 │  │                                           │

│  1107 │  │  self.config.quantization_bit = bits                        │

│  1108 │  │                                           │

│ ❱ 1109 │  │  self.transformer.encoder = quantize(self.transformer.encoder, bits, empty_init=e │

│  1110 │  │  │  │  │  │  │  │  │  │  │  **kwargs)                   │

│  1111 │  │  return self                                    │

│  1112                                              │

│                                                 │

│ C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\chatglm2-6b\quantization. │

│ py:155 in quantize                                        │

│                                                 │

│  152 def quantize(model, weight_bit_width, empty_init=False, device=None):           │

│  153 │  """Replace fp16 linear with quantized linear"""                    │

│  154 │  for layer in model.layers:                               │

│ ❱ 155 │  │  layer.self_attention.query_key_value = QuantizedLinear(              │

│  156 │  │  │  weight_bit_width=weight_bit_width,                       │

│  157 │  │  │  weight=layer.self_attention.query_key_value.weight.to(torch.cuda.current_dev  │

│  158 │  │  │  bias=layer.self_attention.query_key_value.bias,                │

│                                                 │

│ C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\chatglm2-6b\quantization. │

│ py:139 in __init__                                        │

│                                                 │

│  136 │  │  │  self.weight_scale = weight.abs().max(dim=-1).values / ((2 ** (weight_bit_wid  │

│  137 │  │  │  self.weight = torch.round(weight / self.weight_scale[:, None]).to(torch.int8  │

│  138 │  │  │  if weight_bit_width == 4:                           │

│ ❱ 139 │  │  │  │  self.weight = compress_int4_weight(self.weight)              │

│  140 │  │                                           │

│  141 │  │  self.weight = Parameter(self.weight.to(device), requires_grad=False)        │

│  142 │  │  self.weight_scale = Parameter(self.weight_scale.to(device), requires_grad=False)  │

│                                                 │

│ C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\chatglm2-6b\quantization. │

│ py:78 in compress_int4_weight                                  │

│                                                 │

│  75 │  │  gridDim = (n, 1, 1)                                │

│  76 │  │  blockDim = (min(round_up(m, 32), 1024), 1, 1)                   │

│  77 │  │                                           │

│ ❱ 78 │  │  kernels.int4WeightCompression(                           │

│  79 │  │  │  gridDim,                                    │

│  80 │  │  │  blockDim,                                   │

│  81 │  │  │  0,                                       │

│                                                 │

│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\kernels\base.py:48 in __call__      │

│                                                 │

│  45 │  │  │  sharedMemBytes : int, stream : cudart.cudaStream_t, params : List[Any] ) ->   │

│  46 │  │  assert len(gridDim) == 3                              │

│  47 │  │  assert len(blockDim) == 3                              │

│ ❱ 48 │  │  func = self._prepare_func()                             │

│  49 │  │                                            │

│  50 │  │  cuda.cuLaunchKernel(func,                              │

│  51 │  │  │  gridDim[0], gridDim[1], gridDim[2],                       │

│                                                 │

│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\kernels\base.py:40 in _prepare_func   │

│                                                 │

│  37 │  │  cudart.cudaSetDevice(curr_device)  # ensure cudart context             │

│  38 │  │  if curr_device not in self._funcs:                         │

│  39 │  │  │  self._funcs[curr_device] = cuda.cuModuleGetFunction(              │

│ ❱ 40 │  │  │  │  self._module.get_module(), self._func_name                 │

│  41 │  │  │  )                                        │

│  42 │  │  return self._funcs[curr_device]                           │

│  43                                               │

│                                                 │

│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\kernels\base.py:24 in get_module     │

│                                                 │

│  21 │  │  curr_device = cudart.cudaGetDevice()                        │

│  22 │  │  if curr_device not in self._module:                         │

│  23 │  │  │  Device(curr_device).use()  # force initialize context             │

│ ❱ 24 │  │  │  self._module[curr_device] = cuda.cuModuleLoadData(self._code)          │

│  25 │  │  return self._module[curr_device]                          │

│  26                                               │

│  27                                               │

│                                                 │

│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\library\base.py:94 in wrapper      │

│                                                 │

│  91 │  │  │  def decorator(f):                                │

│  92 │  │  │  │  @wraps(f)                                  │

│  93 │  │  │  │  def wrapper(*args, **kwargs):                        │

│ ❱ 94 │  │  │  │  │  return f(*args, **kwargs)                        │

│  95 │  │  │  │  return wrapper                               │

│  96 │  │  │  return decorator                                │

│  97                                               │

│                                                 │

│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\library\cuda.py:233 in cuModuleLoadData │

│                                                 │

│  230 @cuda.bind("cuModuleLoadData", [ctypes.POINTER(CUmodule), ctypes.c_void_p], CUresult)   │

│  231 def cuModuleLoadData(data : bytes) -> CUmodule:                      │

│  232 │  module = CUmodule()                                  │

│ ❱ 233 │  checkCUStatus(cuda.cuModuleLoadData(ctypes.byref(module), data))            │

│  234 │  return module                                     │

│  235                                              │

│  236 @cuda.bind("cuModuleUnload", [CUmodule], CUresult)                     │

│                                                 │

│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\library\cuda.py:216 in checkCUStatus   │

│                                                 │

│  213                                              │

│  214 def checkCUStatus(error : int) -> None:                          │

│  215 │  if error != CUDA_SUCCESS:                               │

│ ❱ 216 │  │  raise RuntimeError("CUDA Error: %s" % cuGetErrorString(error))           │

│  217                                              │

│  218 @cuda.bind("cuDriverGetVersion", [ctypes.POINTER(ctypes.c_int)], CUresult)         │

│  219 def cuDriverGetVersion() -> int:                              │

╰──────────────────────────────────────────────────────────────────────────────────────────────────╯

RuntimeError: CUDA Error: no kernel image is available for execution on the device
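The traceback bottoms out in cpm_kernels: cuModuleLoadData cannot load the compiled int4 quantization kernels, and CUDA reports that no kernel image matches the device. This typically points to a mismatch between the prebuilt kernels (or the installed PyTorch/CUDA build) and the GPU's compute capability; the earlier "model of type chatglm" warning is unrelated and can normally be ignored. Below is a minimal sketch of the loading lines in cli_demo.py with the usual workarounds (memory figures are the approximate requirements from the ChatGLM2-6B README):

from transformers import AutoTokenizer, AutoModel

MODEL = "THUDM/chatglm2-6b"  # forward slash: a Hub repo id, not a Windows path

tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)

# Option 1: skip on-the-fly quantization entirely (needs roughly 13 GB of VRAM):
model = AutoModel.from_pretrained(MODEL, trust_remote_code=True).cuda()

# Option 2: if the quantization kernels fail on this GPU, fall back to CPU
# (very slow, roughly 25 GB of RAM):
# model = AutoModel.from_pretrained(MODEL, trust_remote_code=True).float()

model = model.eval()

The next run below tries a third option, the pre-quantized THUDM/chatglm2-6b-int4 checkpoint, but stumbles over the Windows path separator.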


(H:\OpenAI\ChatGLM2-6B\ENV) H:\OpenAI\ChatGLM2-6B>python cli_demo.py

Traceback (most recent call last):

 File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\configuration_utils.py", line 629, in _get_config_dict

  resolved_config_file = cached_file(

 File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\utils\hub.py", line 417, in cached_file

  resolved_file = hf_hub_download(

 File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\huggingface_hub\utils\_validators.py", line 110, in _inner_fn

  validate_repo_id(arg_value)

 File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\huggingface_hub\utils\_validators.py", line 164, in validate_repo_id

  raise HFValidationError(

huggingface_hub.utils._validators.HFValidationError: Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are forbidden, '-' and '.' cannot start or end the name, max length is 96: 'THUDM\chatglm2-6b-int4'.


During handling of the above exception, another exception occurred:


Traceback (most recent call last):

 File "H:\OpenAI\ChatGLM2-6B\cli_demo.py", line 8, in <module>

  model = AutoModel.from_pretrained("THUDM\chatglm2-6b-int4", trust_remote_code=True).quantize(4).cuda()

 File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\models\auto\auto_factory.py", line 456, in from_pretrained

  config, kwargs = AutoConfig.from_pretrained(

 File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\models\auto\configuration_auto.py", line 944, in from_pretrained

  config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)

 File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\configuration_utils.py", line 574, in get_config_dict

  config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)

 File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\configuration_utils.py", line 650, in _get_config_dict

  raise EnvironmentError(

OSError: Can't load the configuration of 'THUDM\chatglm2-6b-int4'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'THUDM\chatglm2-6b-int4' is the correct path to a directory containing a config.json file
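Both exceptions come from the model string, not the model: 'THUDM\chatglm2-6b-int4' contains a backslash, which is invalid in a Hugging Face repo id (hence the HFValidationError), and no local directory of that name exists relative to the working directory (hence the OSError). The first run above evidently loaded only because THUDM\chatglm2-6b existed as a local folder under H:\OpenAI\ChatGLM2-6B. A corrected sketch: use a forward slash so transformers treats the string as a Hub repo id, and drop the .quantize(4) call, since the int4 checkpoint is already quantized:

from transformers import AutoTokenizer, AutoModel

REPO = "THUDM/chatglm2-6b-int4"  # forward slash = Hub repo id

tokenizer = AutoTokenizer.from_pretrained(REPO, trust_remote_code=True)
model = AutoModel.from_pretrained(REPO, trust_remote_code=True).cuda()
model = model.eval()

# To load from weights already on disk, pass the directory containing
# config.json instead (hypothetical local path):
# local_dir = r"H:\OpenAI\ChatGLM2-6B\THUDM\chatglm2-6b-int4"
# model = AutoModel.from_pretrained(local_dir, trust_remote_code=True).cuda()

Keep in mind that the int4 checkpoint relies on the same cpm_kernels CUDA kernels, so if the 'no kernel image' error above was a compute-capability mismatch, it may reappear here.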


(H:\OpenAI\ChatGLM2-6B\ENV) H:\OpenAI\ChatGLM2-6B>


(H:\OpenAI\ChatGLM2-6B\ENV) C:\Users\Administrator>python

Python 3.9.6 (default, Aug 18 2021, 15:44:49) [MSC v.1916 64 bit (AMD64)] :: Anaconda, Inc. on win32

Type "help", "copyright", "credits" or "license" for more information.

>>> import torch

>>> x=torch.rand(4,6)

>>> print(x)

tensor([[0.4784, 0.6858, 0.0851, 0.2220, 0.0233, 0.4879],

    [0.2230, 0.8423, 0.2485, 0.6101, 0.2816, 0.4068],

    [0.5379, 0.6412, 0.2049, 0.8542, 0.3221, 0.2888],

    [0.4783, 0.4648, 0.6325, 0.8418, 0.9524, 0.1783]])

>>> torch.cuda.is_available()

True

>>> torch.cuda.current_device()

0

>>> torch.cuda.device_count()

1

>>> torch.__version__

'2.0.1'

>>> torch.backends.cudnn.is_acceptable(torch.cuda.FloatTensor(1))

True

>>> a=torch.Tensor([1,2])

>>> a=a.cuda()

>>> a

tensor([1., 2.], device='cuda:0')

>>>
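The REPL session above confirms that the PyTorch side of the stack is healthy: torch 2.0.1 sees one CUDA device, cuDNN accepts a CUDA tensor, and a tensor moves to cuda:0 without error. That narrows the earlier 'no kernel image' failure down to the kernels compiled by cpm_kernels rather than PyTorch itself. A further check worth running in the same session (standard PyTorch calls) is the card's compute capability, which the prebuilt quantization kernels must support:

>>> torch.cuda.get_device_name(0)
>>> torch.cuda.get_device_capability(0)   # e.g. (8, 6) on an RTX 30-series card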


