diff --git a/examples/bench.py b/examples/bench.py
index 2b968fa6..c05bd3c9 100644
--- a/examples/bench.py
+++ b/examples/bench.py
@@ -157,6 +157,11 @@ def get_args():
         action="store_true",
         help="Run cambricon test",
     )
+    parser.add_argument(
+        "--ali",
+        action="store_true",
+        help="Run Ali PPU test",
+    )
     parser.add_argument(
         "--model",
         type=str,
@@ -351,6 +356,8 @@ def run(
         device_str = "cuda"
     elif args.cambricon:
         device_str = "mlu"
+    elif args.ali:
+        device_str = "cuda"
     else:
         print(
             "python examples/bench.py --nvidia --model=~/TinyLlama-1.1B-Chat-v1.0/ --batch-size=2 --tp=1 --input-len=50 --output-len=50"
diff --git a/examples/jiuge.py b/examples/jiuge.py
index 0ca5a418..ca6a8028 100644
--- a/examples/jiuge.py
+++ b/examples/jiuge.py
@@ -47,6 +47,11 @@ def get_args():
         action="store_true",
         help="Run cambricon test",
     )
+    parser.add_argument(
+        "--ali",
+        action="store_true",
+        help="Run Ali PPU test",
+    )
     parser.add_argument(
         "--model_path",
         type=str,
@@ -252,9 +257,11 @@ def test(
         device_str = "cuda"
     elif args.cambricon:
         device_str = "mlu"
+    elif args.ali:
+        device_str = "cuda"
     else:
         print(
-            "Usage: python examples/jiuge.py [--cpu | --nvidia | --metax | --moore | --iluvatar] --model_path=\n"
+            "Usage: python examples/jiuge.py [--cpu | --nvidia | --metax | --moore | --iluvatar | --cambricon | --ali] --model_path=\n"
             "such as, python examples/jiuge.py --nvidia --model_path=~/TinyLlama-1.1B-Chat-v1.0"
         )
         sys.exit(1)
diff --git a/python/infinilm/server/inference_server.py b/python/infinilm/server/inference_server.py
index 03849161..f1792531 100644
--- a/python/infinilm/server/inference_server.py
+++ b/python/infinilm/server/inference_server.py
@@ -414,6 +414,7 @@ def parse_args():
     parser.add_argument("--moore", action="store_true", help="Use Moore device")
     parser.add_argument("--iluvatar", action="store_true", help="Use Iluvatar device")
     parser.add_argument("--cambricon", action="store_true", help="Use Cambricon device")
+    parser.add_argument("--ali", action="store_true", help="Use Ali PPU device")
     parser.add_argument(
         "--enable-graph",
         action="store_true",
@@ -447,9 +448,11 @@ def main():
         device = "cuda"
     elif args.cambricon:
         device = "mlu"
+    elif args.ali:
+        device = "cuda"
     else:
         print(
-            "Usage: python infinilm.server.inference_server [--cpu | --nvidia | --metax | --moore | --iluvatar | --cambricon] "
+            "Usage: python infinilm.server.inference_server [--cpu | --nvidia | --metax | --moore | --iluvatar | --cambricon | --ali] "
             "--model_path= --max_tokens=MAX_TOKENS --max_batch_size=MAX_BATCH_SIZE"
             "\n"
             "Example: python infinilm.server.inference_server --nvidia --model_path=/data/shared/models/9G7B_MHA/ "
diff --git a/scripts/jiuge.py b/scripts/jiuge.py
index e50ea327..35b2c8ca 100644
--- a/scripts/jiuge.py
+++ b/scripts/jiuge.py
@@ -860,9 +860,11 @@ def test():
         device_type = DeviceType.DEVICE_TYPE_KUNLUN
     elif sys.argv[1] == "--hygon":
         device_type = DeviceType.DEVICE_TYPE_HYGON
+    elif sys.argv[1] == "--ali":
+        device_type = DeviceType.DEVICE_TYPE_ALI
     else:
         print(
-            "Usage: python jiuge.py [--cpu | --nvidia| --qy| --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon] [n_device] [--verbose]"
+            "Usage: python jiuge.py [--cpu | --nvidia | --qy | --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon | --ali] [n_device] [--verbose]"
         )
         sys.exit(1)
 
diff --git a/scripts/libinfinicore_infer/base.py b/scripts/libinfinicore_infer/base.py
index 3305cdba..2af0c238 100644
--- a/scripts/libinfinicore_infer/base.py
+++ b/scripts/libinfinicore_infer/base.py
@@ -37,6 +37,7 @@ class DeviceType(ctypes.c_int):
     DEVICE_TYPE_KUNLUN = 7
     DEVICE_TYPE_HYGON = 8
     DEVICE_TYPE_QY = 9
+    DEVICE_TYPE_ALI = 10
 
 
 class KVCacheCStruct(ctypes.Structure):
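
For reference, a minimal usage sketch once this patch is applied: the new --ali flag is selected like the existing vendor flags and maps to the "cuda" device string internally (see the elif branches above). The model paths are placeholders copied from the usage strings already in the diff; substitute your own weights:

    python examples/jiuge.py --ali --model_path=~/TinyLlama-1.1B-Chat-v1.0
    python examples/bench.py --ali --model=~/TinyLlama-1.1B-Chat-v1.0/ --batch-size=2 --tp=1 --input-len=50 --output-len=50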