diff --git a/README.md b/README.md index 7924ff7..b6bec25 100644 --- a/README.md +++ b/README.md @@ -6,11 +6,13 @@

- 🤗 Bamba on Hugging Face  | Bamba Blog  + 🤗 Bamba on Hugging Face  | Bamba Blog  - + Bamba-9B is a decoder-only language model based on the [Mamba-2](https://github.com/state-spaces/mamba) architecture and is designed to handle a wide range of text generation tasks. It is trained from scratch using a two-stage training approach. In the first stage, the model is trained on 2 trillion tokens from the Dolma v1.7 dataset. In the second stage, it undergoes additional training on 200 billion tokens, leveraging a carefully curated blend of high-quality data to further refine its performance and enhance output quality. @@ -44,14 +46,14 @@ pip install git+https://github.com/huggingface/transformers.git | Bamba | 9B (9.78B) | 32 | 4096 | 32 | Yes | 8 | 4096 | False | ### Checkpoints -You can find links to our model checkpoints here: [Bamba Models](https://huggingface.co/collections/ibm-fms/bamba-674f1388b9bbc98b413c7bab) +You can find links to our model checkpoints here: [Bamba Models](https://huggingface.co/collections/ibm-ai-platform/bamba-674f1388b9bbc98b413c7bab) ## Inference You can use the following command to perform text generation using one of our checkpoints provided above: ```python -python text_generation.py --model_path ibm-fms/Bamba-9B --tokenizer_path ibm-fms/Bamba-9B --prompt "The largest living mammal on Earth is " --max_new_tokens 128 +python text_generation.py --model_path ibm-ai-platform/Bamba-9B --tokenizer_path ibm-ai-platform/Bamba-9B --prompt "The largest living mammal on Earth is " --max_new_tokens 128 ``` ## Training @@ -247,7 +249,7 @@ make -j ### Conversion to GGUF -You can use a pre-converted GGUF file from Huggingface (e.g. [bamba-9b.gguf](https://huggingface.co/ibm-fms/Bamba-9B/blob/main/bamba-9b.gguf)). If one doesn't exist, you can use the [convert_hf_to_gguf.py](https://github.com/gabe-l-hart/llama.cpp/blob/BambaArchitecture/convert_hf_to_gguf.py) script from Gabe's fork to perform the conversion manually. 
+You can use a pre-converted GGUF file from Huggingface (e.g. [bamba-9b.gguf](https://huggingface.co/ibm-ai-platform/Bamba-9B/blob/main/bamba-9b.gguf)). If one doesn't exist, you can use the [convert_hf_to_gguf.py](https://github.com/gabe-l-hart/llama.cpp/blob/BambaArchitecture/convert_hf_to_gguf.py) script from Gabe's fork to perform the conversion manually. ```sh # Install the python dependencies diff --git a/blog/bamba.md b/blog/bamba.md index a52fe46..6bc03d8 100644 --- a/blog/bamba.md +++ b/blog/bamba.md @@ -10,7 +10,7 @@ We introduce **Bamba-9B**, an inference-efficient Hybrid Mamba2 model trained by ## Artifacts 📦 -1. [Hugging Face Bamba collection](https://huggingface.co/collections/ibm-fms/bamba-674f1388b9bbc98b413c7bab) +1. [Hugging Face Bamba collection](https://huggingface.co/collections/ibm-ai-platform/bamba-674f1388b9bbc98b413c7bab) 2. [GitHub repo with inference, training, and tuning scripts](https://github.com/foundation-model-stack/bamba) 3. [Data loader](https://github.com/foundation-model-stack/fms-fsdp/blob/main/fms_fsdp/utils/dataset_utils.py) 4. [Quantization](https://github.com/foundation-model-stack/fms-model-optimizer) @@ -32,8 +32,8 @@ To use Bamba with transformers, you can use the familiar `AutoModel` classes and ```python from transformers import AutoModelForCausalLM, AutoTokenizer -model = AutoModelForCausalLM.from_pretrained("ibm-fms/Bamba-9B") -tokenizer = AutoTokenizer.from_pretrained("ibm-fms/Bamba-9B") +model = AutoModelForCausalLM.from_pretrained("ibm-ai-platform/Bamba-9B") +tokenizer = AutoTokenizer.from_pretrained("ibm-ai-platform/Bamba-9B") message = ["Mamba is a snake with following properties "] inputs = tokenizer(message, return_tensors='pt', return_token_type_ids=False) @@ -67,7 +67,7 @@ We compare Bamba-9B with SoTA transformer models of similar size ([Meta Llama 3. 
| Model | Average | MMLU | ARC-C | GSM8K | Hellaswag | OpenbookQA | Piqa | TruthfulQA | Winogrande | | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | -| [Bamba 9B](https://huggingface.co/ibm-fms/Bamba-9B) | 62.31 | 60.77 | 63.23 | 36.77 | 81.8 | 47.6 | 82.26 | 49.21 | 76.87 | +| [Bamba 9B](https://huggingface.co/ibm-ai-platform/Bamba-9B) | 62.31 | 60.77 | 63.23 | 36.77 | 81.8 | 47.6 | 82.26 | 49.21 | 76.87 | | [Meta Llama 3.1 8B](https://huggingface.co/meta-llama/Llama-3.1-8B) | 63.51 | 66.26 | 57.85 | 49.96 | 81.98 | 46.8 | 82.54 | 45.16 | 77.51 | | [Olmo2 7B](https://huggingface.co/allenai/OLMo-2-1124-7B) | 66.17 | 63.96 | 64.51 | 68.01 | 81.93 | **49.2** | 81.39 | 43.32 | 77.03 | | [IBM Granite v3 8B](https://huggingface.co/ibm-granite/granite-3.0-8b-base) | 67.47 | 65.45 | 63.74 | 62.55 | **83.29** | 47.6 | **83.41** | 52.89 | **80.82** | @@ -79,7 +79,7 @@ We compare Bamba-9B with SoTA transformer models of similar size ([Meta Llama 3. 
| Model | Average | MMLU-PRO | BBH | GPQA | IFEval | MATH Lvl 5 | MuSR | | :---- | :---- | :---- | :---- | :---- | :---- | :---- | -| [Bamba 9B](https://huggingface.co/ibm-fms/Bamba-9B) | 10.91 | 17.53 | 17.4 | 4.14 | 15.16 | 1.66 | 9.59 | +| [Bamba 9B](https://huggingface.co/ibm-ai-platform/Bamba-9B) | 10.91 | 17.53 | 17.4 | 4.14 | 15.16 | 1.66 | 9.59 | | [Meta Llama 3.1 8B](https://huggingface.co/meta-llama/Llama-3.1-8B) | 14.27 | 25.46 | 25.16 | 8.61 | 12.55 | 5.14 | 8.72 | | [Olmo2 7B](https://huggingface.co/allenai/OLMo-2-1124-7B) | 13.36 | 22.79 | 21.69 | 4.92 | 16.35 | 4.38 | 10.02 | | [IBM Granite v3 8B](https://huggingface.co/ibm-granite/granite-3.0-8b-base) | 21.14 | 25.83 | 28.02 | 9.06 | **44.79** | 9.82 | 9.32 | @@ -93,7 +93,7 @@ Safety benchmarks are crucial for ensuring AI models generate content that is et | Model | PopQA | Toxigen | BBQ | Crow-SPairs* | | :---- | :---- | :---- | :---- | -| [Bamba 9B](https://huggingface.co/ibm-fms/Bamba-9B) | 20.5 | 57.4 | 44.2 | 70.8 | +| [Bamba 9B](https://huggingface.co/ibm-ai-platform/Bamba-9B) | 20.5 | 57.4 | 44.2 | 70.8 | | [Meta Llama 3.1 8B](https://huggingface.co/meta-llama/Llama-3.1-8B) | **28.77** | 67.02 | 59.97 | 70.84 | | [IBM Granite v3 8B](https://huggingface.co/ibm-granite/granite-3.0-8b-base) | 27.5 | **79.9** | **82.1** | 75 | | [Olmo2 7B](https://huggingface.co/allenai/OLMo-2-1124-7B) | 25.7 | 63.1 | 58.4 | 72 | @@ -111,9 +111,9 @@ We pick a few prominent models: [Olmo 7B](https://huggingface.co/allenai/OLMo-7B | Model | Average | MMLU | ARC-C | GSM8K | Hellaswag | OpenbookQA | Piqa | TruthfulQA | Winogrande | | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | -| [Bamba 9B (2.2T)](https://huggingface.co/ibm-fms/Bamba-9B) | **62.31** | **60.77** | **63.23** | **36.77** | 81.8 | **47.6** | 82.26 | **49.21** | 76.87 | +| [Bamba 9B (2.2T)](https://huggingface.co/ibm-ai-platform/Bamba-9B) | **62.31** | 
**60.77** | **63.23** | **36.77** | 81.8 | **47.6** | 82.26 | **49.21** | 76.87 | | [Olmo1.5 7B (2T)](https://huggingface.co/allenai/OLMo-7B-0424-hf) | 55.8 | 53.38 | 50.51 | 27.67 | 79.13 | 45.2 | 81.56 | 35.92 | 73.09 | -| [Bamba 9B (2T)](https://huggingface.co/ibm-fms/Bamba-9B-2T) | 59.11 | 59.05 | 57.25 | 24.03 | **83.66** | 47.6 | **83.62** | 38.26 | **79.4** | +| [Bamba 9B (2T)](https://huggingface.co/ibm-ai-platform/Bamba-9B-2T) | 59.11 | 59.05 | 57.25 | 24.03 | **83.66** | 47.6 | **83.62** | 38.26 | **79.4** | | [Meta Llama2 7B (2T)](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 53.78 | 46.64 | 52.65 | 13.57 | 78.95 | 45.2 | 80.03 | 38.96 | 74.27 | | [IBM Granite 7B (2T)](https://huggingface.co/ibm-granite/granite-7b-base) | 52.07 | 49.02 | 49.91 | 10.84 | 77.0 | 40.8 | 80.14 | 38.7 | 70.17 | @@ -132,7 +132,7 @@ Falcon Mamba is a pure Mamba model, Zamba has shared attention layer for every 6 | Model | Average | MMLU | ARC-C | GSM8K | Hellaswag | OpenbookQA | Piqa | TruthfulQA | Winogrande | | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | -| [Bamba 9B](https://huggingface.co/ibm-fms/Bamba-9B) | 62.31 | 60.77 | 63.23 | 36.77 | 81.8 | 47.6 | 82.26 | 49.21 | 76.87 | +| [Bamba 9B](https://huggingface.co/ibm-ai-platform/Bamba-9B) | 62.31 | 60.77 | 63.23 | 36.77 | 81.8 | 47.6 | 82.26 | 49.21 | 76.87 | | NVIDIA Mamba2 Hybrid 8B\* | 58.78 | 53.6 | 47.7 | 77.69 | \-- | 42.8 | 79.65 | 38.72 | 71.27 | | [Zamba 7B](https://huggingface.co/Zyphra/Zamba-7B-v1) | 64.36 | 57.85 | 55.38 | 61.33 | 82.27 | 46.8 | **82.21** | 49.69 | 79.32 | | [Falcon Mamba 7B](https://huggingface.co/tiiuae/falcon-mamba-7b) | **65.31** | **63.19** | **63.4** | **52.08** | 80.82 | **47.8** | **83.62** | **53.46** | **78.14** | diff --git a/blog/bamba31T.md b/blog/bamba31T.md index c6a97d5..529f9fa 100644 --- a/blog/bamba31T.md +++ b/blog/bamba31T.md @@ -3,7 +3,7 @@ During Christmas of 2024, IBM, Princeton, CMU, and UIUC 
[released](https://huggingface.co/blog/bamba), Bamba v1, a performant Mamba2 based pretrained model with full data lineage trained to 2T tokens. Since then, we have been busy cooking an update with new datasets. Today, we are excited to release Bamba v2, trained for an additional 1T tokens that significantly improves on Bamba v1. The L1 and L2 leaderboard scores outperform Llama 3.1 8B, which was trained with nearly 5x the amount of data. All of this with the inference speedup that we get from Mamba2 based architecture, which with the latest vLLM is 2-2.5x faster than similar sized transformer models. ## Artifacts 📦 -1. [Hugging Face Bamba collection](https://huggingface.co/collections/ibm-fms/bamba-674f1388b9bbc98b413c7bab) +1. [Hugging Face Bamba collection](https://huggingface.co/collections/ibm-ai-platform/bamba-674f1388b9bbc98b413c7bab) 2. [GitHub repo with inference, training, and tuning scripts](https://github.com/foundation-model-stack/bamba) 3. [vLLM RFC](https://github.com/vllm-project/vllm/issues/17140) diff --git a/evaluation/README.md b/evaluation/README.md index b1e039e..293439d 100644 --- a/evaluation/README.md +++ b/evaluation/README.md @@ -90,8 +90,8 @@ In case you just want to run the benchmark as is and you do not want to use a mo harness_path="path/to/lm-evaluation-harness" python_path="python" lm_eval_script="${harness_path}/lm_eval" -pretrained_model="ibm-fms/Bamba-9B" -output_base_path="evaluation_results/debug/ibm-fms_Bamba-9B" +pretrained_model="ibm-ai-platform/Bamba-9B" +output_base_path="evaluation_results/debug/ibm-ai-platform_Bamba-9B" batch_size=4 # Function to run lm_eval with common arguments diff --git a/evaluation/aggregation.py b/evaluation/aggregation.py index a53f2ed..6811729 100644 --- a/evaluation/aggregation.py +++ b/evaluation/aggregation.py @@ -102,7 +102,7 @@ def get_results_df(res_dir_paths, results_from_papers_path=None): res_df["score"] = res_df["score"].round(2) res_df["model"] = res_df["model"].apply( - lambda x: 
x.replace("/dccstor/fme/users/yotam/models/", "ibm-fms/") + lambda x: x.replace("/dccstor/fme/users/yotam/models/", "ibm-ai-platform/") ) # df_pivot_score.to_csv("output/combined_results.csv", index=False) diff --git a/evaluation/assets/eval_metadata.csv b/evaluation/assets/eval_metadata.csv index 57998ea..be93d4e 100644 --- a/evaluation/assets/eval_metadata.csv +++ b/evaluation/assets/eval_metadata.csv @@ -5,38 +5,38 @@ allenai/OLMo-2-1124-7B,7,PT,non_bamba allenai/OLMo-7B-0424-hf,7,PT,non_bamba allenai/OLMo-7B-hf,7,PT,non_bamba google/gemma-2-9b,9,PT,non_bamba -ibm-fms/Bamba-9.8b-1.8T-hf,9,PT,bamba -ibm-fms/Bamba-9.8b-2.2T-hf,9,PT,bamba -ibm-fms/Bamba-9.8b-2T-hf,9,PT,bamba -ibm-fms/Bamba-9B-1.8T-fp8,9,PT,bamba -ibm-fms/Bamba-9B-2.65T,9,PT,bamba -ibm-fms/Bamba-9B-2T-fp8,9,PT,bamba -ibm-fms/Bamba-9B-fp8,9,PT,bamba -ibm-fms/Bamba-9b-2.1T-hf,9,PT,bamba -ibm-fms/Bamba-9b-2.3T-hf,9,PT,bamba -ibm-fms/Bamba-9b-2.5T-hf,9,PT,bamba -ibm-fms/Bamba-9b-2.6T-hf,9,PT,bamba -ibm-fms/Bamba-9b-2.8T-hf,9,PT,bamba -ibm-fms/Bamba_annealed_models/Bamba-9b-2.1T-finemath-hf,9,PT,bamba -ibm-fms/Bamba_annealed_models/Bamba-9b-Olmo-constant-2.5T-hf,9,PT,bamba -ibm-fms/Bamba_annealed_models/Bamba-9b-Olmo-cosine-2.5T-hf,9,PT,bamba -ibm-fms/Bamba_annealed_models/Bamba-9b-Olmo-cosine-4e5-2.5T-hf,9,PT,bamba -ibm-fms/agentinstruct_lr1e_5-hf,9,SFT,bamba -ibm-fms/agentinstruct_lr1e_6-hf,9,SFT,bamba -ibm-fms/anteater_lr1e_5-hf,9,SFT,bamba -ibm-fms/anteater_lr1e_6-hf,9,SFT,bamba -ibm-fms/instruct_models/tuluv3/2.3T_base/lr1e_6_gbs_256-hf,9,SFT,bamba -ibm-fms/instruct_models/tuluv3/2.3T_base/lr1e_6_gbs_32-hf,9,SFT,bamba -ibm-fms/instruct_models/tuluv3/2.3T_base/lr1e_6_gbs_64-hf,9,SFT,bamba -ibm-fms/instruct_models/tuluv3/2.3T_base/lr1e_6_wd_0.06-hf,9,SFT,bamba -ibm-fms/instruct_models/tuluv3/2.3T_base/lr1e_6_wd_0.1-hf,9,SFT,bamba -ibm-fms/instruct_models/tuluv3/2.3T_base/lr1e_6_wd_0.1_gbs_16-hf,9,SFT,bamba -ibm-fms/instruct_models/tuluv3/2.3T_base/lr1e_6_wd_0.1_gbs_32-hf,9,SFT,bamba 
-ibm-fms/instruct_models/tuluv3/2.3T_base/lr1e_6_wd_0_gbs_128_base-hf,9,SFT,bamba -ibm-fms/instruct_models/tuluv3/2.3T_base/lr1e_6_wd_0_gbs_16-hf,9,SFT,bamba -ibm-fms/lchu/70b_hsdp_768/hf/step-225000,70,PT,non_bamba -ibm-fms/tuluv3_lr1e_5-hf,9,SFT,bamba -ibm-fms/tuluv3_lr1e_6-hf,9,SFT,bamba +ibm-ai-platform/Bamba-9.8b-1.8T-hf,9,PT,bamba +ibm-ai-platform/Bamba-9.8b-2.2T-hf,9,PT,bamba +ibm-ai-platform/Bamba-9.8b-2T-hf,9,PT,bamba +ibm-ai-platform/Bamba-9B-1.8T-fp8,9,PT,bamba +ibm-ai-platform/Bamba-9B-2.65T,9,PT,bamba +ibm-ai-platform/Bamba-9B-2T-fp8,9,PT,bamba +ibm-ai-platform/Bamba-9B-fp8,9,PT,bamba +ibm-ai-platform/Bamba-9b-2.1T-hf,9,PT,bamba +ibm-ai-platform/Bamba-9b-2.3T-hf,9,PT,bamba +ibm-ai-platform/Bamba-9b-2.5T-hf,9,PT,bamba +ibm-ai-platform/Bamba-9b-2.6T-hf,9,PT,bamba +ibm-ai-platform/Bamba-9b-2.8T-hf,9,PT,bamba +ibm-ai-platform/Bamba_annealed_models/Bamba-9b-2.1T-finemath-hf,9,PT,bamba +ibm-ai-platform/Bamba_annealed_models/Bamba-9b-Olmo-constant-2.5T-hf,9,PT,bamba +ibm-ai-platform/Bamba_annealed_models/Bamba-9b-Olmo-cosine-2.5T-hf,9,PT,bamba +ibm-ai-platform/Bamba_annealed_models/Bamba-9b-Olmo-cosine-4e5-2.5T-hf,9,PT,bamba +ibm-ai-platform/agentinstruct_lr1e_5-hf,9,SFT,bamba +ibm-ai-platform/agentinstruct_lr1e_6-hf,9,SFT,bamba +ibm-ai-platform/anteater_lr1e_5-hf,9,SFT,bamba +ibm-ai-platform/anteater_lr1e_6-hf,9,SFT,bamba +ibm-ai-platform/instruct_models/tuluv3/2.3T_base/lr1e_6_gbs_256-hf,9,SFT,bamba +ibm-ai-platform/instruct_models/tuluv3/2.3T_base/lr1e_6_gbs_32-hf,9,SFT,bamba +ibm-ai-platform/instruct_models/tuluv3/2.3T_base/lr1e_6_gbs_64-hf,9,SFT,bamba +ibm-ai-platform/instruct_models/tuluv3/2.3T_base/lr1e_6_wd_0.06-hf,9,SFT,bamba +ibm-ai-platform/instruct_models/tuluv3/2.3T_base/lr1e_6_wd_0.1-hf,9,SFT,bamba +ibm-ai-platform/instruct_models/tuluv3/2.3T_base/lr1e_6_wd_0.1_gbs_16-hf,9,SFT,bamba +ibm-ai-platform/instruct_models/tuluv3/2.3T_base/lr1e_6_wd_0.1_gbs_32-hf,9,SFT,bamba 
+ibm-ai-platform/instruct_models/tuluv3/2.3T_base/lr1e_6_wd_0_gbs_128_base-hf,9,SFT,bamba +ibm-ai-platform/instruct_models/tuluv3/2.3T_base/lr1e_6_wd_0_gbs_16-hf,9,SFT,bamba +ibm-ai-platform/lchu/70b_hsdp_768/hf/step-225000,70,PT,non_bamba +ibm-ai-platform/tuluv3_lr1e_5-hf,9,SFT,bamba +ibm-ai-platform/tuluv3_lr1e_6-hf,9,SFT,bamba ibm-granite/granite-3.0-8b-base,8,PT,non_bamba ibm-granite/granite-7b-base,7,PT,non_bamba meta-llama/Llama-2-7b-hf,7,PT,non_bamba diff --git a/evaluation/scripts/example_run_lmeval.sh b/evaluation/scripts/example_run_lmeval.sh index 17b4069..6f09c5d 100644 --- a/evaluation/scripts/example_run_lmeval.sh +++ b/evaluation/scripts/example_run_lmeval.sh @@ -4,8 +4,8 @@ harness_path="path/to/lm-evaluation-harness" python_path="python" lm_eval_script="${harness_path}/lm_eval" -pretrained_model="ibm-fms/Bamba-9B" -output_base_path="evaluation_results/debug/ibm-fms_Bamba-9B" +pretrained_model="ibm-ai-platform/Bamba-9B" +output_base_path="evaluation_results/debug/ibm-ai-platform_Bamba-9B" batch_size=4 # Function to run lm_eval with common arguments diff --git a/evaluation/serve_results.py b/evaluation/serve_results.py index b4b8618..5f4e620 100644 --- a/evaluation/serve_results.py +++ b/evaluation/serve_results.py @@ -83,7 +83,7 @@ def get_results_df_cached(output_dir_path, res_dirs): .replace("-hf", "") .replace("-2T", "-2.0T") .replace("9B-fp8", "9B-2.2T-fp8") - .replace("ibm-fms/", "") + .replace("ibm-ai-platform/", "") .replace("instruct_models/", "") .replace("Bamba_annealed_models/", "") ) diff --git a/tuning/Fine-tuning.md b/tuning/Fine-tuning.md index ba37709..88c7c14 100644 --- a/tuning/Fine-tuning.md +++ b/tuning/Fine-tuning.md @@ -16,7 +16,7 @@ dataset = load_dataset("lucasmccabe-lmi/CodeAlpaca-20k", split="train") # We load the model and the tokenizer # TODO: change path to bamba model when uploaded -model_path = "ibm-fms/Bamba-9B" +model_path = "ibm-ai-platform/Bamba-9B" model = AutoModelForCausalLM.from_pretrained(model_path) tokenizer 
= AutoTokenizer.from_pretrained(model_path)