temporary: inference validation script

2024-03-17 19:48:52 +01:00
parent 035e680631
commit 2ea75b4160
1 changed files with 42 additions and 0 deletions
--- a/examples/mistral/mixtral_fused.py
+++ b/examples/mistral/mixtral_fused.py
@@ -0,0 +1,42 @@
 from axolotl.monkeypatch.moe.moe import SparseMoeBlock
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 from transformers.models.mixtral.modeling_mixtral import MixtralSparseMoeBlock
 model_path = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 # Load model
 model = AutoModelForCausalLM.from_pretrained(model_path)
 # Swap every stock Mixtral MoE block for the fused SparseMoeBlock.
 # named_modules() yields dotted paths, and setattr() on the root model with a
 # dotted name does not replace the nested submodule. So: snapshot the matches
 # first (the module tree must not be mutated while it is being walked), then
 # assign the replacement on each block's direct parent.
 moe_blocks = [
    (name, module)
    for name, module in model.named_modules()
    if isinstance(module, MixtralSparseMoeBlock)
 ]
 for name, module in moe_blocks:
    smoe = SparseMoeBlock(
        experts=module.experts,
        gate=module.gate,
        hidden_dim=module.hidden_dim,
        ffn_dim=module.ffn_dim,
        num_experts=module.num_experts,
        top_k=module.top_k,
    )
    # Split "path.to.parent.leaf" into the parent path and the leaf attribute.
    parent_name, _, child_name = name.rpartition(".")
    # get_submodule("") returns the root module itself.
    parent = model.get_submodule(parent_name)
    setattr(parent, child_name, smoe)
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 # Stream decoded text to stdout as it is generated, without echoing the prompt.
 streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 # Convert prompt to tokens
 prompt_template = "[INST] {prompt} [/INST]"
 prompt = (
    "You're standing on the surface of the Earth. "
    "You walk one mile south, one mile west and one mile north. "
    "You end up exactly where you started. Where are you?"
 )
 # Put the input ids on whatever device the model lives on; hard-coding
 # .cuda() mismatches a model that was loaded without device placement.
 tokens = tokenizer(
    prompt_template.format(prompt=prompt),
    return_tensors="pt",
 ).input_ids.to(model.device)
 # Generate output
 generation_output = model.generate(
    tokens,
    streamer=streamer,
    max_new_tokens=512,
 )