diff --git a/gallery/index.yaml b/gallery/index.yaml index b58e8ca7dbc3..8879a985acd1 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1,4 +1,29 @@ --- +- name: "ms3.2-24b-penumbra-aether-i1" + url: "github:mudler/LocalAI/gallery/virtual.yaml@master" + urls: + - https://huggingface.co/mradermacher/MS3.2-24B-Penumbra-Aether-i1-GGUF + description: | + This model, **MS3.2-24B-Penumbra-Aether-i1-GGUF**, is a quantized version of the **Vortex5/MS3.2-24B-Penumbra-Aether** large language model. It features 24 billion parameters and is designed for efficient deployment with advanced quantization techniques (e.g., IQ3_XXS, Q2_K). The quantized version is optimized for performance and memory efficiency, making it suitable for applications requiring low-latency inference. It supports GGUF format and is part of the Penumbra series, known for high-quality training and specialized use cases. The model is available for download and further customization through the Hugging Face platform. + overrides: + parameters: + model: llama-cpp/models/MS3.2-24B-Penumbra-Aether.i1-Q4_K_M.gguf + name: MS3.2-24B-Penumbra-Aether-i1-GGUF + backend: llama-cpp + template: + use_tokenizer_template: true + known_usecases: + - chat + function: + grammar: + disable: true + description: Imported from https://huggingface.co/mradermacher/MS3.2-24B-Penumbra-Aether-i1-GGUF + options: + - use_jinja:true + files: + - filename: llama-cpp/models/MS3.2-24B-Penumbra-Aether.i1-Q4_K_M.gguf + sha256: 785c58e234707812d882bafbfb3e28bdc5a682399298d5ca8a1245fc0b8f40da + uri: https://huggingface.co/mradermacher/MS3.2-24B-Penumbra-Aether-i1-GGUF/resolve/main/MS3.2-24B-Penumbra-Aether.i1-Q4_K_M.gguf - name: "rwkv7-g1c-13.3b" url: "github:mudler/LocalAI/gallery/virtual.yaml@master" urls: