[
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q6_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q6_K_XL",
    "notes": "",
    "context_size": 131072,
    "base_params": "-fa on -ctk q8_0 -ctv q8_0 -fit on -c 131072",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q6_K_XL -fa on -ctk q8_0 -ctv q8_0 -fit on -c 131072",
    "fit_result_params": "-c 131072 -ngl 41 -ot 'blk\\.26\\.ffn_down.*=CPU,blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q6_K_XL -fa 1 -ctk q8_0 -ctv q8_0 -ngl 41 -ot 'blk\\.26\\.ffn_down.*=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "131072"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-ngl",
          "41"
        ],
        [
          "-ot",
          "blk\\.26\\.ffn_down.*=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.26\\.ffn_down.*=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T14:39:18Z",
        "avg_ns": 1742300767,
        "stddev_ns": 0,
        "avg_ts": 73.466076,
        "stddev_ts": 0.0,
        "samples_ns": [
          1742300767
        ],
        "samples_ts": [
          73.4661
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.26\\.ffn_down.*=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T14:39:28Z",
        "avg_ns": 1753535571,
        "stddev_ns": 0,
        "avg_ts": 72.995383,
        "stddev_ts": 0.0,
        "samples_ns": [
          1753535571
        ],
        "samples_ts": [
          72.9954
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.26\\.ffn_down.*=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T14:39:41Z",
        "avg_ns": 1805682710,
        "stddev_ns": 0,
        "avg_ts": 70.887316,
        "stddev_ts": 0.0,
        "samples_ns": [
          1805682710
        ],
        "samples_ts": [
          70.8873
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.26\\.ffn_down.*=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T14:40:01Z",
        "avg_ns": 1926285003,
        "stddev_ns": 0,
        "avg_ts": 66.449149,
        "stddev_ts": 0.0,
        "samples_ns": [
          1926285003
        ],
        "samples_ts": [
          66.4491
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.26\\.ffn_down.*=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T14:40:33Z",
        "avg_ns": 2179389922,
        "stddev_ns": 0,
        "avg_ts": 58.732033,
        "stddev_ts": 0.0,
        "samples_ns": [
          2179389922
        ],
        "samples_ts": [
          58.732
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.26\\.ffn_down.*=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T14:41:34Z",
        "avg_ns": 2725147674,
        "stddev_ns": 0,
        "avg_ts": 46.969932,
        "stddev_ts": 0.0,
        "samples_ns": [
          2725147674
        ],
        "samples_ts": [
          46.9699
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q6_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q6_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk q8_0 -ctv q8_0 -fit on -c 262144",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q6_K_XL -fa on -ctk q8_0 -ctv q8_0 -fit on -c 262144",
    "fit_result_params": "-c 262144 -ngl 41 -ot 'blk\\.24\\.ffn_(gate|gate_up|down).*=CPU,blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q6_K_XL -fa 1 -ctk q8_0 -ctv q8_0 -ngl 41 -ot 'blk\\.24\\.ffn_(gate|gate_up|down).*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "262144"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-ngl",
          "41"
        ],
        [
          "-ot",
          "blk\\.24\\.ffn_(gate|gate_up|down).*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_(gate|gate_up|down).*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T14:44:06Z",
        "avg_ns": 1899624276,
        "stddev_ns": 0,
        "avg_ts": 67.381746,
        "stddev_ts": 0.0,
        "samples_ns": [
          1899624276
        ],
        "samples_ts": [
          67.3817
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_(gate|gate_up|down).*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T14:44:16Z",
        "avg_ns": 1910042265,
        "stddev_ns": 0,
        "avg_ts": 67.014224,
        "stddev_ts": 0.0,
        "samples_ns": [
          1910042265
        ],
        "samples_ts": [
          67.0142
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_(gate|gate_up|down).*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T14:44:30Z",
        "avg_ns": 1966205055,
        "stddev_ns": 0,
        "avg_ts": 65.100026,
        "stddev_ts": 0.0,
        "samples_ns": [
          1966205055
        ],
        "samples_ts": [
          65.1
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_(gate|gate_up|down).*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T14:44:51Z",
        "avg_ns": 2084410444,
        "stddev_ns": 0,
        "avg_ts": 61.408251,
        "stddev_ts": 0.0,
        "samples_ns": [
          2084410444
        ],
        "samples_ts": [
          61.4083
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_(gate|gate_up|down).*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T14:45:26Z",
        "avg_ns": 2340164997,
        "stddev_ns": 0,
        "avg_ts": 54.696998,
        "stddev_ts": 0.0,
        "samples_ns": [
          2340164997
        ],
        "samples_ts": [
          54.697
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_(gate|gate_up|down).*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T14:46:32Z",
        "avg_ns": 2871000225,
        "stddev_ns": 0,
        "avg_ts": 44.583765,
        "stddev_ts": 0.0,
        "samples_ns": [
          2871000225
        ],
        "samples_ts": [
          44.5838
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_(gate|gate_up|down).*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-08T14:48:50Z",
        "avg_ns": 4022787010,
        "stddev_ns": 0,
        "avg_ts": 31.818737,
        "stddev_ts": 0.0,
        "samples_ns": [
          4022787010
        ],
        "samples_ts": [
          31.8187
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q6_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q6_K_XL",
    "notes": "",
    "context_size": 131072,
    "base_params": "-fa on -ctk f16 -ctv f16 -fit on -c 131072",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q6_K_XL -fa on -ctk f16 -ctv f16 -fit on -c 131072",
    "fit_result_params": "-c 131072 -ngl 41 -ot 'blk\\.25\\.ffn_(gate|up|gate_up|down).*=CPU,blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q6_K_XL -fa 1 -ctk f16 -ctv f16 -ngl 41 -ot 'blk\\.25\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "131072"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-ngl",
          "41"
        ],
        [
          "-ot",
          "blk\\.25\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.25\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T14:54:32Z",
        "avg_ns": 1828813328,
        "stddev_ns": 0,
        "avg_ts": 69.990741,
        "stddev_ts": 0.0,
        "samples_ns": [
          1828813328
        ],
        "samples_ts": [
          69.9907
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.25\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T14:54:42Z",
        "avg_ns": 1835009831,
        "stddev_ns": 0,
        "avg_ts": 69.754395,
        "stddev_ts": 0.0,
        "samples_ns": [
          1835009831
        ],
        "samples_ts": [
          69.7544
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.25\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T14:54:56Z",
        "avg_ns": 1861885933,
        "stddev_ns": 0,
        "avg_ts": 68.747498,
        "stddev_ts": 0.0,
        "samples_ns": [
          1861885933
        ],
        "samples_ts": [
          68.7475
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.25\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T14:55:16Z",
        "avg_ns": 1921904935,
        "stddev_ns": 0,
        "avg_ts": 66.600589,
        "stddev_ts": 0.0,
        "samples_ns": [
          1921904935
        ],
        "samples_ts": [
          66.6006
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.25\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T14:55:51Z",
        "avg_ns": 2026074930,
        "stddev_ns": 0,
        "avg_ts": 63.176341,
        "stddev_ts": 0.0,
        "samples_ns": [
          2026074930
        ],
        "samples_ts": [
          63.1763
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.25\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T14:56:55Z",
        "avg_ns": 2243914620,
        "stddev_ns": 0,
        "avg_ts": 57.043169,
        "stddev_ts": 0.0,
        "samples_ns": [
          2243914620
        ],
        "samples_ts": [
          57.0432
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q6_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q6_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk f16 -ctv f16 -fit on -c 262144",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q6_K_XL -fa on -ctk f16 -ctv f16 -fit on -c 262144",
    "fit_result_params": "-c 262144 -ngl 41 -ot 'blk\\.21\\.ffn_(gate|gate_up|down).*=CPU,blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q6_K_XL -fa 1 -ctk f16 -ctv f16 -ngl 41 -ot 'blk\\.21\\.ffn_(gate|gate_up|down).*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "262144"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-ngl",
          "41"
        ],
        [
          "-ot",
          "blk\\.21\\.ffn_(gate|gate_up|down).*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_(gate|gate_up|down).*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T14:59:35Z",
        "avg_ns": 2051980752,
        "stddev_ns": 0,
        "avg_ts": 62.378753,
        "stddev_ts": 0.0,
        "samples_ns": [
          2051980752
        ],
        "samples_ts": [
          62.3788
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_(gate|gate_up|down).*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T14:59:46Z",
        "avg_ns": 2066986485,
        "stddev_ns": 0,
        "avg_ts": 61.925901,
        "stddev_ts": 0.0,
        "samples_ns": [
          2066986485
        ],
        "samples_ts": [
          61.9259
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_(gate|gate_up|down).*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T15:00:01Z",
        "avg_ns": 2091167857,
        "stddev_ns": 0,
        "avg_ts": 61.209816,
        "stddev_ts": 0.0,
        "samples_ns": [
          2091167857
        ],
        "samples_ts": [
          61.2098
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_(gate|gate_up|down).*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T15:00:24Z",
        "avg_ns": 2140696135,
        "stddev_ns": 0,
        "avg_ts": 59.793633,
        "stddev_ts": 0.0,
        "samples_ns": [
          2140696135
        ],
        "samples_ts": [
          59.7936
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_(gate|gate_up|down).*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T15:01:03Z",
        "avg_ns": 2245644834,
        "stddev_ns": 0,
        "avg_ts": 56.999218,
        "stddev_ts": 0.0,
        "samples_ns": [
          2245644834
        ],
        "samples_ts": [
          56.9992
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_(gate|gate_up|down).*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T15:02:16Z",
        "avg_ns": 2454882281,
        "stddev_ns": 0,
        "avg_ts": 52.140993,
        "stddev_ts": 0.0,
        "samples_ns": [
          2454882281
        ],
        "samples_ts": [
          52.141
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_(gate|gate_up|down).*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-08T15:04:47Z",
        "avg_ns": 2883027172,
        "stddev_ns": 0,
        "avg_ts": 44.397778,
        "stddev_ts": 0.0,
        "samples_ns": [
          2883027172
        ],
        "samples_ts": [
          44.3978
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q6_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q6_K_XL",
    "notes": "",
    "context_size": 131072,
    "base_params": "-fa on -ctk bf16 -ctv bf16 -fit on -c 131072",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q6_K_XL -fa on -ctk bf16 -ctv bf16 -fit on -c 131072",
    "fit_result_params": "-c 131072 -ngl 41 -ot 'blk\\.25\\.ffn_(gate|up|gate_up|down).*=CPU,blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q6_K_XL -fa 1 -ctk bf16 -ctv bf16 -ngl 41 -ot 'blk\\.25\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "131072"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-ngl",
          "41"
        ],
        [
          "-ot",
          "blk\\.25\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.25\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T15:10:52Z",
        "avg_ns": 1853611948,
        "stddev_ns": 0,
        "avg_ts": 69.054367,
        "stddev_ts": 0.0,
        "samples_ns": [
          1853611948
        ],
        "samples_ts": [
          69.0544
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.25\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T15:11:03Z",
        "avg_ns": 1898175001,
        "stddev_ns": 0,
        "avg_ts": 67.433192,
        "stddev_ts": 0.0,
        "samples_ns": [
          1898175001
        ],
        "samples_ts": [
          67.4332
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.25\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T15:11:16Z",
        "avg_ns": 1987259945,
        "stddev_ns": 0,
        "avg_ts": 64.410295,
        "stddev_ts": 0.0,
        "samples_ns": [
          1987259945
        ],
        "samples_ts": [
          64.4103
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.25\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T15:11:37Z",
        "avg_ns": 2174286705,
        "stddev_ns": 0,
        "avg_ts": 58.869881,
        "stddev_ts": 0.0,
        "samples_ns": [
          2174286705
        ],
        "samples_ts": [
          58.8699
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.25\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T15:12:12Z",
        "avg_ns": 2551150656,
        "stddev_ns": 0,
        "avg_ts": 50.173438,
        "stddev_ts": 0.0,
        "samples_ns": [
          2551150656
        ],
        "samples_ts": [
          50.1734
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.25\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T15:13:17Z",
        "avg_ns": 3332897209,
        "stddev_ns": 0,
        "avg_ts": 38.405025,
        "stddev_ts": 0.0,
        "samples_ns": [
          3332897209
        ],
        "samples_ts": [
          38.405
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q6_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q6_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk bf16 -ctv bf16 -fit on -c 262144",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q6_K_XL -fa on -ctk bf16 -ctv bf16 -fit on -c 262144",
    "fit_result_params": "-c 262144 -ngl 41 -ot 'blk\\.21\\.ffn_(gate|up|gate_up|down).*=CPU,blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q6_K_XL -fa 1 -ctk bf16 -ctv bf16 -ngl 41 -ot 'blk\\.21\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "262144"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-ngl",
          "41"
        ],
        [
          "-ot",
          "blk\\.21\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T15:15:59Z",
        "avg_ns": 2112293077,
        "stddev_ns": 0,
        "avg_ts": 60.597652,
        "stddev_ts": 0.0,
        "samples_ns": [
          2112293077
        ],
        "samples_ts": [
          60.5977
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T15:16:11Z",
        "avg_ns": 2149934483,
        "stddev_ns": 0,
        "avg_ts": 59.536698,
        "stddev_ts": 0.0,
        "samples_ns": [
          2149934483
        ],
        "samples_ts": [
          59.5367
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T15:16:26Z",
        "avg_ns": 2245553819,
        "stddev_ns": 0,
        "avg_ts": 57.001528,
        "stddev_ts": 0.0,
        "samples_ns": [
          2245553819
        ],
        "samples_ts": [
          57.0015
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T15:16:49Z",
        "avg_ns": 2421306748,
        "stddev_ns": 0,
        "avg_ts": 52.864017,
        "stddev_ts": 0.0,
        "samples_ns": [
          2421306748
        ],
        "samples_ts": [
          52.864
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T15:17:28Z",
        "avg_ns": 2794689305,
        "stddev_ns": 0,
        "avg_ts": 45.801156,
        "stddev_ts": 0.0,
        "samples_ns": [
          2794689305
        ],
        "samples_ts": [
          45.8012
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T15:18:43Z",
        "avg_ns": 3561695079,
        "stddev_ns": 0,
        "avg_ts": 35.937944,
        "stddev_ts": 0.0,
        "samples_ns": [
          3561695079
        ],
        "samples_ts": [
          35.9379
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q6_K",
        "model_size": 31832787456,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-08T15:21:17Z",
        "avg_ns": 5144548152,
        "stddev_ns": 0,
        "avg_ts": 24.880708,
        "stddev_ts": 0.0,
        "samples_ns": [
          5144548152
        ],
        "samples_ts": [
          24.8807
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q4_K_XL",
    "notes": "",
    "context_size": 120832,
    "base_params": "-fa on -ctk q8_0 -ctv q8_0 -fit off",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa on -ctk q8_0 -ctv q8_0 -fit off",
    "fit_result_params": "-c 120832 -ngl -1",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa 1 -ctk q8_0 -ctv q8_0 -ngl 999",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-fit",
          "off"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-ngl",
          "999"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T16:06:13Z",
        "avg_ns": 945433938,
        "stddev_ns": 0,
        "avg_ts": 135.387566,
        "stddev_ts": 0.0,
        "samples_ns": [
          945433938
        ],
        "samples_ts": [
          135.388
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T16:06:21Z",
        "avg_ns": 973697666,
        "stddev_ns": 0,
        "avg_ts": 131.457643,
        "stddev_ts": 0.0,
        "samples_ns": [
          973697666
        ],
        "samples_ts": [
          131.458
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T16:06:30Z",
        "avg_ns": 1043033906,
        "stddev_ns": 0,
        "avg_ts": 122.718925,
        "stddev_ts": 0.0,
        "samples_ns": [
          1043033906
        ],
        "samples_ts": [
          122.719
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T16:06:42Z",
        "avg_ns": 1187151566,
        "stddev_ns": 0,
        "avg_ts": 107.82111,
        "stddev_ts": 0.0,
        "samples_ns": [
          1187151566
        ],
        "samples_ts": [
          107.821
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T16:07:01Z",
        "avg_ns": 1484310269,
        "stddev_ns": 0,
        "avg_ts": 86.23534,
        "stddev_ts": 0.0,
        "samples_ns": [
          1484310269
        ],
        "samples_ts": [
          86.2353
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 120832,
        "test_time": "2026-06-08T16:07:36Z",
        "avg_ns": 1996592988,
        "stddev_ns": 0,
        "avg_ts": 64.10921,
        "stddev_ts": 0.0,
        "samples_ns": [
          1996592988
        ],
        "samples_ts": [
          64.1092
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q4_K_XL",
    "notes": "",
    "context_size": 71680,
    "base_params": "-fa on -ctk f16 -ctv f16 -fit off",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa on -ctk f16 -ctv f16 -fit off",
    "fit_result_params": "-c 71680 -ngl -1",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa 1 -ctk f16 -ctv f16 -ngl 999",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-fit",
          "off"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-ngl",
          "999"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T16:08:52Z",
        "avg_ns": 910820978,
        "stddev_ns": 0,
        "avg_ts": 140.532556,
        "stddev_ts": 0.0,
        "samples_ns": [
          910820978
        ],
        "samples_ts": [
          140.533
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T16:09:00Z",
        "avg_ns": 923753882,
        "stddev_ns": 0,
        "avg_ts": 138.565047,
        "stddev_ts": 0.0,
        "samples_ns": [
          923753882
        ],
        "samples_ts": [
          138.565
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T16:09:09Z",
        "avg_ns": 951864998,
        "stddev_ns": 0,
        "avg_ts": 134.472851,
        "stddev_ts": 0.0,
        "samples_ns": [
          951864998
        ],
        "samples_ts": [
          134.473
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T16:09:21Z",
        "avg_ns": 1007336749,
        "stddev_ns": 0,
        "avg_ts": 127.067736,
        "stddev_ts": 0.0,
        "samples_ns": [
          1007336749
        ],
        "samples_ts": [
          127.068
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T16:09:40Z",
        "avg_ns": 1110565143,
        "stddev_ns": 0,
        "avg_ts": 115.256634,
        "stddev_ts": 0.0,
        "samples_ns": [
          1110565143
        ],
        "samples_ts": [
          115.257
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 71680,
        "test_time": "2026-06-08T16:10:15Z",
        "avg_ns": 1130224610,
        "stddev_ns": 0,
        "avg_ts": 113.251825,
        "stddev_ts": 0.0,
        "samples_ns": [
          1130224610
        ],
        "samples_ts": [
          113.252
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q4_K_XL",
    "notes": "",
    "context_size": 67584,
    "base_params": "-fa on -ctk bf16 -ctv bf16 -fit off",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa on -ctk bf16 -ctv bf16 -fit off",
    "fit_result_params": "-c 67584 -ngl -1",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa 1 -ctk bf16 -ctv bf16 -ngl 999",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-fit",
          "off"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-ngl",
          "999"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T16:10:59Z",
        "avg_ns": 956860033,
        "stddev_ns": 0,
        "avg_ts": 133.770871,
        "stddev_ts": 0.0,
        "samples_ns": [
          956860033
        ],
        "samples_ts": [
          133.771
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T16:11:06Z",
        "avg_ns": 1005250146,
        "stddev_ns": 0,
        "avg_ts": 127.331491,
        "stddev_ts": 0.0,
        "samples_ns": [
          1005250146
        ],
        "samples_ts": [
          127.331
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T16:11:15Z",
        "avg_ns": 1104021114,
        "stddev_ns": 0,
        "avg_ts": 115.939812,
        "stddev_ts": 0.0,
        "samples_ns": [
          1104021114
        ],
        "samples_ts": [
          115.94
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T16:11:28Z",
        "avg_ns": 1308759442,
        "stddev_ns": 0,
        "avg_ts": 97.802542,
        "stddev_ts": 0.0,
        "samples_ns": [
          1308759442
        ],
        "samples_ts": [
          97.8025
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T16:11:47Z",
        "avg_ns": 1710414556,
        "stddev_ns": 0,
        "avg_ts": 74.835659,
        "stddev_ts": 0.0,
        "samples_ns": [
          1710414556
        ],
        "samples_ts": [
          74.8357
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 67584,
        "test_time": "2026-06-08T16:12:23Z",
        "avg_ns": 1737557017,
        "stddev_ns": 0,
        "avg_ts": 73.666647,
        "stddev_ts": 0.0,
        "samples_ns": [
          1737557017
        ],
        "samples_ts": [
          73.6666
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q4_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk q8_0 -ctv q8_0 -fit on -c 262144",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa on -ctk q8_0 -ctv q8_0 -fit on -c 262144",
    "fit_result_params": "-c 262144 -ngl 41 -ot 'blk\\.36\\.ffn_(gate|gate_up|down).*=CPU,blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa 1 -ctk q8_0 -ctv q8_0 -ngl 41 -ot 'blk\\.36\\.ffn_(gate|gate_up|down).*=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "262144"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-ngl",
          "41"
        ],
        [
          "-ot",
          "blk\\.36\\.ffn_(gate|gate_up|down).*=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.36\\.ffn_(gate|gate_up|down).*=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T16:13:08Z",
        "avg_ns": 1077292815,
        "stddev_ns": 0,
        "avg_ts": 118.81635,
        "stddev_ts": 0.0,
        "samples_ns": [
          1077292815
        ],
        "samples_ts": [
          118.816
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.36\\.ffn_(gate|gate_up|down).*=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T16:13:16Z",
        "avg_ns": 1101694645,
        "stddev_ns": 0,
        "avg_ts": 116.184644,
        "stddev_ts": 0.0,
        "samples_ns": [
          1101694645
        ],
        "samples_ts": [
          116.185
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.36\\.ffn_(gate|gate_up|down).*=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T16:13:26Z",
        "avg_ns": 1170175240,
        "stddev_ns": 0,
        "avg_ts": 109.385326,
        "stddev_ts": 0.0,
        "samples_ns": [
          1170175240
        ],
        "samples_ts": [
          109.385
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.36\\.ffn_(gate|gate_up|down).*=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T16:13:39Z",
        "avg_ns": 1311872928,
        "stddev_ns": 0,
        "avg_ts": 97.570426,
        "stddev_ts": 0.0,
        "samples_ns": [
          1311872928
        ],
        "samples_ts": [
          97.5704
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.36\\.ffn_(gate|gate_up|down).*=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T16:14:00Z",
        "avg_ns": 1598909146,
        "stddev_ns": 0,
        "avg_ts": 80.05458,
        "stddev_ts": 0.0,
        "samples_ns": [
          1598909146
        ],
        "samples_ts": [
          80.0546
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.36\\.ffn_(gate|gate_up|down).*=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T16:14:40Z",
        "avg_ns": 2200270957,
        "stddev_ns": 0,
        "avg_ts": 58.174653,
        "stddev_ts": 0.0,
        "samples_ns": [
          2200270957
        ],
        "samples_ts": [
          58.1747
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.36\\.ffn_(gate|gate_up|down).*=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-08T16:16:06Z",
        "avg_ns": 3405578997,
        "stddev_ns": 0,
        "avg_ts": 37.585386,
        "stddev_ts": 0.0,
        "samples_ns": [
          3405578997
        ],
        "samples_ts": [
          37.5854
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q4_K_XL",
    "notes": "",
    "context_size": 131072,
    "base_params": "-fa on -ctk f16 -ctv f16 -fit on -c 131072",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa on -ctk f16 -ctv f16 -fit on -c 131072",
    "fit_result_params": "-c 131072 -ngl 41 -ot 'blk\\.37\\.ffn_down.*=CPU,blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa 1 -ctk f16 -ctv f16 -ngl 41 -ot 'blk\\.37\\.ffn_down.*=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "131072"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-ngl",
          "41"
        ],
        [
          "-ot",
          "blk\\.37\\.ffn_down.*=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.37\\.ffn_down.*=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T16:19:58Z",
        "avg_ns": 988248712,
        "stddev_ns": 0,
        "avg_ts": 129.522051,
        "stddev_ts": 0.0,
        "samples_ns": [
          988248712
        ],
        "samples_ts": [
          129.522
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.37\\.ffn_down.*=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T16:20:06Z",
        "avg_ns": 999308887,
        "stddev_ns": 0,
        "avg_ts": 128.088524,
        "stddev_ts": 0.0,
        "samples_ns": [
          999308887
        ],
        "samples_ts": [
          128.089
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.37\\.ffn_down.*=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T16:20:15Z",
        "avg_ns": 1026790128,
        "stddev_ns": 0,
        "avg_ts": 124.660334,
        "stddev_ts": 0.0,
        "samples_ns": [
          1026790128
        ],
        "samples_ts": [
          124.66
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.37\\.ffn_down.*=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T16:20:28Z",
        "avg_ns": 1080963196,
        "stddev_ns": 0,
        "avg_ts": 118.412912,
        "stddev_ts": 0.0,
        "samples_ns": [
          1080963196
        ],
        "samples_ts": [
          118.413
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.37\\.ffn_down.*=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T16:20:48Z",
        "avg_ns": 1187937190,
        "stddev_ns": 0,
        "avg_ts": 107.749805,
        "stddev_ts": 0.0,
        "samples_ns": [
          1187937190
        ],
        "samples_ts": [
          107.75
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.37\\.ffn_down.*=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T16:21:26Z",
        "avg_ns": 1407748659,
        "stddev_ns": 0,
        "avg_ts": 90.925322,
        "stddev_ts": 0.0,
        "samples_ns": [
          1407748659
        ],
        "samples_ts": [
          90.9253
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q4_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk f16 -ctv f16 -fit on -c 262144",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa on -ctk f16 -ctv f16 -fit on -c 262144",
    "fit_result_params": "-c 262144 -ngl 41 -ot 'blk\\.32\\.ffn_(gate|up|gate_up|down).*=CPU,blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa 1 -ctk f16 -ctv f16 -ngl 41 -ot 'blk\\.32\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "262144"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-ngl",
          "41"
        ],
        [
          "-ot",
          "blk\\.32\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.32\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T16:31:21Z",
        "avg_ns": 1198846264,
        "stddev_ns": 0,
        "avg_ts": 106.76932,
        "stddev_ts": 0.0,
        "samples_ns": [
          1198846264
        ],
        "samples_ts": [
          106.769
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.32\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T16:31:29Z",
        "avg_ns": 1211628230,
        "stddev_ns": 0,
        "avg_ts": 105.642966,
        "stddev_ts": 0.0,
        "samples_ns": [
          1211628230
        ],
        "samples_ts": [
          105.643
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.32\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T16:31:40Z",
        "avg_ns": 1239134240,
        "stddev_ns": 0,
        "avg_ts": 103.297928,
        "stddev_ts": 0.0,
        "samples_ns": [
          1239134240
        ],
        "samples_ts": [
          103.298
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.32\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T16:31:55Z",
        "avg_ns": 1292018574,
        "stddev_ns": 0,
        "avg_ts": 99.069783,
        "stddev_ts": 0.0,
        "samples_ns": [
          1292018574
        ],
        "samples_ts": [
          99.0698
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.32\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T16:32:18Z",
        "avg_ns": 1406683360,
        "stddev_ns": 0,
        "avg_ts": 90.994181,
        "stddev_ts": 0.0,
        "samples_ns": [
          1406683360
        ],
        "samples_ts": [
          90.9942
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.32\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T16:33:03Z",
        "avg_ns": 1622977955,
        "stddev_ns": 0,
        "avg_ts": 78.867368,
        "stddev_ts": 0.0,
        "samples_ns": [
          1622977955
        ],
        "samples_ts": [
          78.8674
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.32\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-08T16:34:39Z",
        "avg_ns": 2065620753,
        "stddev_ns": 0,
        "avg_ts": 61.966845,
        "stddev_ts": 0.0,
        "samples_ns": [
          2065620753
        ],
        "samples_ts": [
          61.9668
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q4_K_XL",
    "notes": "",
    "context_size": 131072,
    "base_params": "-fa on -ctk bf16 -ctv bf16 -fit on -c 131072",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa on -ctk bf16 -ctv bf16 -fit on -c 131072",
    "fit_result_params": "-c 131072 -ngl 41 -ot 'blk\\.37\\.ffn_down.*=CPU,blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa 1 -ctk bf16 -ctv bf16 -ngl 41 -ot 'blk\\.37\\.ffn_down.*=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "131072"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-ngl",
          "41"
        ],
        [
          "-ot",
          "blk\\.37\\.ffn_down.*=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.37\\.ffn_down.*=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T16:38:51Z",
        "avg_ns": 1032719496,
        "stddev_ns": 0,
        "avg_ts": 123.944595,
        "stddev_ts": 0.0,
        "samples_ns": [
          1032719496
        ],
        "samples_ts": [
          123.945
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.37\\.ffn_down.*=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T16:38:59Z",
        "avg_ns": 1080687699,
        "stddev_ns": 0,
        "avg_ts": 118.443099,
        "stddev_ts": 0.0,
        "samples_ns": [
          1080687699
        ],
        "samples_ts": [
          118.443
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.37\\.ffn_down.*=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T16:39:08Z",
        "avg_ns": 1180443412,
        "stddev_ns": 0,
        "avg_ts": 108.43383,
        "stddev_ts": 0.0,
        "samples_ns": [
          1180443412
        ],
        "samples_ts": [
          108.434
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.37\\.ffn_down.*=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T16:39:21Z",
        "avg_ns": 1383450039,
        "stddev_ns": 0,
        "avg_ts": 92.522315,
        "stddev_ts": 0.0,
        "samples_ns": [
          1383450039
        ],
        "samples_ts": [
          92.5223
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.37\\.ffn_down.*=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T16:39:42Z",
        "avg_ns": 1781545858,
        "stddev_ns": 0,
        "avg_ts": 71.847716,
        "stddev_ts": 0.0,
        "samples_ns": [
          1781545858
        ],
        "samples_ts": [
          71.8477
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.37\\.ffn_down.*=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T16:40:20Z",
        "avg_ns": 2601432796,
        "stddev_ns": 0,
        "avg_ts": 49.203654,
        "stddev_ts": 0.0,
        "samples_ns": [
          2601432796
        ],
        "samples_ts": [
          49.2037
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q4_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk bf16 -ctv bf16 -fit on -c 262144",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa on -ctk bf16 -ctv bf16 -fit on -c 262144",
    "fit_result_params": "-c 262144 -ngl 41 -ot 'blk\\.31\\.ffn_(gate|gate_up|down).*=CPU,blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa 1 -ctk bf16 -ctv bf16 -ngl 41 -ot 'blk\\.31\\.ffn_(gate|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "262144"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-ngl",
          "41"
        ],
        [
          "-ot",
          "blk\\.31\\.ffn_(gate|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.31\\.ffn_(gate|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T16:41:55Z",
        "avg_ns": 1286308245,
        "stddev_ns": 0,
        "avg_ts": 99.509585,
        "stddev_ts": 0.0,
        "samples_ns": [
          1286308245
        ],
        "samples_ts": [
          99.5096
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.31\\.ffn_(gate|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T16:42:03Z",
        "avg_ns": 1325839717,
        "stddev_ns": 0,
        "avg_ts": 96.54259,
        "stddev_ts": 0.0,
        "samples_ns": [
          1325839717
        ],
        "samples_ts": [
          96.5426
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.31\\.ffn_(gate|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T16:42:14Z",
        "avg_ns": 1436991744,
        "stddev_ns": 0,
        "avg_ts": 89.074972,
        "stddev_ts": 0.0,
        "samples_ns": [
          1436991744
        ],
        "samples_ts": [
          89.075
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.31\\.ffn_(gate|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T16:42:29Z",
        "avg_ns": 1625269154,
        "stddev_ns": 0,
        "avg_ts": 78.756186,
        "stddev_ts": 0.0,
        "samples_ns": [
          1625269154
        ],
        "samples_ts": [
          78.7562
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.31\\.ffn_(gate|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T16:42:54Z",
        "avg_ns": 2020312209,
        "stddev_ns": 0,
        "avg_ts": 63.356544,
        "stddev_ts": 0.0,
        "samples_ns": [
          2020312209
        ],
        "samples_ts": [
          63.3565
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.31\\.ffn_(gate|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T16:43:41Z",
        "avg_ns": 2811807537,
        "stddev_ns": 0,
        "avg_ts": 45.522319,
        "stddev_ts": 0.0,
        "samples_ns": [
          2811807537
        ],
        "samples_ts": [
          45.5223
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.31\\.ffn_(gate|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-08T16:45:21Z",
        "avg_ns": 4431867786,
        "stddev_ns": 0,
        "avg_ts": 28.881728,
        "stddev_ts": 0.0,
        "samples_ns": [
          4431867786
        ],
        "samples_ts": [
          28.8817
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q4_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk bf16 -ctv bf16 -fit off -cmoe",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa on -ctk bf16 -ctv bf16 -fit off -cmoe",
    "fit_result_params": "-c 0 -ngl -1 -ot '\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa 1 -ctk bf16 -ctv bf16 -ngl 999 -ot '\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-fit",
          "off"
        ],
        [
          "-cmoe"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-ngl",
          "999"
        ],
        [
          "-ot",
          "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T16:49:53Z",
        "avg_ns": 2583262087,
        "stddev_ns": 0,
        "avg_ts": 49.549754,
        "stddev_ts": 0.0,
        "samples_ns": [
          2583262087
        ],
        "samples_ts": [
          49.5498
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T16:50:06Z",
        "avg_ns": 2628464710,
        "stddev_ns": 0,
        "avg_ts": 48.697629,
        "stddev_ts": 0.0,
        "samples_ns": [
          2628464710
        ],
        "samples_ts": [
          48.6976
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T16:50:24Z",
        "avg_ns": 2723961738,
        "stddev_ns": 0,
        "avg_ts": 46.990381,
        "stddev_ts": 0.0,
        "samples_ns": [
          2723961738
        ],
        "samples_ts": [
          46.9904
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T16:50:54Z",
        "avg_ns": 2896128473,
        "stddev_ns": 0,
        "avg_ts": 44.196934,
        "stddev_ts": 0.0,
        "samples_ns": [
          2896128473
        ],
        "samples_ts": [
          44.1969
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T16:51:45Z",
        "avg_ns": 3242353912,
        "stddev_ns": 0,
        "avg_ts": 39.477492,
        "stddev_ts": 0.0,
        "samples_ns": [
          3242353912
        ],
        "samples_ts": [
          39.4775
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T16:53:22Z",
        "avg_ns": 4011205582,
        "stddev_ns": 0,
        "avg_ts": 31.910606,
        "stddev_ts": 0.0,
        "samples_ns": [
          4011205582
        ],
        "samples_ts": [
          31.9106
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-08T16:56:40Z",
        "avg_ns": 5521635931,
        "stddev_ns": 0,
        "avg_ts": 23.181536,
        "stddev_ts": 0.0,
        "samples_ns": [
          5521635931
        ],
        "samples_ts": [
          23.1815
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q4_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk q8_0 -ctv q8_0 -fit off -cmoe",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa on -ctk q8_0 -ctv q8_0 -fit off -cmoe",
    "fit_result_params": "-c 0 -ngl -1 -ot '\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa 1 -ctk q8_0 -ctv q8_0 -ngl 999 -ot '\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-fit",
          "off"
        ],
        [
          "-cmoe"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-ngl",
          "999"
        ],
        [
          "-ot",
          "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T17:04:19Z",
        "avg_ns": 2581509723,
        "stddev_ns": 0,
        "avg_ts": 49.583389,
        "stddev_ts": 0.0,
        "samples_ns": [
          2581509723
        ],
        "samples_ts": [
          49.5834
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T17:04:32Z",
        "avg_ns": 2595833120,
        "stddev_ns": 0,
        "avg_ts": 49.309795,
        "stddev_ts": 0.0,
        "samples_ns": [
          2595833120
        ],
        "samples_ts": [
          49.3098
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T17:04:50Z",
        "avg_ns": 2650092940,
        "stddev_ns": 0,
        "avg_ts": 48.300193,
        "stddev_ts": 0.0,
        "samples_ns": [
          2650092940
        ],
        "samples_ts": [
          48.3002
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T17:05:19Z",
        "avg_ns": 2764335058,
        "stddev_ns": 0,
        "avg_ts": 46.304083,
        "stddev_ts": 0.0,
        "samples_ns": [
          2764335058
        ],
        "samples_ts": [
          46.3041
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T17:06:10Z",
        "avg_ns": 2973137727,
        "stddev_ns": 0,
        "avg_ts": 43.05216,
        "stddev_ts": 0.0,
        "samples_ns": [
          2973137727
        ],
        "samples_ts": [
          43.0522
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T17:07:46Z",
        "avg_ns": 3454572270,
        "stddev_ns": 0,
        "avg_ts": 37.052344,
        "stddev_ts": 0.0,
        "samples_ns": [
          3454572270
        ],
        "samples_ts": [
          37.0523
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-08T17:11:03Z",
        "avg_ns": 4531068816,
        "stddev_ns": 0,
        "avg_ts": 28.249405,
        "stddev_ts": 0.0,
        "samples_ns": [
          4531068816
        ],
        "samples_ts": [
          28.2494
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q4_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk f16 -ctv f16 -fit off -cmoe",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa on -ctk f16 -ctv f16 -fit off -cmoe",
    "fit_result_params": "-c 0 -ngl -1 -ot '\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_XL -fa 1 -ctk f16 -ctv f16 -ngl 999 -ot '\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-fit",
          "off"
        ],
        [
          "-cmoe"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-ngl",
          "999"
        ],
        [
          "-ot",
          "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T17:18:41Z",
        "avg_ns": 2551438137,
        "stddev_ns": 0,
        "avg_ts": 50.167785,
        "stddev_ts": 0.0,
        "samples_ns": [
          2551438137
        ],
        "samples_ts": [
          50.1678
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T17:18:54Z",
        "avg_ns": 2570013438,
        "stddev_ns": 0,
        "avg_ts": 49.805187,
        "stddev_ts": 0.0,
        "samples_ns": [
          2570013438
        ],
        "samples_ts": [
          49.8052
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T17:19:12Z",
        "avg_ns": 2593712406,
        "stddev_ns": 0,
        "avg_ts": 49.350113,
        "stddev_ts": 0.0,
        "samples_ns": [
          2593712406
        ],
        "samples_ts": [
          49.3501
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T17:19:42Z",
        "avg_ns": 2651353613,
        "stddev_ns": 0,
        "avg_ts": 48.277227,
        "stddev_ts": 0.0,
        "samples_ns": [
          2651353613
        ],
        "samples_ts": [
          48.2772
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T17:20:32Z",
        "avg_ns": 2771211388,
        "stddev_ns": 0,
        "avg_ts": 46.189187,
        "stddev_ts": 0.0,
        "samples_ns": [
          2771211388
        ],
        "samples_ts": [
          46.1892
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T17:22:09Z",
        "avg_ns": 2951467047,
        "stddev_ns": 0,
        "avg_ts": 43.368263,
        "stddev_ts": 0.0,
        "samples_ns": [
          2951467047
        ],
        "samples_ts": [
          43.3683
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q4_K - Medium",
        "model_size": 22349466112,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "\\.ffn_(up|down|gate|gate_up)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-08T17:25:25Z",
        "avg_ns": 3361471536,
        "stddev_ns": 0,
        "avg_ts": 38.078561,
        "stddev_ts": 0.0,
        "samples_ns": [
          3361471536
        ],
        "samples_ts": [
          38.0786
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-27B-GGUF:UD-Q4_K_XL",
    "hf_repo": "unsloth/Qwen3.6-27B-GGUF",
    "quantization": "UD-Q4_K_XL",
    "notes": "",
    "context_size": 89856,
    "base_params": "-fa on -ctk bf16 -ctv bf16 -fit off",
    "fit_params": "-hf unsloth/Qwen3.6-27B-GGUF:UD-Q4_K_XL -fa on -ctk bf16 -ctv bf16 -fit off",
    "fit_result_params": "-c 89856 -ngl -1",
    "bench_params": "-hf unsloth/Qwen3.6-27B-GGUF:UD-Q4_K_XL -fa 1 -ctk bf16 -ctv bf16 -ngl 999",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-fit",
          "off"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-ngl",
          "999"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-27B-GGUF/snapshots/82d411acf4a06cfb8d9b073a5211bf410bfc29bf/Qwen3.6-27B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35 27B Q4_K - Medium",
        "model_size": 17601570816,
        "model_n_params": 26895998464,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T17:32:50Z",
        "avg_ns": 4223872713,
        "stddev_ns": 0,
        "avg_ts": 30.303943,
        "stddev_ts": 0.0,
        "samples_ns": [
          4223872713
        ],
        "samples_ts": [
          30.3039
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-27B-GGUF/snapshots/82d411acf4a06cfb8d9b073a5211bf410bfc29bf/Qwen3.6-27B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35 27B Q4_K - Medium",
        "model_size": 17601570816,
        "model_n_params": 26895998464,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T17:33:03Z",
        "avg_ns": 4365446858,
        "stddev_ns": 0,
        "avg_ts": 29.321168,
        "stddev_ts": 0.0,
        "samples_ns": [
          4365446858
        ],
        "samples_ts": [
          29.3212
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-27B-GGUF/snapshots/82d411acf4a06cfb8d9b073a5211bf410bfc29bf/Qwen3.6-27B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35 27B Q4_K - Medium",
        "model_size": 17601570816,
        "model_n_params": 26895998464,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T17:33:20Z",
        "avg_ns": 4691784721,
        "stddev_ns": 0,
        "avg_ts": 27.281729,
        "stddev_ts": 0.0,
        "samples_ns": [
          4691784721
        ],
        "samples_ts": [
          27.2817
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-27B-GGUF/snapshots/82d411acf4a06cfb8d9b073a5211bf410bfc29bf/Qwen3.6-27B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35 27B Q4_K - Medium",
        "model_size": 17601570816,
        "model_n_params": 26895998464,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T17:33:46Z",
        "avg_ns": 5335845571,
        "stddev_ns": 0,
        "avg_ts": 23.9887,
        "stddev_ts": 0.0,
        "samples_ns": [
          5335845571
        ],
        "samples_ts": [
          23.9887
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-27B-GGUF/snapshots/82d411acf4a06cfb8d9b073a5211bf410bfc29bf/Qwen3.6-27B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35 27B Q4_K - Medium",
        "model_size": 17601570816,
        "model_n_params": 26895998464,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T17:34:30Z",
        "avg_ns": 6614021772,
        "stddev_ns": 0,
        "avg_ts": 19.352824,
        "stddev_ts": 0.0,
        "samples_ns": [
          6614021772
        ],
        "samples_ts": [
          19.3528
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-27B-GGUF/snapshots/82d411acf4a06cfb8d9b073a5211bf410bfc29bf/Qwen3.6-27B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35 27B Q4_K - Medium",
        "model_size": 17601570816,
        "model_n_params": 26895998464,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 89856,
        "test_time": "2026-06-08T17:36:01Z",
        "avg_ns": 7524608940,
        "stddev_ns": 0,
        "avg_ts": 17.010851,
        "stddev_ts": 0.0,
        "samples_ns": [
          7524608940
        ],
        "samples_ts": [
          17.0109
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-27B-GGUF:UD-Q4_K_XL",
    "hf_repo": "unsloth/Qwen3.6-27B-GGUF",
    "quantization": "UD-Q4_K_XL",
    "notes": "",
    "context_size": 162304,
    "base_params": "-fa on -ctk q8_0 -ctv q8_0 -fit off",
    "fit_params": "-hf unsloth/Qwen3.6-27B-GGUF:UD-Q4_K_XL -fa on -ctk q8_0 -ctv q8_0 -fit off",
    "fit_result_params": "-c 162304 -ngl -1",
    "bench_params": "-hf unsloth/Qwen3.6-27B-GGUF:UD-Q4_K_XL -fa 1 -ctk q8_0 -ctv q8_0 -ngl 999",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-fit",
          "off"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-ngl",
          "999"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-27B-GGUF/snapshots/82d411acf4a06cfb8d9b073a5211bf410bfc29bf/Qwen3.6-27B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35 27B Q4_K - Medium",
        "model_size": 17601570816,
        "model_n_params": 26895998464,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T17:38:19Z",
        "avg_ns": 4184191315,
        "stddev_ns": 0,
        "avg_ts": 30.591335,
        "stddev_ts": 0.0,
        "samples_ns": [
          4184191315
        ],
        "samples_ts": [
          30.5913
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-27B-GGUF/snapshots/82d411acf4a06cfb8d9b073a5211bf410bfc29bf/Qwen3.6-27B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35 27B Q4_K - Medium",
        "model_size": 17601570816,
        "model_n_params": 26895998464,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T17:38:32Z",
        "avg_ns": 4277105694,
        "stddev_ns": 0,
        "avg_ts": 29.92678,
        "stddev_ts": 0.0,
        "samples_ns": [
          4277105694
        ],
        "samples_ts": [
          29.9268
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-27B-GGUF/snapshots/82d411acf4a06cfb8d9b073a5211bf410bfc29bf/Qwen3.6-27B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35 27B Q4_K - Medium",
        "model_size": 17601570816,
        "model_n_params": 26895998464,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T17:38:49Z",
        "avg_ns": 4483685467,
        "stddev_ns": 0,
        "avg_ts": 28.547944,
        "stddev_ts": 0.0,
        "samples_ns": [
          4483685467
        ],
        "samples_ts": [
          28.5479
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-27B-GGUF/snapshots/82d411acf4a06cfb8d9b073a5211bf410bfc29bf/Qwen3.6-27B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35 27B Q4_K - Medium",
        "model_size": 17601570816,
        "model_n_params": 26895998464,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T17:39:14Z",
        "avg_ns": 4969815918,
        "stddev_ns": 0,
        "avg_ts": 25.755481,
        "stddev_ts": 0.0,
        "samples_ns": [
          4969815918
        ],
        "samples_ts": [
          25.7555
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-27B-GGUF/snapshots/82d411acf4a06cfb8d9b073a5211bf410bfc29bf/Qwen3.6-27B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35 27B Q4_K - Medium",
        "model_size": 17601570816,
        "model_n_params": 26895998464,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T17:39:58Z",
        "avg_ns": 6054907291,
        "stddev_ns": 0,
        "avg_ts": 21.139878,
        "stddev_ts": 0.0,
        "samples_ns": [
          6054907291
        ],
        "samples_ts": [
          21.1399
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-27B-GGUF/snapshots/82d411acf4a06cfb8d9b073a5211bf410bfc29bf/Qwen3.6-27B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35 27B Q4_K - Medium",
        "model_size": 17601570816,
        "model_n_params": 26895998464,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T17:41:27Z",
        "avg_ns": 8193619546,
        "stddev_ns": 0,
        "avg_ts": 15.621912,
        "stddev_ts": 0.0,
        "samples_ns": [
          8193619546
        ],
        "samples_ts": [
          15.6219
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-27B-GGUF/snapshots/82d411acf4a06cfb8d9b073a5211bf410bfc29bf/Qwen3.6-27B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35 27B Q4_K - Medium",
        "model_size": 17601570816,
        "model_n_params": 26895998464,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 162304,
        "test_time": "2026-06-08T17:45:07Z",
        "avg_ns": 9308266760,
        "stddev_ns": 0,
        "avg_ts": 13.751217,
        "stddev_ts": 0.0,
        "samples_ns": [
          9308266760
        ],
        "samples_ts": [
          13.7512
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-27B-GGUF:UD-Q4_K_XL",
    "hf_repo": "unsloth/Qwen3.6-27B-GGUF",
    "quantization": "UD-Q4_K_XL",
    "notes": "",
    "context_size": 94464,
    "base_params": "-fa on -ctk f16 -ctv f16 -fit off",
    "fit_params": "-hf unsloth/Qwen3.6-27B-GGUF:UD-Q4_K_XL -fa on -ctk f16 -ctv f16 -fit off",
    "fit_result_params": "-c 94464 -ngl -1",
    "bench_params": "-hf unsloth/Qwen3.6-27B-GGUF:UD-Q4_K_XL -fa 1 -ctk f16 -ctv f16 -ngl 999",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-fit",
          "off"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-ngl",
          "999"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-27B-GGUF/snapshots/82d411acf4a06cfb8d9b073a5211bf410bfc29bf/Qwen3.6-27B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35 27B Q4_K - Medium",
        "model_size": 17601570816,
        "model_n_params": 26895998464,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T17:50:15Z",
        "avg_ns": 4100615553,
        "stddev_ns": 0,
        "avg_ts": 31.214826,
        "stddev_ts": 0.0,
        "samples_ns": [
          4100615553
        ],
        "samples_ts": [
          31.2148
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-27B-GGUF/snapshots/82d411acf4a06cfb8d9b073a5211bf410bfc29bf/Qwen3.6-27B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35 27B Q4_K - Medium",
        "model_size": 17601570816,
        "model_n_params": 26895998464,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T17:50:28Z",
        "avg_ns": 4129508800,
        "stddev_ns": 0,
        "avg_ts": 30.996423,
        "stddev_ts": 0.0,
        "samples_ns": [
          4129508800
        ],
        "samples_ts": [
          30.9964
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-27B-GGUF/snapshots/82d411acf4a06cfb8d9b073a5211bf410bfc29bf/Qwen3.6-27B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35 27B Q4_K - Medium",
        "model_size": 17601570816,
        "model_n_params": 26895998464,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T17:50:45Z",
        "avg_ns": 4206637034,
        "stddev_ns": 0,
        "avg_ts": 30.428107,
        "stddev_ts": 0.0,
        "samples_ns": [
          4206637034
        ],
        "samples_ts": [
          30.4281
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-27B-GGUF/snapshots/82d411acf4a06cfb8d9b073a5211bf410bfc29bf/Qwen3.6-27B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35 27B Q4_K - Medium",
        "model_size": 17601570816,
        "model_n_params": 26895998464,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T17:51:10Z",
        "avg_ns": 4396189610,
        "stddev_ns": 0,
        "avg_ts": 29.116124,
        "stddev_ts": 0.0,
        "samples_ns": [
          4396189610
        ],
        "samples_ts": [
          29.1161
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-27B-GGUF/snapshots/82d411acf4a06cfb8d9b073a5211bf410bfc29bf/Qwen3.6-27B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35 27B Q4_K - Medium",
        "model_size": 17601570816,
        "model_n_params": 26895998464,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T17:51:53Z",
        "avg_ns": 4752540419,
        "stddev_ns": 0,
        "avg_ts": 26.932964,
        "stddev_ts": 0.0,
        "samples_ns": [
          4752540419
        ],
        "samples_ts": [
          26.933
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-27B-GGUF/snapshots/82d411acf4a06cfb8d9b073a5211bf410bfc29bf/Qwen3.6-27B-UD-Q4_K_XL.gguf",
        "model_type": "qwen35 27B Q4_K - Medium",
        "model_size": 17601570816,
        "model_n_params": 26895998464,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 94464,
        "test_time": "2026-06-08T17:53:22Z",
        "avg_ns": 5031435323,
        "stddev_ns": 0,
        "avg_ts": 25.440057,
        "stddev_ts": 0.0,
        "samples_ns": [
          5031435323
        ],
        "samples_ts": [
          25.4401
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL",
    "hf_repo": "unsloth/gemma-4-26B-A4B-it-GGUF",
    "quantization": "UD-Q4_K_XL",
    "notes": "",
    "context_size": 255232,
    "base_params": "-fa on -ctk bf16 -ctv bf16 -fit off",
    "fit_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL -fa on -ctk bf16 -ctv bf16 -fit off",
    "fit_result_params": "-c 255232 -ngl -1",
    "bench_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL -fa 1 -ctk bf16 -ctv bf16 -ngl 999",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-fit",
          "off"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-ngl",
          "999"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T17:55:52Z",
        "avg_ns": 1232707740,
        "stddev_ns": 0,
        "avg_ts": 103.836454,
        "stddev_ts": 0.0,
        "samples_ns": [
          1232707740
        ],
        "samples_ts": [
          103.836
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T17:56:00Z",
        "avg_ns": 1282837728,
        "stddev_ns": 0,
        "avg_ts": 99.778793,
        "stddev_ts": 0.0,
        "samples_ns": [
          1282837728
        ],
        "samples_ts": [
          99.7788
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T17:56:08Z",
        "avg_ns": 1380637678,
        "stddev_ns": 0,
        "avg_ts": 92.710783,
        "stddev_ts": 0.0,
        "samples_ns": [
          1380637678
        ],
        "samples_ts": [
          92.7108
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T17:56:20Z",
        "avg_ns": 1581975039,
        "stddev_ns": 0,
        "avg_ts": 80.911517,
        "stddev_ts": 0.0,
        "samples_ns": [
          1581975039
        ],
        "samples_ts": [
          80.9115
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T17:56:37Z",
        "avg_ns": 1988833362,
        "stddev_ns": 0,
        "avg_ts": 64.359339,
        "stddev_ts": 0.0,
        "samples_ns": [
          1988833362
        ],
        "samples_ts": [
          64.3593
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T17:57:11Z",
        "avg_ns": 2807034895,
        "stddev_ns": 0,
        "avg_ts": 45.599718,
        "stddev_ts": 0.0,
        "samples_ns": [
          2807034895
        ],
        "samples_ts": [
          45.5997
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 255232,
        "test_time": "2026-06-08T17:58:31Z",
        "avg_ns": 4349379141,
        "stddev_ns": 0,
        "avg_ts": 29.429488,
        "stddev_ts": 0.0,
        "samples_ns": [
          4349379141
        ],
        "samples_ts": [
          29.4295
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL",
    "hf_repo": "unsloth/gemma-4-26B-A4B-it-GGUF",
    "quantization": "UD-Q4_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk q8_0 -ctv q8_0 -fit off",
    "fit_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL -fa on -ctk q8_0 -ctv q8_0 -fit off",
    "fit_result_params": "-c 0 -ngl -1",
    "bench_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL -fa 1 -ctk q8_0 -ctv q8_0 -ngl 999",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-fit",
          "off"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-ngl",
          "999"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T18:02:19Z",
        "avg_ns": 1116502416,
        "stddev_ns": 0,
        "avg_ts": 114.643729,
        "stddev_ts": 0.0,
        "samples_ns": [
          1116502416
        ],
        "samples_ts": [
          114.644
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T18:02:26Z",
        "avg_ns": 1138634128,
        "stddev_ns": 0,
        "avg_ts": 112.41539,
        "stddev_ts": 0.0,
        "samples_ns": [
          1138634128
        ],
        "samples_ts": [
          112.415
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T18:02:35Z",
        "avg_ns": 1205068654,
        "stddev_ns": 0,
        "avg_ts": 106.218015,
        "stddev_ts": 0.0,
        "samples_ns": [
          1205068654
        ],
        "samples_ts": [
          106.218
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T18:02:46Z",
        "avg_ns": 1342393134,
        "stddev_ns": 0,
        "avg_ts": 95.352097,
        "stddev_ts": 0.0,
        "samples_ns": [
          1342393134
        ],
        "samples_ts": [
          95.3521
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T18:03:03Z",
        "avg_ns": 1610790351,
        "stddev_ns": 0,
        "avg_ts": 79.464097,
        "stddev_ts": 0.0,
        "samples_ns": [
          1610790351
        ],
        "samples_ts": [
          79.4641
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T18:03:37Z",
        "avg_ns": 2152584818,
        "stddev_ns": 0,
        "avg_ts": 59.463394,
        "stddev_ts": 0.0,
        "samples_ns": [
          2152584818
        ],
        "samples_ts": [
          59.4634
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-08T18:04:56Z",
        "avg_ns": 3212086483,
        "stddev_ns": 0,
        "avg_ts": 39.849487,
        "stddev_ts": 0.0,
        "samples_ns": [
          3212086483
        ],
        "samples_ts": [
          39.8495
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL",
    "hf_repo": "unsloth/gemma-4-26B-A4B-it-GGUF",
    "quantization": "UD-Q4_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk f16 -ctv f16 -fit off",
    "fit_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL -fa on -ctk f16 -ctv f16 -fit off",
    "fit_result_params": "-c 0 -ngl -1",
    "bench_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL -fa 1 -ctk f16 -ctv f16 -ngl 999",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-fit",
          "off"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-ngl",
          "999"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T18:08:48Z",
        "avg_ns": 1046478060,
        "stddev_ns": 0,
        "avg_ts": 122.315034,
        "stddev_ts": 0.0,
        "samples_ns": [
          1046478060
        ],
        "samples_ts": [
          122.315
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T18:08:55Z",
        "avg_ns": 1057928772,
        "stddev_ns": 0,
        "avg_ts": 120.991132,
        "stddev_ts": 0.0,
        "samples_ns": [
          1057928772
        ],
        "samples_ts": [
          120.991
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T18:09:04Z",
        "avg_ns": 1081572454,
        "stddev_ns": 0,
        "avg_ts": 118.346209,
        "stddev_ts": 0.0,
        "samples_ns": [
          1081572454
        ],
        "samples_ts": [
          118.346
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T18:09:15Z",
        "avg_ns": 1136731794,
        "stddev_ns": 0,
        "avg_ts": 112.603519,
        "stddev_ts": 0.0,
        "samples_ns": [
          1136731794
        ],
        "samples_ts": [
          112.604
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T18:09:32Z",
        "avg_ns": 1244429120,
        "stddev_ns": 0,
        "avg_ts": 102.858409,
        "stddev_ts": 0.0,
        "samples_ns": [
          1244429120
        ],
        "samples_ts": [
          102.858
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T18:10:04Z",
        "avg_ns": 1456700281,
        "stddev_ns": 0,
        "avg_ts": 87.869826,
        "stddev_ts": 0.0,
        "samples_ns": [
          1456700281
        ],
        "samples_ts": [
          87.8698
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-08T18:11:22Z",
        "avg_ns": 1883164459,
        "stddev_ns": 0,
        "avg_ts": 67.970697,
        "stddev_ts": 0.0,
        "samples_ns": [
          1883164459
        ],
        "samples_ts": [
          67.9707
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL",
    "hf_repo": "unsloth/gemma-4-26B-A4B-it-GGUF",
    "quantization": "UD-Q5_K_XL",
    "notes": "",
    "context_size": 78592,
    "base_params": "-fa on -ctk bf16 -ctv bf16 -fit off",
    "fit_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL -fa on -ctk bf16 -ctv bf16 -fit off",
    "fit_result_params": "-c 78592 -ngl -1",
    "bench_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL -fa 1 -ctk bf16 -ctv bf16 -ngl 999",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-fit",
          "off"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-ngl",
          "999"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T18:15:15Z",
        "avg_ns": 1267567642,
        "stddev_ns": 0,
        "avg_ts": 100.980804,
        "stddev_ts": 0.0,
        "samples_ns": [
          1267567642
        ],
        "samples_ts": [
          100.981
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T18:15:23Z",
        "avg_ns": 1316547538,
        "stddev_ns": 0,
        "avg_ts": 97.223986,
        "stddev_ts": 0.0,
        "samples_ns": [
          1316547538
        ],
        "samples_ts": [
          97.224
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T18:15:31Z",
        "avg_ns": 1414409921,
        "stddev_ns": 0,
        "avg_ts": 90.497103,
        "stddev_ts": 0.0,
        "samples_ns": [
          1414409921
        ],
        "samples_ts": [
          90.4971
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T18:15:43Z",
        "avg_ns": 1613360769,
        "stddev_ns": 0,
        "avg_ts": 79.337494,
        "stddev_ts": 0.0,
        "samples_ns": [
          1613360769
        ],
        "samples_ts": [
          79.3375
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T18:16:01Z",
        "avg_ns": 2023146175,
        "stddev_ns": 0,
        "avg_ts": 63.267796,
        "stddev_ts": 0.0,
        "samples_ns": [
          2023146175
        ],
        "samples_ts": [
          63.2678
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 78592,
        "test_time": "2026-06-08T18:16:34Z",
        "avg_ns": 2190759866,
        "stddev_ns": 0,
        "avg_ts": 58.427216,
        "stddev_ts": 0.0,
        "samples_ns": [
          2190759866
        ],
        "samples_ts": [
          58.4272
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL",
    "hf_repo": "unsloth/gemma-4-26B-A4B-it-GGUF",
    "quantization": "UD-Q5_K_XL",
    "notes": "",
    "context_size": 142336,
    "base_params": "-fa on -ctk q8_0 -ctv q8_0 -fit off",
    "fit_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL -fa on -ctk q8_0 -ctv q8_0 -fit off",
    "fit_result_params": "-c 142336 -ngl -1",
    "bench_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL -fa 1 -ctk q8_0 -ctv q8_0 -ngl 999",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-fit",
          "off"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-ngl",
          "999"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T18:17:22Z",
        "avg_ns": 1141482249,
        "stddev_ns": 0,
        "avg_ts": 112.134902,
        "stddev_ts": 0.0,
        "samples_ns": [
          1141482249
        ],
        "samples_ts": [
          112.135
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T18:17:30Z",
        "avg_ns": 1171811273,
        "stddev_ns": 0,
        "avg_ts": 109.232607,
        "stddev_ts": 0.0,
        "samples_ns": [
          1171811273
        ],
        "samples_ts": [
          109.233
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T18:17:38Z",
        "avg_ns": 1239822078,
        "stddev_ns": 0,
        "avg_ts": 103.24062,
        "stddev_ts": 0.0,
        "samples_ns": [
          1239822078
        ],
        "samples_ts": [
          103.241
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T18:17:50Z",
        "avg_ns": 1376763401,
        "stddev_ns": 0,
        "avg_ts": 92.971675,
        "stddev_ts": 0.0,
        "samples_ns": [
          1376763401
        ],
        "samples_ts": [
          92.9717
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T18:18:07Z",
        "avg_ns": 1646050609,
        "stddev_ns": 0,
        "avg_ts": 77.761886,
        "stddev_ts": 0.0,
        "samples_ns": [
          1646050609
        ],
        "samples_ts": [
          77.7619
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T18:18:40Z",
        "avg_ns": 2182394157,
        "stddev_ns": 0,
        "avg_ts": 58.651183,
        "stddev_ts": 0.0,
        "samples_ns": [
          2182394157
        ],
        "samples_ts": [
          58.6512
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 142336,
        "test_time": "2026-06-08T18:20:00Z",
        "avg_ns": 2275369830,
        "stddev_ns": 0,
        "avg_ts": 56.254591,
        "stddev_ts": 0.0,
        "samples_ns": [
          2275369830
        ],
        "samples_ts": [
          56.2546
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL",
    "hf_repo": "unsloth/gemma-4-26B-A4B-it-GGUF",
    "quantization": "UD-Q5_K_XL",
    "notes": "",
    "context_size": 90624,
    "base_params": "-fa on -ctk f16 -ctv f16 -fit off",
    "fit_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL -fa on -ctk f16 -ctv f16 -fit off",
    "fit_result_params": "-c 90624 -ngl -1",
    "bench_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL -fa 1 -ctk f16 -ctv f16 -ngl 999",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-fit",
          "off"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-ngl",
          "999"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T18:21:35Z",
        "avg_ns": 1079263653,
        "stddev_ns": 0,
        "avg_ts": 118.59938,
        "stddev_ts": 0.0,
        "samples_ns": [
          1079263653
        ],
        "samples_ts": [
          118.599
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T18:21:42Z",
        "avg_ns": 1091169824,
        "stddev_ns": 0,
        "avg_ts": 117.305297,
        "stddev_ts": 0.0,
        "samples_ns": [
          1091169824
        ],
        "samples_ts": [
          117.305
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T18:21:51Z",
        "avg_ns": 1116427626,
        "stddev_ns": 0,
        "avg_ts": 114.651409,
        "stddev_ts": 0.0,
        "samples_ns": [
          1116427626
        ],
        "samples_ts": [
          114.651
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T18:22:02Z",
        "avg_ns": 1169459977,
        "stddev_ns": 0,
        "avg_ts": 109.452228,
        "stddev_ts": 0.0,
        "samples_ns": [
          1169459977
        ],
        "samples_ts": [
          109.452
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T18:22:19Z",
        "avg_ns": 1279095047,
        "stddev_ns": 0,
        "avg_ts": 100.070749,
        "stddev_ts": 0.0,
        "samples_ns": [
          1279095047
        ],
        "samples_ts": [
          100.071
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 90624,
        "test_time": "2026-06-08T18:22:52Z",
        "avg_ns": 1362358639,
        "stddev_ns": 0,
        "avg_ts": 93.954702,
        "stddev_ts": 0.0,
        "samples_ns": [
          1362358639
        ],
        "samples_ts": [
          93.9547
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL",
    "hf_repo": "unsloth/gemma-4-26B-A4B-it-GGUF",
    "quantization": "UD-Q5_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk bf16 -ctv bf16 -fit on -c 262144",
    "fit_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL -fa on -ctk bf16 -ctv bf16 -fit on -c 262144",
    "fit_result_params": "-c 262144 -ngl 31 -ot 'blk\\.23\\.ffn_(gate|gate_up|down).*=CPU,blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL -fa 1 -ctk bf16 -ctv bf16 -ngl 31 -ot 'blk\\.23\\.ffn_(gate|gate_up|down).*=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "262144"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-ngl",
          "31"
        ],
        [
          "-ot",
          "blk\\.23\\.ffn_(gate|gate_up|down).*=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.23\\.ffn_(gate|gate_up|down).*=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T18:23:51Z",
        "avg_ns": 1973047736,
        "stddev_ns": 0,
        "avg_ts": 64.874254,
        "stddev_ts": 0.0,
        "samples_ns": [
          1973047736
        ],
        "samples_ts": [
          64.8743
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.23\\.ffn_(gate|gate_up|down).*=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T18:24:01Z",
        "avg_ns": 2011557131,
        "stddev_ns": 0,
        "avg_ts": 63.632297,
        "stddev_ts": 0.0,
        "samples_ns": [
          2011557131
        ],
        "samples_ts": [
          63.6323
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.23\\.ffn_(gate|gate_up|down).*=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T18:24:12Z",
        "avg_ns": 2106679555,
        "stddev_ns": 0,
        "avg_ts": 60.759122,
        "stddev_ts": 0.0,
        "samples_ns": [
          2106679555
        ],
        "samples_ts": [
          60.7591
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.23\\.ffn_(gate|gate_up|down).*=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T18:24:28Z",
        "avg_ns": 2294616339,
        "stddev_ns": 0,
        "avg_ts": 55.782746,
        "stddev_ts": 0.0,
        "samples_ns": [
          2294616339
        ],
        "samples_ts": [
          55.7827
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.23\\.ffn_(gate|gate_up|down).*=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T18:24:53Z",
        "avg_ns": 2677411676,
        "stddev_ns": 0,
        "avg_ts": 47.807366,
        "stddev_ts": 0.0,
        "samples_ns": [
          2677411676
        ],
        "samples_ts": [
          47.8074
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.23\\.ffn_(gate|gate_up|down).*=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T18:25:40Z",
        "avg_ns": 3475516723,
        "stddev_ns": 0,
        "avg_ts": 36.829056,
        "stddev_ts": 0.0,
        "samples_ns": [
          3475516723
        ],
        "samples_ts": [
          36.8291
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.23\\.ffn_(gate|gate_up|down).*=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-08T18:27:27Z",
        "avg_ns": 5086773984,
        "stddev_ns": 0,
        "avg_ts": 25.163296,
        "stddev_ts": 0.0,
        "samples_ns": [
          5086773984
        ],
        "samples_ts": [
          25.1633
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL",
    "hf_repo": "unsloth/gemma-4-26B-A4B-it-GGUF",
    "quantization": "UD-Q5_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk q8_0 -ctv q8_0 -fit on -c 262144",
    "fit_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL -fa on -ctk q8_0 -ctv q8_0 -fit on -c 262144",
    "fit_result_params": "-c 262144 -ngl 31 -ot 'blk\\.27\\.ffn_(gate|gate_up|down).*=CPU,blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL -fa 1 -ctk q8_0 -ctv q8_0 -ngl 31 -ot 'blk\\.27\\.ffn_(gate|gate_up|down).*=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "262144"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-ngl",
          "31"
        ],
        [
          "-ot",
          "blk\\.27\\.ffn_(gate|gate_up|down).*=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.27\\.ffn_(gate|gate_up|down).*=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T18:32:18Z",
        "avg_ns": 1440885828,
        "stddev_ns": 0,
        "avg_ts": 88.834242,
        "stddev_ts": 0.0,
        "samples_ns": [
          1440885828
        ],
        "samples_ts": [
          88.8342
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.27\\.ffn_(gate|gate_up|down).*=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T18:32:26Z",
        "avg_ns": 1476067823,
        "stddev_ns": 0,
        "avg_ts": 86.716883,
        "stddev_ts": 0.0,
        "samples_ns": [
          1476067823
        ],
        "samples_ts": [
          86.7169
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.27\\.ffn_(gate|gate_up|down).*=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T18:32:35Z",
        "avg_ns": 1542388606,
        "stddev_ns": 0,
        "avg_ts": 82.988165,
        "stddev_ts": 0.0,
        "samples_ns": [
          1542388606
        ],
        "samples_ts": [
          82.9882
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.27\\.ffn_(gate|gate_up|down).*=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T18:32:48Z",
        "avg_ns": 1674975883,
        "stddev_ns": 0,
        "avg_ts": 76.419011,
        "stddev_ts": 0.0,
        "samples_ns": [
          1674975883
        ],
        "samples_ts": [
          76.419
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.27\\.ffn_(gate|gate_up|down).*=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T18:33:08Z",
        "avg_ns": 1946829555,
        "stddev_ns": 0,
        "avg_ts": 65.747923,
        "stddev_ts": 0.0,
        "samples_ns": [
          1946829555
        ],
        "samples_ts": [
          65.7479
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.27\\.ffn_(gate|gate_up|down).*=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T18:33:45Z",
        "avg_ns": 2485585566,
        "stddev_ns": 0,
        "avg_ts": 51.49692,
        "stddev_ts": 0.0,
        "samples_ns": [
          2485585566
        ],
        "samples_ts": [
          51.4969
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.27\\.ffn_(gate|gate_up|down).*=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-08T18:35:12Z",
        "avg_ns": 3553871843,
        "stddev_ns": 0,
        "avg_ts": 36.017056,
        "stddev_ts": 0.0,
        "samples_ns": [
          3553871843
        ],
        "samples_ts": [
          36.0171
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL",
    "hf_repo": "unsloth/gemma-4-26B-A4B-it-GGUF",
    "quantization": "UD-Q5_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk f16 -ctv f16 -fit on -c 262144",
    "fit_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL -fa on -ctk f16 -ctv f16 -fit on -c 262144",
    "fit_result_params": "-c 262144 -ngl 31 -ot 'blk\\.24\\.ffn_(gate|gate_up|down).*=CPU,blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL -fa 1 -ctk f16 -ctv f16 -ngl 31 -ot 'blk\\.24\\.ffn_(gate|gate_up|down).*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "262144"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-ngl",
          "31"
        ],
        [
          "-ot",
          "blk\\.24\\.ffn_(gate|gate_up|down).*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_(gate|gate_up|down).*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T18:39:26Z",
        "avg_ns": 1687220405,
        "stddev_ns": 0,
        "avg_ts": 75.864422,
        "stddev_ts": 0.0,
        "samples_ns": [
          1687220405
        ],
        "samples_ts": [
          75.8644
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_(gate|gate_up|down).*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T18:39:35Z",
        "avg_ns": 1696986991,
        "stddev_ns": 0,
        "avg_ts": 75.427803,
        "stddev_ts": 0.0,
        "samples_ns": [
          1696986991
        ],
        "samples_ts": [
          75.4278
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_(gate|gate_up|down).*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T18:39:45Z",
        "avg_ns": 1718343385,
        "stddev_ns": 0,
        "avg_ts": 74.49035,
        "stddev_ts": 0.0,
        "samples_ns": [
          1718343385
        ],
        "samples_ts": [
          74.4903
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_(gate|gate_up|down).*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T18:40:00Z",
        "avg_ns": 1779055790,
        "stddev_ns": 0,
        "avg_ts": 71.948278,
        "stddev_ts": 0.0,
        "samples_ns": [
          1779055790
        ],
        "samples_ts": [
          71.9483
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_(gate|gate_up|down).*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T18:40:23Z",
        "avg_ns": 1878842500,
        "stddev_ns": 0,
        "avg_ts": 68.127052,
        "stddev_ts": 0.0,
        "samples_ns": [
          1878842500
        ],
        "samples_ts": [
          68.1271
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_(gate|gate_up|down).*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T18:41:07Z",
        "avg_ns": 2101086893,
        "stddev_ns": 0,
        "avg_ts": 60.92085,
        "stddev_ts": 0.0,
        "samples_ns": [
          2101086893
        ],
        "samples_ts": [
          60.9209
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q5_K - Medium",
        "model_size": 21201943672,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_(gate|gate_up|down).*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-08T18:42:47Z",
        "avg_ns": 2525489293,
        "stddev_ns": 0,
        "avg_ts": 50.683248,
        "stddev_ts": 0.0,
        "samples_ns": [
          2525489293
        ],
        "samples_ts": [
          50.6832
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL",
    "hf_repo": "unsloth/gemma-4-26B-A4B-it-GGUF",
    "quantization": "UD-Q4_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk bf16 -ctv bf16 -fit on -c 262144",
    "fit_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL -fa on -ctk bf16 -ctv bf16 -fit on -c 262144",
    "fit_result_params": "-c 262144 -ngl 31 -ot 'blk\\.29\\.ffn_down.*=CPU,blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL -fa 1 -ctk bf16 -ctv bf16 -ngl 31 -ot 'blk\\.29\\.ffn_down.*=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "262144"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-ngl",
          "31"
        ],
        [
          "-ot",
          "blk\\.29\\.ffn_down.*=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.29\\.ffn_down.*=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T18:47:24Z",
        "avg_ns": 1271136065,
        "stddev_ns": 0,
        "avg_ts": 100.697324,
        "stddev_ts": 0.0,
        "samples_ns": [
          1271136065
        ],
        "samples_ts": [
          100.697
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.29\\.ffn_down.*=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T18:47:32Z",
        "avg_ns": 1317915569,
        "stddev_ns": 0,
        "avg_ts": 97.123065,
        "stddev_ts": 0.0,
        "samples_ns": [
          1317915569
        ],
        "samples_ts": [
          97.1231
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.29\\.ffn_down.*=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T18:47:41Z",
        "avg_ns": 1417089572,
        "stddev_ns": 0,
        "avg_ts": 90.325977,
        "stddev_ts": 0.0,
        "samples_ns": [
          1417089572
        ],
        "samples_ts": [
          90.326
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.29\\.ffn_down.*=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T18:47:52Z",
        "avg_ns": 1619375830,
        "stddev_ns": 0,
        "avg_ts": 79.0428,
        "stddev_ts": 0.0,
        "samples_ns": [
          1619375830
        ],
        "samples_ts": [
          79.0428
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.29\\.ffn_down.*=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T18:48:10Z",
        "avg_ns": 2027832849,
        "stddev_ns": 0,
        "avg_ts": 63.121573,
        "stddev_ts": 0.0,
        "samples_ns": [
          2027832849
        ],
        "samples_ts": [
          63.1216
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.29\\.ffn_down.*=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T18:48:44Z",
        "avg_ns": 2838779122,
        "stddev_ns": 0,
        "avg_ts": 45.089806,
        "stddev_ts": 0.0,
        "samples_ns": [
          2838779122
        ],
        "samples_ts": [
          45.0898
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.29\\.ffn_down.*=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-08T18:50:06Z",
        "avg_ns": 4477109008,
        "stddev_ns": 0,
        "avg_ts": 28.589878,
        "stddev_ts": 0.0,
        "samples_ns": [
          4477109008
        ],
        "samples_ts": [
          28.5899
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL",
    "hf_repo": "unsloth/gemma-4-26B-A4B-it-GGUF",
    "quantization": "UD-Q4_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk q8_0 -ctv q8_0 -fit on -c 262144",
    "fit_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL -fa on -ctk q8_0 -ctv q8_0 -fit on -c 262144",
    "fit_result_params": "-c 262144 -ngl -1",
    "bench_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL -fa 1 -ctk q8_0 -ctv q8_0 -ngl 999",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "262144"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-ngl",
          "999"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T18:54:10Z",
        "avg_ns": 1114761817,
        "stddev_ns": 0,
        "avg_ts": 114.822734,
        "stddev_ts": 0.0,
        "samples_ns": [
          1114761817
        ],
        "samples_ts": [
          114.823
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T18:54:17Z",
        "avg_ns": 1139508493,
        "stddev_ns": 0,
        "avg_ts": 112.329132,
        "stddev_ts": 0.0,
        "samples_ns": [
          1139508493
        ],
        "samples_ts": [
          112.329
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T18:54:26Z",
        "avg_ns": 1204998695,
        "stddev_ns": 0,
        "avg_ts": 106.224181,
        "stddev_ts": 0.0,
        "samples_ns": [
          1204998695
        ],
        "samples_ts": [
          106.224
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T18:54:37Z",
        "avg_ns": 1342752140,
        "stddev_ns": 0,
        "avg_ts": 95.326603,
        "stddev_ts": 0.0,
        "samples_ns": [
          1342752140
        ],
        "samples_ts": [
          95.3266
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T18:54:54Z",
        "avg_ns": 1610487011,
        "stddev_ns": 0,
        "avg_ts": 79.479064,
        "stddev_ts": 0.0,
        "samples_ns": [
          1610487011
        ],
        "samples_ts": [
          79.4791
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T18:55:27Z",
        "avg_ns": 2149912886,
        "stddev_ns": 0,
        "avg_ts": 59.537296,
        "stddev_ts": 0.0,
        "samples_ns": [
          2149912886
        ],
        "samples_ts": [
          59.5373
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-08T18:56:46Z",
        "avg_ns": 3210415126,
        "stddev_ns": 0,
        "avg_ts": 39.870233,
        "stddev_ts": 0.0,
        "samples_ns": [
          3210415126
        ],
        "samples_ts": [
          39.8702
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL",
    "hf_repo": "unsloth/gemma-4-26B-A4B-it-GGUF",
    "quantization": "UD-Q4_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk f16 -ctv f16 -fit on -c 262144",
    "fit_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL -fa on -ctk f16 -ctv f16 -fit on -c 262144",
    "fit_result_params": "-c 262144 -ngl -1",
    "bench_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q4_K_XL -fa 1 -ctk f16 -ctv f16 -ngl 999",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "262144"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-ngl",
          "999"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T19:00:38Z",
        "avg_ns": 1046103439,
        "stddev_ns": 0,
        "avg_ts": 122.358837,
        "stddev_ts": 0.0,
        "samples_ns": [
          1046103439
        ],
        "samples_ts": [
          122.359
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T19:00:46Z",
        "avg_ns": 1056276213,
        "stddev_ns": 0,
        "avg_ts": 121.180425,
        "stddev_ts": 0.0,
        "samples_ns": [
          1056276213
        ],
        "samples_ts": [
          121.18
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T19:00:54Z",
        "avg_ns": 1080931580,
        "stddev_ns": 0,
        "avg_ts": 118.416376,
        "stddev_ts": 0.0,
        "samples_ns": [
          1080931580
        ],
        "samples_ts": [
          118.416
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T19:01:05Z",
        "avg_ns": 1136234791,
        "stddev_ns": 0,
        "avg_ts": 112.652773,
        "stddev_ts": 0.0,
        "samples_ns": [
          1136234791
        ],
        "samples_ts": [
          112.653
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T19:01:23Z",
        "avg_ns": 1245560214,
        "stddev_ns": 0,
        "avg_ts": 102.765004,
        "stddev_ts": 0.0,
        "samples_ns": [
          1245560214
        ],
        "samples_ts": [
          102.765
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T19:01:55Z",
        "avg_ns": 1457777950,
        "stddev_ns": 0,
        "avg_ts": 87.804868,
        "stddev_ts": 0.0,
        "samples_ns": [
          1457777950
        ],
        "samples_ts": [
          87.8049
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q4_K - Medium",
        "model_size": 16995155064,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 999,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "none",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-08T19:03:12Z",
        "avg_ns": 1882529663,
        "stddev_ns": 0,
        "avg_ts": 67.993617,
        "stddev_ts": 0.0,
        "samples_ns": [
          1882529663
        ],
        "samples_ts": [
          67.9936
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q6_K_XL",
    "hf_repo": "unsloth/gemma-4-26B-A4B-it-GGUF",
    "quantization": "UD-Q6_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk bf16 -ctv bf16 -fit on -c 262144",
    "fit_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q6_K_XL -fa on -ctk bf16 -ctv bf16 -fit on -c 262144",
    "fit_result_params": "-c 262144 -ngl 31 -ot 'blk\\.20\\.ffn_down.*=CPU,blk\\.21\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q6_K_XL -fa 1 -ctk bf16 -ctv bf16 -ngl 31 -ot 'blk\\.20\\.ffn_down.*=CPU;blk\\.21\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "262144"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-ngl",
          "31"
        ],
        [
          "-ot",
          "blk\\.20\\.ffn_down.*=CPU;blk\\.21\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.20\\.ffn_down.*=CPU;blk\\.21\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T19:07:13Z",
        "avg_ns": 2311577601,
        "stddev_ns": 0,
        "avg_ts": 55.373438,
        "stddev_ts": 0.0,
        "samples_ns": [
          2311577601
        ],
        "samples_ts": [
          55.3734
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.20\\.ffn_down.*=CPU;blk\\.21\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T19:07:23Z",
        "avg_ns": 2358861364,
        "stddev_ns": 0,
        "avg_ts": 54.263469,
        "stddev_ts": 0.0,
        "samples_ns": [
          2358861364
        ],
        "samples_ts": [
          54.2635
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.20\\.ffn_down.*=CPU;blk\\.21\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T19:07:36Z",
        "avg_ns": 2487256641,
        "stddev_ns": 0,
        "avg_ts": 51.462321,
        "stddev_ts": 0.0,
        "samples_ns": [
          2487256641
        ],
        "samples_ts": [
          51.4623
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.20\\.ffn_down.*=CPU;blk\\.21\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T19:07:54Z",
        "avg_ns": 2632716371,
        "stddev_ns": 0,
        "avg_ts": 48.618986,
        "stddev_ts": 0.0,
        "samples_ns": [
          2632716371
        ],
        "samples_ts": [
          48.619
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.20\\.ffn_down.*=CPU;blk\\.21\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T19:08:24Z",
        "avg_ns": 3014820697,
        "stddev_ns": 0,
        "avg_ts": 42.45692,
        "stddev_ts": 0.0,
        "samples_ns": [
          3014820697
        ],
        "samples_ts": [
          42.4569
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.20\\.ffn_down.*=CPU;blk\\.21\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T19:09:18Z",
        "avg_ns": 3802745938,
        "stddev_ns": 0,
        "avg_ts": 33.659887,
        "stddev_ts": 0.0,
        "samples_ns": [
          3802745938
        ],
        "samples_ts": [
          33.6599
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.20\\.ffn_down.*=CPU;blk\\.21\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-08T19:11:20Z",
        "avg_ns": 5401602907,
        "stddev_ns": 0,
        "avg_ts": 23.69667,
        "stddev_ts": 0.0,
        "samples_ns": [
          5401602907
        ],
        "samples_ts": [
          23.6967
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q6_K_XL",
    "hf_repo": "unsloth/gemma-4-26B-A4B-it-GGUF",
    "quantization": "UD-Q6_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk q8_0 -ctv q8_0 -fit on -c 262144",
    "fit_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q6_K_XL -fa on -ctk q8_0 -ctv q8_0 -fit on -c 262144",
    "fit_result_params": "-c 262144 -ngl 31 -ot 'blk\\.24\\.ffn_down.*=CPU,blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q6_K_XL -fa 1 -ctk q8_0 -ctv q8_0 -ngl 31 -ot 'blk\\.24\\.ffn_down.*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "262144"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-ngl",
          "31"
        ],
        [
          "-ot",
          "blk\\.24\\.ffn_down.*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_down.*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T19:16:40Z",
        "avg_ns": 1770532825,
        "stddev_ns": 0,
        "avg_ts": 72.294621,
        "stddev_ts": 0.0,
        "samples_ns": [
          1770532825
        ],
        "samples_ts": [
          72.2946
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_down.*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T19:16:49Z",
        "avg_ns": 1789868501,
        "stddev_ns": 0,
        "avg_ts": 71.513634,
        "stddev_ts": 0.0,
        "samples_ns": [
          1789868501
        ],
        "samples_ts": [
          71.5136
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_down.*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T19:17:00Z",
        "avg_ns": 1852117212,
        "stddev_ns": 0,
        "avg_ts": 69.110097,
        "stddev_ts": 0.0,
        "samples_ns": [
          1852117212
        ],
        "samples_ts": [
          69.1101
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_down.*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T19:17:14Z",
        "avg_ns": 1986779883,
        "stddev_ns": 0,
        "avg_ts": 64.425859,
        "stddev_ts": 0.0,
        "samples_ns": [
          1986779883
        ],
        "samples_ts": [
          64.4259
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_down.*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T19:17:38Z",
        "avg_ns": 2258347632,
        "stddev_ns": 0,
        "avg_ts": 56.678608,
        "stddev_ts": 0.0,
        "samples_ns": [
          2258347632
        ],
        "samples_ts": [
          56.6786
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_down.*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T19:18:23Z",
        "avg_ns": 2798213214,
        "stddev_ns": 0,
        "avg_ts": 45.743476,
        "stddev_ts": 0.0,
        "samples_ns": [
          2798213214
        ],
        "samples_ts": [
          45.7435
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.24\\.ffn_down.*=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-08T19:20:05Z",
        "avg_ns": 3865103518,
        "stddev_ns": 0,
        "avg_ts": 33.116836,
        "stddev_ts": 0.0,
        "samples_ns": [
          3865103518
        ],
        "samples_ts": [
          33.1168
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q6_K_XL",
    "hf_repo": "unsloth/gemma-4-26B-A4B-it-GGUF",
    "quantization": "UD-Q6_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk f16 -ctv f16 -fit on -c 262144",
    "fit_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q6_K_XL -fa on -ctk f16 -ctv f16 -fit on -c 262144",
    "fit_result_params": "-c 262144 -ngl 31 -ot 'blk\\.21\\.ffn_down.*=CPU,blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q6_K_XL -fa 1 -ctk f16 -ctv f16 -ngl 31 -ot 'blk\\.21\\.ffn_down.*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "262144"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-ngl",
          "31"
        ],
        [
          "-ot",
          "blk\\.21\\.ffn_down.*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_down.*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-08T19:24:48Z",
        "avg_ns": 2029032730,
        "stddev_ns": 0,
        "avg_ts": 63.084246,
        "stddev_ts": 0.0,
        "samples_ns": [
          2029032730
        ],
        "samples_ts": [
          63.0842
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_down.*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-08T19:24:58Z",
        "avg_ns": 2043805741,
        "stddev_ns": 0,
        "avg_ts": 62.628261,
        "stddev_ts": 0.0,
        "samples_ns": [
          2043805741
        ],
        "samples_ts": [
          62.6283
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_down.*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-08T19:25:10Z",
        "avg_ns": 2068935064,
        "stddev_ns": 0,
        "avg_ts": 61.867577,
        "stddev_ts": 0.0,
        "samples_ns": [
          2068935064
        ],
        "samples_ts": [
          61.8676
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_down.*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-08T19:25:27Z",
        "avg_ns": 2169224936,
        "stddev_ns": 0,
        "avg_ts": 59.007251,
        "stddev_ts": 0.0,
        "samples_ns": [
          2169224936
        ],
        "samples_ts": [
          59.0073
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_down.*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-08T19:25:55Z",
        "avg_ns": 2221882953,
        "stddev_ns": 0,
        "avg_ts": 57.608795,
        "stddev_ts": 0.0,
        "samples_ns": [
          2221882953
        ],
        "samples_ts": [
          57.6088
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_down.*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-08T19:26:46Z",
        "avg_ns": 2437139170,
        "stddev_ns": 0,
        "avg_ts": 52.520595,
        "stddev_ts": 0.0,
        "samples_ns": [
          2437139170
        ],
        "samples_ts": [
          52.5206
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/0a3dba91407e8d492e62a8ec6be90da57a74f738/gemma-4-26B-A4B-it-UD-Q6_K_XL.gguf",
        "model_type": "gemma4 26B.A4B Q6_K",
        "model_size": 23279565944,
        "model_n_params": 25233142046,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 31,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.21\\.ffn_down.*=CPU;blk\\.22\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.23\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.24\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.25\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-08T19:28:41Z",
        "avg_ns": 2859670186,
        "stddev_ns": 0,
        "avg_ts": 44.760407,
        "stddev_ts": 0.0,
        "samples_ns": [
          2859670186
        ],
        "samples_ts": [
          44.7604
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q5_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q5_K_XL",
    "notes": "",
    "context_size": 131072,
    "base_params": "-fa on -ctk f16 -ctv f16 -fit on -c 131072",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q5_K_XL -fa on -ctk f16 -ctv f16 -fit on -c 131072",
    "fit_result_params": "-c 131072 -ngl 41 -ot 'blk\\.31\\.ffn_(gate|up|gate_up|down).*=CPU,blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q5_K_XL -fa 1 -ctk f16 -ctv f16 -ngl 41 -ot 'blk\\.31\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "131072"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-ngl",
          "41"
        ],
        [
          "-ot",
          "blk\\.31\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.31\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-09T17:28:46Z",
        "avg_ns": 1340106593,
        "stddev_ns": 0,
        "avg_ts": 95.51479,
        "stddev_ts": 0.0,
        "samples_ns": [
          1340106593
        ],
        "samples_ts": [
          95.5148
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.31\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-09T17:28:55Z",
        "avg_ns": 1344377760,
        "stddev_ns": 0,
        "avg_ts": 95.211334,
        "stddev_ts": 0.0,
        "samples_ns": [
          1344377760
        ],
        "samples_ts": [
          95.2113
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.31\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-09T17:29:06Z",
        "avg_ns": 1403269657,
        "stddev_ns": 0,
        "avg_ts": 91.21554,
        "stddev_ts": 0.0,
        "samples_ns": [
          1403269657
        ],
        "samples_ts": [
          91.2155
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.31\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-09T17:29:22Z",
        "avg_ns": 1426945879,
        "stddev_ns": 0,
        "avg_ts": 89.702071,
        "stddev_ts": 0.0,
        "samples_ns": [
          1426945879
        ],
        "samples_ts": [
          89.7021
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.31\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-09T17:29:48Z",
        "avg_ns": 1557877788,
        "stddev_ns": 0,
        "avg_ts": 82.163056,
        "stddev_ts": 0.0,
        "samples_ns": [
          1557877788
        ],
        "samples_ts": [
          82.1631
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.31\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-09T17:30:38Z",
        "avg_ns": 1797883423,
        "stddev_ns": 0,
        "avg_ts": 71.194827,
        "stddev_ts": 0.0,
        "samples_ns": [
          1797883423
        ],
        "samples_ts": [
          71.1948
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q5_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q5_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk f16 -ctv f16 -fit on -c 262144",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q5_K_XL -fa on -ctk f16 -ctv f16 -fit on -c 262144",
    "fit_result_params": "-c 262144 -ngl 41 -ot 'blk\\.26\\.ffn_(gate|gate_up|down).*=CPU,blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q5_K_XL -fa 1 -ctk f16 -ctv f16 -ngl 41 -ot 'blk\\.26\\.ffn_(gate|gate_up|down).*=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "262144"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "f16"
        ],
        [
          "-ctv",
          "f16"
        ],
        [
          "-ngl",
          "41"
        ],
        [
          "-ot",
          "blk\\.26\\.ffn_(gate|gate_up|down).*=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.26\\.ffn_(gate|gate_up|down).*=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-09T17:32:44Z",
        "avg_ns": 1624006114,
        "stddev_ns": 0,
        "avg_ts": 78.817437,
        "stddev_ts": 0.0,
        "samples_ns": [
          1624006114
        ],
        "samples_ts": [
          78.8174
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.26\\.ffn_(gate|gate_up|down).*=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-09T17:32:53Z",
        "avg_ns": 1638417622,
        "stddev_ns": 0,
        "avg_ts": 78.12416,
        "stddev_ts": 0.0,
        "samples_ns": [
          1638417622
        ],
        "samples_ts": [
          78.1242
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.26\\.ffn_(gate|gate_up|down).*=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-09T17:33:06Z",
        "avg_ns": 1664376041,
        "stddev_ns": 0,
        "avg_ts": 76.905697,
        "stddev_ts": 0.0,
        "samples_ns": [
          1664376041
        ],
        "samples_ts": [
          76.9057
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.26\\.ffn_(gate|gate_up|down).*=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-09T17:33:24Z",
        "avg_ns": 1687071971,
        "stddev_ns": 0,
        "avg_ts": 75.871096,
        "stddev_ts": 0.0,
        "samples_ns": [
          1687071971
        ],
        "samples_ts": [
          75.8711
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.26\\.ffn_(gate|gate_up|down).*=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-09T17:33:55Z",
        "avg_ns": 1794318810,
        "stddev_ns": 0,
        "avg_ts": 71.336264,
        "stddev_ts": 0.0,
        "samples_ns": [
          1794318810
        ],
        "samples_ts": [
          71.3363
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.26\\.ffn_(gate|gate_up|down).*=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-09T17:34:52Z",
        "avg_ns": 2052247920,
        "stddev_ns": 0,
        "avg_ts": 62.370632,
        "stddev_ts": 0.0,
        "samples_ns": [
          2052247920
        ],
        "samples_ts": [
          62.3706
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "f16",
        "type_v": "f16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.26\\.ffn_(gate|gate_up|down).*=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-09T17:36:51Z",
        "avg_ns": 2447843586,
        "stddev_ns": 0,
        "avg_ts": 52.290923,
        "stddev_ts": 0.0,
        "samples_ns": [
          2447843586
        ],
        "samples_ts": [
          52.2909
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q5_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q5_K_XL",
    "notes": "",
    "context_size": 131072,
    "base_params": "-fa on -ctk q8_0 -ctv q8_0 -fit on -c 131072",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q5_K_XL -fa on -ctk q8_0 -ctv q8_0 -fit on -c 131072",
    "fit_result_params": "-c 131072 -ngl 41 -ot 'blk\\.33\\.ffn_(gate|up|gate_up|down).*=CPU,blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q5_K_XL -fa 1 -ctk q8_0 -ctv q8_0 -ngl 41 -ot 'blk\\.33\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "131072"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-ngl",
          "41"
        ],
        [
          "-ot",
          "blk\\.33\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.33\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-09T17:41:53Z",
        "avg_ns": 1281393311,
        "stddev_ns": 0,
        "avg_ts": 99.891266,
        "stddev_ts": 0.0,
        "samples_ns": [
          1281393311
        ],
        "samples_ts": [
          99.8913
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.33\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-09T17:42:02Z",
        "avg_ns": 1301816444,
        "stddev_ns": 0,
        "avg_ts": 98.324154,
        "stddev_ts": 0.0,
        "samples_ns": [
          1301816444
        ],
        "samples_ts": [
          98.3242
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.33\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-09T17:42:12Z",
        "avg_ns": 1364836639,
        "stddev_ns": 0,
        "avg_ts": 93.784118,
        "stddev_ts": 0.0,
        "samples_ns": [
          1364836639
        ],
        "samples_ts": [
          93.7841
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.33\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-09T17:42:28Z",
        "avg_ns": 1507065885,
        "stddev_ns": 0,
        "avg_ts": 84.933248,
        "stddev_ts": 0.0,
        "samples_ns": [
          1507065885
        ],
        "samples_ts": [
          84.9332
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.33\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-09T17:42:53Z",
        "avg_ns": 1781520728,
        "stddev_ns": 0,
        "avg_ts": 71.848729,
        "stddev_ts": 0.0,
        "samples_ns": [
          1781520728
        ],
        "samples_ts": [
          71.8487
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.33\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-09T17:43:40Z",
        "avg_ns": 2370163080,
        "stddev_ns": 0,
        "avg_ts": 54.004723,
        "stddev_ts": 0.0,
        "samples_ns": [
          2370163080
        ],
        "samples_ts": [
          54.0047
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q5_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q5_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk q8_0 -ctv q8_0 -fit on -c 262144",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q5_K_XL -fa on -ctk q8_0 -ctv q8_0 -fit on -c 262144",
    "fit_result_params": "-c 262144 -ngl 41 -ot 'blk\\.30\\.ffn_(gate|up|gate_up|down).*=CPU,blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q5_K_XL -fa 1 -ctk q8_0 -ctv q8_0 -ngl 41 -ot 'blk\\.30\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "262144"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "q8_0"
        ],
        [
          "-ctv",
          "q8_0"
        ],
        [
          "-ngl",
          "41"
        ],
        [
          "-ot",
          "blk\\.30\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.30\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-09T17:45:37Z",
        "avg_ns": 1446603442,
        "stddev_ns": 0,
        "avg_ts": 88.48313,
        "stddev_ts": 0.0,
        "samples_ns": [
          1446603442
        ],
        "samples_ts": [
          88.4831
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.30\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-09T17:45:46Z",
        "avg_ns": 1471188960,
        "stddev_ns": 0,
        "avg_ts": 87.004459,
        "stddev_ts": 0.0,
        "samples_ns": [
          1471188960
        ],
        "samples_ts": [
          87.0045
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.30\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-09T17:45:58Z",
        "avg_ns": 1528705252,
        "stddev_ns": 0,
        "avg_ts": 83.730987,
        "stddev_ts": 0.0,
        "samples_ns": [
          1528705252
        ],
        "samples_ts": [
          83.731
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.30\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-09T17:46:15Z",
        "avg_ns": 1651253521,
        "stddev_ns": 0,
        "avg_ts": 77.516867,
        "stddev_ts": 0.0,
        "samples_ns": [
          1651253521
        ],
        "samples_ts": [
          77.5169
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.30\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-09T17:46:42Z",
        "avg_ns": 2066306781,
        "stddev_ns": 0,
        "avg_ts": 61.946271,
        "stddev_ts": 0.0,
        "samples_ns": [
          2066306781
        ],
        "samples_ts": [
          61.9463
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.30\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-09T17:47:33Z",
        "avg_ns": 2478701695,
        "stddev_ns": 0,
        "avg_ts": 51.639937,
        "stddev_ts": 0.0,
        "samples_ns": [
          2478701695
        ],
        "samples_ts": [
          51.6399
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "q8_0",
        "type_v": "q8_0",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.30\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-09T17:49:22Z",
        "avg_ns": 3652661228,
        "stddev_ns": 0,
        "avg_ts": 35.042943,
        "stddev_ts": 0.0,
        "samples_ns": [
          3652661228
        ],
        "samples_ts": [
          35.0429
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q5_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q5_K_XL",
    "notes": "",
    "context_size": 131072,
    "base_params": "-fa on -ctk bf16 -ctv bf16 -fit on -c 131072",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q5_K_XL -fa on -ctk bf16 -ctv bf16 -fit on -c 131072",
    "fit_result_params": "-c 131072 -ngl 41 -ot 'blk\\.31\\.ffn_(gate|up|gate_up|down).*=CPU,blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q5_K_XL -fa 1 -ctk bf16 -ctv bf16 -ngl 41 -ot 'blk\\.31\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "131072"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-ngl",
          "41"
        ],
        [
          "-ot",
          "blk\\.31\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.31\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-09T17:54:03Z",
        "avg_ns": 1385932674,
        "stddev_ns": 0,
        "avg_ts": 92.356579,
        "stddev_ts": 0.0,
        "samples_ns": [
          1385932674
        ],
        "samples_ts": [
          92.3566
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.31\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-09T17:54:12Z",
        "avg_ns": 1427576692,
        "stddev_ns": 0,
        "avg_ts": 89.662433,
        "stddev_ts": 0.0,
        "samples_ns": [
          1427576692
        ],
        "samples_ts": [
          89.6624
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.31\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-09T17:54:23Z",
        "avg_ns": 1520830639,
        "stddev_ns": 0,
        "avg_ts": 84.164533,
        "stddev_ts": 0.0,
        "samples_ns": [
          1520830639
        ],
        "samples_ts": [
          84.1645
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.31\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-09T17:54:40Z",
        "avg_ns": 1718080604,
        "stddev_ns": 0,
        "avg_ts": 74.501743,
        "stddev_ts": 0.0,
        "samples_ns": [
          1718080604
        ],
        "samples_ts": [
          74.5017
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.31\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-09T17:55:07Z",
        "avg_ns": 2112155248,
        "stddev_ns": 0,
        "avg_ts": 60.601606,
        "stddev_ts": 0.0,
        "samples_ns": [
          2112155248
        ],
        "samples_ts": [
          60.6016
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.31\\.ffn_(gate|up|gate_up|down).*=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-09T17:55:57Z",
        "avg_ns": 2911877270,
        "stddev_ns": 0,
        "avg_ts": 43.957897,
        "stddev_ts": 0.0,
        "samples_ns": [
          2911877270
        ],
        "samples_ts": [
          43.9579
        ]
      }
    ]
  },
  {
    "hf_model": "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q5_K_XL",
    "hf_repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "quantization": "UD-Q5_K_XL",
    "notes": "",
    "context_size": 262144,
    "base_params": "-fa on -ctk bf16 -ctv bf16 -fit on -c 262144",
    "fit_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q5_K_XL -fa on -ctk bf16 -ctv bf16 -fit on -c 262144",
    "fit_result_params": "-c 262144 -ngl 41 -ot 'blk\\.25\\.ffn_down.*=CPU,blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU,blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "bench_params": "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q5_K_XL -fa 1 -ctk bf16 -ctv bf16 -ngl 41 -ot 'blk\\.25\\.ffn_down.*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU'",
    "params": {
      "fit": [
        [
          "-fa",
          "on"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-fit",
          "on"
        ],
        [
          "-c",
          "262144"
        ]
      ],
      "bench": [
        [
          "-fa",
          "1"
        ],
        [
          "-ctk",
          "bf16"
        ],
        [
          "-ctv",
          "bf16"
        ],
        [
          "-ngl",
          "41"
        ],
        [
          "-ot",
          "blk\\.25\\.ffn_down.*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU"
        ]
      ]
    },
    "perf": [
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.25\\.ffn_down.*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 4096,
        "test_time": "2026-06-09T17:58:04Z",
        "avg_ns": 1639597643,
        "stddev_ns": 0,
        "avg_ts": 78.067934,
        "stddev_ts": 0.0,
        "samples_ns": [
          1639597643
        ],
        "samples_ts": [
          78.0679
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.25\\.ffn_down.*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 8192,
        "test_time": "2026-06-09T17:58:14Z",
        "avg_ns": 1682905610,
        "stddev_ns": 0,
        "avg_ts": 76.05893,
        "stddev_ts": 0.0,
        "samples_ns": [
          1682905610
        ],
        "samples_ts": [
          76.0589
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.25\\.ffn_down.*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 16384,
        "test_time": "2026-06-09T17:58:27Z",
        "avg_ns": 1771534797,
        "stddev_ns": 0,
        "avg_ts": 72.253732,
        "stddev_ts": 0.0,
        "samples_ns": [
          1771534797
        ],
        "samples_ts": [
          72.2537
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.25\\.ffn_down.*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 32768,
        "test_time": "2026-06-09T17:58:46Z",
        "avg_ns": 1974024230,
        "stddev_ns": 0,
        "avg_ts": 64.842163,
        "stddev_ts": 0.0,
        "samples_ns": [
          1974024230
        ],
        "samples_ts": [
          64.8422
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.25\\.ffn_down.*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 65536,
        "test_time": "2026-06-09T17:59:17Z",
        "avg_ns": 2350190270,
        "stddev_ns": 0,
        "avg_ts": 54.463675,
        "stddev_ts": 0.0,
        "samples_ns": [
          2350190270
        ],
        "samples_ts": [
          54.4637
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.25\\.ffn_down.*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 131072,
        "test_time": "2026-06-09T18:00:16Z",
        "avg_ns": 3137430456,
        "stddev_ns": 0,
        "avg_ts": 40.797717,
        "stddev_ts": 0.0,
        "samples_ns": [
          3137430456
        ],
        "samples_ts": [
          40.7977
        ]
      },
      {
        "build_commit": "98d5e8ba8",
        "build_number": 9544,
        "cpu_info": "AMD Ryzen 7 5800X 8-Core Processor",
        "gpu_info": "NVIDIA GeForce RTX 3090",
        "backends": "CUDA",
        "model_filename": "/models/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/a483e9e6cbd595906af30beda3187c2663a1118c/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
        "model_type": "qwen35moe 35B.A3B Q5_K - Medium",
        "model_size": 26581518848,
        "model_n_params": 34660610688,
        "n_batch": 2048,
        "n_ubatch": 512,
        "n_threads": 8,
        "cpu_mask": "0x0",
        "cpu_strict": false,
        "poll": 50,
        "type_k": "bf16",
        "type_v": "bf16",
        "n_gpu_layers": 41,
        "n_cpu_moe": 0,
        "split_mode": "layer",
        "main_gpu": 0,
        "no_kv_offload": false,
        "flash_attn": 1,
        "devices": "auto",
        "tensor_split": "0.00",
        "tensor_buft_overrides": "blk\\.25\\.ffn_down.*=CPU;blk\\.26\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.27\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.28\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.29\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.30\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.31\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.32\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.33\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.34\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.35\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.36\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.37\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.38\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.39\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU;blk\\.40\\.ffn_(up|down|gate_up|gate)_(ch|)exps=CPU",
        "use_mmap": false,
        "use_direct_io": false,
        "embeddings": false,
        "no_op_offload": 0,
        "no_host": false,
        "fit_target": 0,
        "fit_min_ctx": 0,
        "n_prompt": 0,
        "n_gen": 128,
        "n_depth": 262144,
        "test_time": "2026-06-09T18:02:20Z",
        "avg_ns": 4737187967,
        "stddev_ns": 0,
        "avg_ts": 27.020249,
        "stddev_ts": 0.0,
        "samples_ns": [
          4737187967
        ],
        "samples_ts": [
          27.0202
        ]
      }
    ]
  }
]
