> ## Documentation Index
> Fetch the complete documentation index at: https://docs.cekura.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Auto-optimise Metrics on a Schedule

> Run the Metric Lab optimiser on a recurring schedule with a Claude Code routine — no UI clicks required.

export const CopyPageButton = () => {
  // Injects a "Copy page" toolbar (copy as Markdown / view as Markdown / open
  // in Claude) at the top of the docs content area. The component renders
  // nothing itself: it always returns null and builds the toolbar with direct
  // DOM calls shortly after render. Without a `window` (server render) it is
  // a no-op.
  if (typeof window === 'undefined') return null;
  setTimeout(() => {
    // Only one toolbar per page.
    if (document.getElementById('ck-tools')) return;
    const anchor = document.getElementById('content-area') || document.querySelector('.mdx-content');
    if (!anchor) return;
    // The stylesheet is shared across toolbar instances, so inject it once.
    if (!document.getElementById('ck-style')) {
      const style = document.createElement('style');
      style.id = 'ck-style';
      style.textContent = [
        '#ck-tools{position:absolute;top:6px;right:0;z-index:100;font-family:inherit;}',
        '.ck-row{display:inline-flex;align-items:stretch;border:1px solid rgba(0,0,0,0.15);border-radius:8px;overflow:hidden;background:#fff;}',
        ':root.dark .ck-row{background:rgba(255,255,255,0.06);border-color:rgba(255,255,255,0.12);}',
        '.ck-btn{padding:5px 12px;border:none;background:none;cursor:pointer;font-size:13px;font-weight:500;font-family:inherit;color:#374151;}',
        ':root.dark .ck-btn{color:#d1d5db;}',
        '.ck-btn:hover{background:rgba(0,0,0,0.04);}',
        ':root.dark .ck-btn:hover{background:rgba(255,255,255,0.06);}',
        '.ck-chevron{padding:5px 8px;border:none;background:none;cursor:pointer;font-size:14px;font-family:inherit;color:#374151;}',
        ':root.dark .ck-chevron{color:#d1d5db;}',
        '.ck-chevron:hover{background:rgba(0,0,0,0.04);}',
        ':root.dark .ck-chevron:hover{background:rgba(255,255,255,0.06);}',
        '.ck-divider{width:1px;background:rgba(0,0,0,0.12);flex-shrink:0;}',
        ':root.dark .ck-divider{background:rgba(255,255,255,0.12);}',
        '.ck-dd{position:absolute;top:calc(100% + 4px);right:0;min-width:180px;background:#fff;border:1px solid rgba(0,0,0,0.12);border-radius:8px;box-shadow:0 4px 12px rgba(0,0,0,0.1);padding:4px;display:none;z-index:200;}',
        ':root.dark .ck-dd{background:#1f2937;border-color:rgba(255,255,255,0.1);box-shadow:0 4px 16px rgba(0,0,0,0.35);}',
        '.ck-item{display:block;width:100%;padding:7px 12px;border:none;background:none;border-radius:6px;cursor:pointer;font-size:13px;font-family:inherit;text-align:left;color:#374151;}',
        ':root.dark .ck-item{color:#d1d5db;}',
        '.ck-item:hover{background:rgba(0,0,0,0.05);}',
        ':root.dark .ck-item:hover{background:rgba(255,255,255,0.07);}',
      ].join('');
      document.head.appendChild(style);
    }
    const wrap = document.createElement('div');
    wrap.id = 'ck-tools';
    const row = document.createElement('div');
    row.className = 'ck-row';
    const mainBtn = document.createElement('button');
    mainBtn.className = 'ck-btn';
    mainBtn.textContent = 'Copy page';
    const divider = document.createElement('span');
    divider.className = 'ck-divider';
    const chevron = document.createElement('button');
    chevron.className = 'ck-chevron';
    chevron.textContent = '▾';
    const dd = document.createElement('div');
    dd.className = 'ck-dd';
    const closeDD = () => {
      dd.style.display = 'none';
    };
    const openDD = () => {
      dd.style.display = 'block';
    };
    chevron.onclick = (e) => {
      // Keep this click from reaching the document-level "click outside" handler.
      e.stopPropagation();
      if (dd.style.display === 'block') {
        closeDD();
      } else {
        openDD();
      }
    };
    // Once the toolbar has left the DOM (e.g. after client-side navigation)
    // the document-level listeners are useless; remove them so they do not
    // accumulate and keep the detached toolbar alive.
    const releaseIfDetached = () => {
      if (wrap.isConnected !== false) return false;
      document.removeEventListener('click', onDocClick);
      document.removeEventListener('keydown', onDocKeydown);
      return true;
    };
    const onDocClick = (e) => {
      if (releaseIfDetached()) return;
      const target = e.target;
      // Event targets are not guaranteed to be Elements, so check for closest().
      const inside = target && typeof target.closest === 'function' && target.closest('#ck-tools');
      if (!inside) closeDD();
    };
    const onDocKeydown = (e) => {
      if (releaseIfDetached()) return;
      if (e.key === 'Escape') closeDD();
    };
    document.addEventListener('click', onDocClick);
    document.addEventListener('keydown', onDocKeydown);
    // Builds one dropdown entry that runs `fn` and then closes the dropdown.
    const makeItem = (label, fn) => {
      const item = document.createElement('button');
      item.className = 'ck-item';
      item.textContent = label;
      item.onclick = () => {
        fn();
        closeDD();
      };
      return item;
    };
    // Tags whose content never belongs in the Markdown export.
    const SKIPPED_TAGS = ['script', 'style', 'svg', 'noscript', 'button', 'iframe'];
    // Recursively converts a DOM node into a rough Markdown string.
    const walk = (node) => {
      if (!node) return '';
      if (node.nodeType === 3) return node.textContent || '';
      if (node.nodeType !== 1) return '';
      const tag = node.tagName.toLowerCase();
      if (SKIPPED_TAGS.includes(tag)) return '';
      if (node.id === 'ck-tools') return '';
      // <pre> is emitted verbatim from its text, not from converted children.
      if (tag === 'pre') return '\n```\n' + node.textContent.trim() + '\n```\n\n';
      const inner = Array.from(node.childNodes).map((child) => walk(child)).join('');
      switch (tag) {
        case 'h1':
          return '\n# ' + inner.trim() + '\n\n';
        case 'h2':
          return '\n## ' + inner.trim() + '\n\n';
        case 'h3':
          return '\n### ' + inner.trim() + '\n\n';
        case 'p':
          return '\n' + inner.trim() + '\n\n';
        case 'li':
          return '- ' + inner.trim() + '\n';
        case 'code':
          return '`' + inner.trim() + '`';
        default:
          return inner;
      }
    };
    // Returns the page content as Markdown, with runs of blank lines collapsed.
    const getMarkdown = () => {
      const content = document.querySelector('.mdx-content') || document.getElementById('content-area') || document.body;
      return walk(content).replace(/\n\n\n+/g, '\n\n').trim();
    };
    // Shows a temporary label on the main button, then restores the default.
    const flashLabel = (text) => {
      mainBtn.textContent = text;
      setTimeout(() => {
        mainBtn.textContent = 'Copy page';
      }, 2000);
    };
    // Copies the page Markdown to the clipboard and reports success or failure.
    const copyMd = () => {
      const md = getMarkdown();
      // The async Clipboard API is missing in non-secure contexts and may
      // reject (e.g. permission denied); surface both instead of throwing.
      const clipboard = typeof navigator !== 'undefined' ? navigator.clipboard : undefined;
      if (!clipboard || typeof clipboard.writeText !== 'function') {
        flashLabel('Copy failed');
        return;
      }
      clipboard.writeText(md).then(
        () => flashLabel('Copied!'),
        () => flashLabel('Copy failed')
      );
    };
    // Opens the page Markdown as an HTML-escaped, monospace document in a new tab.
    const viewMd = () => {
      const md = getMarkdown();
      const safe = md.split('&').join('&amp;').split('<').join('&lt;').split('>').join('&gt;');
      const html = '<!DOCTYPE html><html><head><meta charset="utf-8"><style>body{font-family:monospace;max-width:860px;margin:40px auto;padding:0 24px;line-height:1.7;white-space:pre-wrap;word-wrap:break-word}</style></head><body>' + safe + '</body></html>';
      window.open(URL.createObjectURL(new Blob([html], {
        type: 'text/html'
      })), '_blank');
    };
    // Opens a new Claude chat pre-filled with a prompt that links to this page.
    const openClaude = () => {
      const prompt = 'Can you read this Cekura docs page ' + window.location.href + ' so I can ask you questions?';
      // `noopener` keeps the external page from reaching back via window.opener.
      window.open('https://claude.ai/new?q=' + encodeURIComponent(prompt), '_blank', 'noopener');
    };
    mainBtn.onclick = copyMd;
    dd.appendChild(makeItem('Copy page', copyMd));
    dd.appendChild(makeItem('View as Markdown', viewMd));
    dd.appendChild(makeItem('Open in Claude', openClaude));
    row.appendChild(mainBtn);
    row.appendChild(divider);
    row.appendChild(chevron);
    wrap.appendChild(row);
    wrap.appendChild(dd);
    // The toolbar is absolutely positioned relative to the content area.
    anchor.style.position = 'relative';
    anchor.insertBefore(wrap, anchor.firstChild);
  }, 50);
  return null;
};

<CopyPageButton />

The [Metric Lab](/documentation/guides/metric-lab) **Auto Improve** button rewrites a metric's prompt based on the feedback annotations on its test sets. This guide shows how to run that same optimiser on a recurring schedule using a [Claude Code](https://www.claude.com/product/claude-code) routine and the Cekura MCP — so newly-annotated test sets feed back into your metric prompts automatically.

<Note>
  **Optimiser input is human feedback, not raw test sets.** The optimiser reads the annotations and notes you've left on `MetricReview` rows. If no new feedback has been added since the last run, re-running will not change the prompt. Treat this as a follow-on to your existing labelling cadence.
</Note>

## Prerequisites

<Steps>
  <Step title="Connect Claude Code to the Cekura MCP">
    Follow the [Claude Code setup guide](/mcp/claude-code-guide) and run `/setup-mcp`. OAuth is recommended.
  </Step>

  <Step title="Have at least one metric with labelled test sets">
    The optimiser needs calls you've **Added to Lab** and **Annotated** through the [Metric Lab workflow](/documentation/guides/metric-lab#annotate) — annotations and feedback are what the optimiser learns from.
  </Step>

  <Step title="Note the metric IDs you want the routine to operate on">
    Open each metric in the dashboard and copy the **metric ID** from the URL (`/metrics/<id>`). That's the only ID you need — the optimiser uses every test set already in the metric's Lab automatically.
  </Step>
</Steps>

## The routine prompt

Paste this into Claude Code, filling in the metric IDs. It chains the MCP tools the optimiser needs end-to-end.

```
You are auto-optimising Cekura metric prompts based on accumulated test-set
feedback. Run this for each metric_id below.

Metrics to optimise: [12345, 12346]

For each metric_id:

1. Call `metric_reviews_process_feedbacks` with just that metric_id. The
   optimiser will use every test set already in the metric's Lab. Capture
   the returned `progress_id`.

2. Poll `metric_reviews_process_feedbacks_progress` with that progress_id
   roughly every 30s until status is "success" or "error". If "error", report
   the error for that metric, skip its remaining steps, and continue to the
   next metric.

3. On success, read these fields from the progress output:
   - `output.improved_metric_description` and `output.improved_evaluation_trigger`
   - `output.meta_harness.optimized_code` and `output.meta_harness.type`
   - `output.meta_harness.score`, `num_correct`, `num_total` (the score after
     optimisation against the labelled set)

   Call `metrics_retrieve` for the same metric_id to read the *current*
   `description`, `evaluation_trigger`, `type`, and `custom_code`. Produce a
   unified diff between current and proposed for each of those fields. Also
   report the post-optimisation score (e.g. `7/7`) and whether the metric
   type changed (e.g. `basic` → `custom_code`).

4. Do NOT call `metrics_partial_update` yet. Print the diff and the score
   and stop for this metric.

After processing every metric, summarise: which had proposed changes, which
were unchanged, which errored. Do not apply any changes.
```

<Tip>
  Step 4 deliberately stops short of saving. The optimiser sometimes proposes large rewrites — keep a human in the loop for the first few runs before letting the routine call `metrics_partial_update` directly.
</Tip>

## Schedule it

Once the prompt produces clean diffs you're comfortable with, schedule it with Claude Code's `/schedule` slash command:

```
/schedule weekly on Monday at 09:00

<paste the routine prompt above>
```

Claude Code will run the routine on that schedule and surface the diffs in your inbox or Claude Code session each time it runs. See [Claude Code Routines](https://docs.claude.com/en/docs/claude-code/automation) for the full slash-command reference.

A typical cadence:

* **Weekly** if your team labels reviews regularly (recommended).
* **Daily** only if you have an active labelling workflow producing dozens of new annotations per day.
* **Monthly** if labelling is bursty (e.g. quarterly audits).

## Auto-apply (advanced)

Once you trust the routine, swap step 4 for:

```
4. Call `metrics_partial_update` with the metric_id and every changed
   field from step 3 — typically some combination of `description`,
   `evaluation_trigger`, `type`, and `custom_code`. The optimiser may
   convert a `basic` metric into a `custom_code` metric, so pass both
   `type` and `custom_code` when they differ from current.

   Then call `metrics_run_reviews_create` for the same metric to re-score
   every linked test set against the new prompt. Report the before/after
   score delta from the run.
```

This applies the new prompt and immediately verifies it didn't regress the labelled set. If the score delta is negative for any metric, revert manually from the Metric Lab UI.

<Note>
  **`metrics_partial_update` is destructive in effect.** It overwrites the live prompt your production agent evaluates against. Always run the read-only version of the routine for a week or two before enabling auto-apply.
</Note>

## How it maps to the Metric Lab UI

| Routine step                                | Equivalent in the [Metric Lab UI](/documentation/guides/metric-lab) |
| ------------------------------------------- | ------------------------------------------------------------------- |
| `metric_reviews_process_feedbacks`          | Clicking **Auto Improve**                                           |
| `metric_reviews_process_feedbacks_progress` | The progress panel polling that task                                |
| Reviewing the diff before saving            | **View Changes** → diff view                                        |
| `metrics_partial_update`                    | **Save**                                                            |
| `metrics_run_reviews_create`                | **Run** (re-score the test set)                                     |

## Troubleshooting

**The routine reports "no changes proposed" every run.** No new feedback has been added since the last run. The optimiser is deterministic on a fixed set of annotations.

**The `improved_metric_description` looks identical to current, but the score went up.** The optimiser sometimes leaves the description untouched and instead converts the metric into a `custom_code` wrapper around an enhanced prompt. Check `output.meta_harness.optimized_code` and `output.meta_harness.type` — that's where the real change lives. Step 3 above already diffs these fields.

**Progress polling times out.** The optimiser can take several minutes for metrics with many test sets. Increase the polling interval or raise your routine's timeout. Do not retry mid-flight — that would start a second concurrent optimisation.

**I want to optimise against a specific subset of test sets, not the whole Lab.** Pass `test_set_ids` explicitly in step 1 instead of omitting it. The optimiser will use exactly those IDs.
