> ## Documentation Index
> Fetch the complete documentation index at: https://docs.cekura.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Overview

> Introduction to Evaluators on Cekura

export const CopyPageButton = () => {
  // Renders nothing itself. In a browser it schedules (after 50 ms) the one-time
  // injection of a "Copy page" split button with a dropdown into the page's
  // content container. The dropdown offers: copy the page as Markdown, view the
  // Markdown in a new tab, and open a Claude chat that references this page URL.
  // Always returns null, so it is safe to call where `window` is undefined.
  if (typeof window !== 'undefined') {
    setTimeout(function () {
      // Idempotence guard: never inject a second toolbar.
      if (document.getElementById('ck-tools')) return;
      var anchor = document.getElementById('content-area') || document.querySelector('.mdx-content');
      if (!anchor) return;
      // The stylesheet is injected once and shared by every toolbar instance.
      if (!document.getElementById('ck-style')) {
        var s = document.createElement('style');
        s.id = 'ck-style';
        s.textContent = '#ck-tools{position:absolute;top:6px;right:0;z-index:100;font-family:inherit;}' + '.ck-row{display:inline-flex;align-items:stretch;border:1px solid rgba(0,0,0,0.15);border-radius:8px;overflow:hidden;background:#fff;}' + ':root.dark .ck-row{background:rgba(255,255,255,0.06);border-color:rgba(255,255,255,0.12);}' + '.ck-btn{padding:5px 12px;border:none;background:none;cursor:pointer;font-size:13px;font-weight:500;font-family:inherit;color:#374151;}' + ':root.dark .ck-btn{color:#d1d5db;}' + '.ck-btn:hover{background:rgba(0,0,0,0.04);}' + ':root.dark .ck-btn:hover{background:rgba(255,255,255,0.06);}' + '.ck-chevron{padding:5px 8px;border:none;background:none;cursor:pointer;font-size:14px;font-family:inherit;color:#374151;}' + ':root.dark .ck-chevron{color:#d1d5db;}' + '.ck-chevron:hover{background:rgba(0,0,0,0.04);}' + ':root.dark .ck-chevron:hover{background:rgba(255,255,255,0.06);}' + '.ck-divider{width:1px;background:rgba(0,0,0,0.12);flex-shrink:0;}' + ':root.dark .ck-divider{background:rgba(255,255,255,0.12);}' + '.ck-dd{position:absolute;top:calc(100% + 4px);right:0;min-width:180px;background:#fff;border:1px solid rgba(0,0,0,0.12);border-radius:8px;box-shadow:0 4px 12px rgba(0,0,0,0.1);padding:4px;display:none;z-index:200;}' + ':root.dark .ck-dd{background:#1f2937;border-color:rgba(255,255,255,0.1);box-shadow:0 4px 16px rgba(0,0,0,0.35);}' + '.ck-item{display:block;width:100%;padding:7px 12px;border:none;background:none;border-radius:6px;cursor:pointer;font-size:13px;font-family:inherit;text-align:left;color:#374151;}' + ':root.dark .ck-item{color:#d1d5db;}' + '.ck-item:hover{background:rgba(0,0,0,0.05);}' + ':root.dark .ck-item:hover{background:rgba(255,255,255,0.07);}';
        document.head.appendChild(s);
      }
      var wrap = document.createElement('div');
      wrap.id = 'ck-tools';
      var row = document.createElement('div');
      row.className = 'ck-row';
      var mainBtn = document.createElement('button');
      mainBtn.className = 'ck-btn';
      mainBtn.textContent = 'Copy page';
      var divider = document.createElement('span');
      divider.className = 'ck-divider';
      var chevron = document.createElement('button');
      chevron.className = 'ck-chevron';
      chevron.textContent = '▾';
      var dd = document.createElement('div');
      dd.className = 'ck-dd';
      function closeDD() {
        dd.style.display = 'none';
      }
      function openDD() {
        dd.style.display = 'block';
      }
      chevron.onclick = function (e) {
        // Keep this click away from the document-level "click outside" handler.
        e.stopPropagation();
        if (dd.style.display === 'block') {
          closeDD();
        } else {
          openDD();
        }
      };
      // Clicking anywhere outside the toolbar, or pressing Escape, closes the dropdown.
      document.addEventListener('click', function (e) {
        if (!e.target.closest('#ck-tools')) {
          closeDD();
        }
      });
      document.addEventListener('keydown', function (e) {
        if (e.key === 'Escape') {
          closeDD();
        }
      });
      // Builds one dropdown entry that runs `fn` and then closes the dropdown.
      function makeItem(label, fn) {
        var b = document.createElement('button');
        b.className = 'ck-item';
        b.textContent = label;
        b.onclick = function () {
          fn();
          closeDD();
        };
        return b;
      }
      // Elements whose content is never part of the exported Markdown.
      var skip = ['script', 'style', 'svg', 'noscript', 'button', 'iframe'];
      // Converts the rendered page content to a rough Markdown string by walking
      // the DOM. Only headings, paragraphs, pre/code and list items get markup;
      // every other element contributes just the text of its children.
      function getMarkdown() {
        var walk = function (node) {
          if (!node) return '';
          if (node.nodeType === 3) return node.textContent || '';
          if (node.nodeType !== 1) return '';
          var tag = node.tagName.toLowerCase();
          if (skip.indexOf(tag) !== -1) return '';
          // The toolbar itself must not leak into the copied text.
          if (node.id === 'ck-tools') return '';
          var ch = Array.from(node.childNodes).map(walk).join('');
          // h1..h6 map to one '#' per heading level.
          var heading = /^h([1-6])$/.exec(tag);
          if (heading) return '\n' + '#'.repeat(Number(heading[1])) + ' ' + ch.trim() + '\n\n';
          if (tag === 'p') return '\n' + ch.trim() + '\n\n';
          // Use the raw text of <pre> so nested <code> does not add backticks.
          if (tag === 'pre') return '\n```\n' + node.textContent.trim() + '\n```\n\n';
          if (tag === 'li') return '- ' + ch.trim() + '\n';
          if (tag === 'code') return '`' + ch.trim() + '`';
          return ch;
        };
        var content = document.querySelector('.mdx-content') || document.getElementById('content-area') || document.body;
        // Collapse runs of three or more newlines down to a single blank line.
        return walk(content).replace(/\n\n\n+/g, '\n\n').trim();
      }
      // Shows a temporary label on the main button, restoring it after 2 s.
      function flashLabel(text) {
        mainBtn.textContent = text;
        setTimeout(function () {
          mainBtn.textContent = 'Copy page';
        }, 2000);
      }
      // Copies the page Markdown to the clipboard and reports the outcome on
      // the main button.
      function copyMd() {
        var md = getMarkdown();
        // The async Clipboard API is missing in insecure contexts; report the
        // failure instead of throwing from the click handler.
        if (!navigator.clipboard || typeof navigator.clipboard.writeText !== 'function') {
          flashLabel('Copy failed');
          return;
        }
        navigator.clipboard.writeText(md).then(function () {
          flashLabel('Copied!');
        }, function () {
          // Rejected, e.g. permission denied or the document is not focused.
          flashLabel('Copy failed');
        });
      }
      // Opens the Markdown as a plain monospace HTML document in a new tab.
      function viewMd() {
        var md = getMarkdown();
        // Escape '&' first so the entities produced below are not double-escaped.
        var safe = md.split('&').join('&amp;').split('<').join('&lt;').split('>').join('&gt;');
        var html = '<!DOCTYPE html><html><head><meta charset="utf-8"><style>body{font-family:monospace;max-width:860px;margin:40px auto;padding:0 24px;line-height:1.7;white-space:pre-wrap;word-wrap:break-word}</style></head><body>' + safe + '</body></html>';
        window.open(URL.createObjectURL(new Blob([html], {
          type: 'text/html'
        })), '_blank');
      }
      // Opens a new Claude chat pre-filled with a prompt pointing at this page.
      function openClaude() {
        var prompt = 'Can you read this Cekura docs page ' + window.location.href + ' so I can ask you questions?';
        // 'noopener' stops the third-party page from reaching back via window.opener.
        window.open('https://claude.ai/new?q=' + encodeURIComponent(prompt), '_blank', 'noopener');
      }
      mainBtn.onclick = copyMd;
      dd.appendChild(makeItem('Copy page', copyMd));
      dd.appendChild(makeItem('View as Markdown', viewMd));
      dd.appendChild(makeItem('Open in Claude', openClaude));
      row.appendChild(mainBtn);
      row.appendChild(divider);
      row.appendChild(chevron);
      wrap.appendChild(row);
      wrap.appendChild(dd);
      // The toolbar is absolutely positioned, so the anchor must be a positioning context.
      anchor.style.position = 'relative';
      anchor.insertBefore(wrap, anchor.firstChild);
    }, 50);
  }
  return null;
};

<CopyPageButton />

## What are Evaluators?

Evaluators are like test cases for your AI voice agents. Each evaluator simulates a conversation with your agent to systematically test its performance and behavior.

### Evaluator Components

An evaluator is composed of five key components:

<CardGroup cols={2}>
  <Card title="Instructions" icon="list-check" color="#A6A7EA" href="#instructions">
    Define how the evaluator behaves during conversations
  </Card>

  <Card title="Expected Outcome" icon="bullseye" color="#A6A7EA" href="#expected-outcome">
    The desired result that indicates a successful conversation
  </Card>

  <Card title="Metrics" icon="chart-line" color="#A6A7EA" href="#metrics">
    Measurements like latency, relevancy, and consistency
  </Card>

  <Card title="Personality" icon="user" color="#A6A7EA" href="#personality">
    Language, tone, and behavioral characteristics
  </Card>

  <Card title="Test Profile" icon="id-card" color="#A6A7EA" href="#test-profiles">
    Identity information like name, DOB, and address (optional)
  </Card>
</CardGroup>

## How Evaluators Work

### Instructions

Each evaluator has a set of instructions that define how it should behave during a simulation run. These instructions guide the evaluator's conversation flow, what information to provide, and how to respond to your agent.

During a simulation run, evaluators follow their instructions to engage in realistic conversations with your agent. In the transcript of a simulation run, lines spoken by evaluators are labeled as **Testing Agents**.

**Example Instructions:**

* "Call to cancel an appointment scheduled for next Tuesday"
* "Inquire about store hours and ask about product availability"
* "Request a refund for order #12345 and escalate if initially denied"

[See more instruction examples](/documentation/guides/prompting#scenario-examples).

## Expected Outcome

Evaluators have an **expected outcome** defined, which represents what should happen in a successful conversation. Once your evaluators complete a conversation with your agent, we evaluate those conversations to give you a report of how your agent performed.

The expected outcome is one of the key metrics on which we evaluate the conversation. This metric tells you whether your AI agent (called **Main Agent** on Cekura) did what it was supposed to do in the conversation.

### Expected Outcome Examples

| Scenario                 | Expected Outcome                                                                  |
| ------------------------ | --------------------------------------------------------------------------------- |
| Appointment Cancellation | The agent successfully cancels the appointment and provides a confirmation number |
| Product Inquiry          | The agent provides accurate product information and store hours                   |
| Refund Request           | The agent processes the refund request and provides a timeline for processing     |
| Account Verification     | The agent verifies the customer's identity and provides account information       |

The expected outcome is evaluated as either met or not met, giving you clear visibility into whether your agent is performing as intended.

[See more expected outcome examples](/documentation/guides/prompting#expected-output).

## Metrics

Each evaluator also has a set of metrics attached to it. These metrics are computed alongside the expected outcome and provide deeper insights into conversation quality, agent behavior, and user experience.

You can find these metrics in the metrics section of your simulation results. [Read more about metrics here](/documentation/key-concepts/metrics/overview).

Common metrics include:

* Latency
* Infrastructure Issues
* Relevancy
* Consistency

<Note>
  To define which metrics must pass for an evaluation to be marked as successful, configure the [Rubric](/documentation/key-concepts/metrics/rubric) for your project.
</Note>

## Personality

An evaluator has a **personality** attached to it, which determines the language of the evaluator and other behavioral characteristics such as:

* Whether the conversation will have background noise
* Interruption patterns
* Speaking pace
* Emotional tone

Personalities help you test your agent against different types of users and real-world conditions. [Read more about personalities here](/documentation/key-concepts/evaluators/personality).

## Test Profiles

An evaluator can have a **test profile** attached to it. A test profile gives the evaluator an identity including information like:

* Name
* Date of birth
* Address
* Phone number
* Other relevant personal information

### When to Use Test Profiles

You would want to use a test profile if your AI agent needs the counterparty to provide specific information like name, date of birth, address, etc., to complete its task.

### Example: Clinic Receptionist

If you have a clinic receptionist agent that can help with cancelling appointments, it will likely need to verify the counterparty before proceeding with cancellation.

**Setup Process:**

1. Create mock appointments in your system with specific test data (e.g., date of birth: January 1, 2000)
2. Create a test profile with the same information (date of birth: January 1, 2000)
3. Attach the test profile to your evaluator

**How It Works:**

When the evaluator (acting as the testing agent) holds a conversation with your agent, it will provide the date of birth from its test profile. Your AI agent can then use this information to look up the desired appointment and proceed with the cancellation.

This ensures consistent, reliable testing of verification flows and identity-dependent features in your agent.

[Read more about test profiles here](/documentation/key-concepts/evaluators/test-profile).

<Note>
  **Optimise Prompt** — When you select one or more evaluator rows in the Evaluators tab, an **Optimise Prompt** button appears in the bulk-action bar (next to Run and the delete control). Clicking it launches the AI Assistant to diagnose failing evaluators and iterate on your agent's prompt until they pass. [See the full guide](/documentation/guides/optimise-prompt).
</Note>