> ## Documentation Index
> Fetch the complete documentation index at: https://docs.cekura.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Suggested Testing Approach

> A proven workflow to build reliable test cases for your AI voice agents

export const CopyPageButton = () => {
  // Page helper that renders nothing (always returns null). In the browser it
  // injects a "Copy page" split button (#ck-tools) at the top of the content
  // container, with a dropdown offering: copy the page as Markdown, view that
  // Markdown in a new tab, or open the page URL in Claude.
  // During server-side rendering (`window` undefined) it is a no-op.
  if (typeof window === 'undefined') return null;
  // The DOM work is deferred by 50 ms rather than run during render.
  // NOTE(review): presumably this gives the content container time to mount - confirm.
  setTimeout(() => {
    // Re-renders reach this point again; keep at most one toolbar per document.
    if (document.getElementById('ck-tools')) return;
    const anchor = document.getElementById('content-area') || document.querySelector('.mdx-content');
    if (!anchor) return;
    // The stylesheet is injected once and shared by any later re-mount.
    if (!document.getElementById('ck-style')) {
      const styleEl = document.createElement('style');
      styleEl.id = 'ck-style';
      styleEl.textContent = [
        '#ck-tools{position:absolute;top:6px;right:0;z-index:100;font-family:inherit;}',
        '.ck-row{display:inline-flex;align-items:stretch;border:1px solid rgba(0,0,0,0.15);border-radius:8px;overflow:hidden;background:#fff;}',
        ':root.dark .ck-row{background:rgba(255,255,255,0.06);border-color:rgba(255,255,255,0.12);}',
        '.ck-btn{padding:5px 12px;border:none;background:none;cursor:pointer;font-size:13px;font-weight:500;font-family:inherit;color:#374151;}',
        ':root.dark .ck-btn{color:#d1d5db;}',
        '.ck-btn:hover{background:rgba(0,0,0,0.04);}',
        ':root.dark .ck-btn:hover{background:rgba(255,255,255,0.06);}',
        '.ck-chevron{padding:5px 8px;border:none;background:none;cursor:pointer;font-size:14px;font-family:inherit;color:#374151;}',
        ':root.dark .ck-chevron{color:#d1d5db;}',
        '.ck-chevron:hover{background:rgba(0,0,0,0.04);}',
        ':root.dark .ck-chevron:hover{background:rgba(255,255,255,0.06);}',
        '.ck-divider{width:1px;background:rgba(0,0,0,0.12);flex-shrink:0;}',
        ':root.dark .ck-divider{background:rgba(255,255,255,0.12);}',
        '.ck-dd{position:absolute;top:calc(100% + 4px);right:0;min-width:180px;background:#fff;border:1px solid rgba(0,0,0,0.12);border-radius:8px;box-shadow:0 4px 12px rgba(0,0,0,0.1);padding:4px;display:none;z-index:200;}',
        ':root.dark .ck-dd{background:#1f2937;border-color:rgba(255,255,255,0.1);box-shadow:0 4px 16px rgba(0,0,0,0.35);}',
        '.ck-item{display:block;width:100%;padding:7px 12px;border:none;background:none;border-radius:6px;cursor:pointer;font-size:13px;font-family:inherit;text-align:left;color:#374151;}',
        ':root.dark .ck-item{color:#d1d5db;}',
        '.ck-item:hover{background:rgba(0,0,0,0.05);}',
        ':root.dark .ck-item:hover{background:rgba(255,255,255,0.07);}',
      ].join('');
      document.head.appendChild(styleEl);
    }
    const wrap = document.createElement('div');
    wrap.id = 'ck-tools';
    const row = document.createElement('div');
    row.className = 'ck-row';
    const mainBtn = document.createElement('button');
    mainBtn.className = 'ck-btn';
    mainBtn.textContent = 'Copy page';
    const divider = document.createElement('span');
    divider.className = 'ck-divider';
    const chevron = document.createElement('button');
    chevron.className = 'ck-chevron';
    chevron.textContent = '▾';
    // The chevron's only content is a glyph, so give it an accessible name and state.
    chevron.setAttribute('aria-label', 'More page actions');
    chevron.setAttribute('aria-haspopup', 'true');
    chevron.setAttribute('aria-expanded', 'false');
    const dd = document.createElement('div');
    dd.className = 'ck-dd';
    // Shows or hides the dropdown and mirrors the state on the chevron.
    const setOpen = (open) => {
      dd.style.display = open ? 'block' : 'none';
      chevron.setAttribute('aria-expanded', open ? 'true' : 'false');
    };
    chevron.onclick = (e) => {
      // Keep this click from reaching the document-level "click outside" handler.
      e.stopPropagation();
      setOpen(dd.style.display !== 'block');
    };
    // Document-level listeners outlive the toolbar if it is removed from the DOM,
    // so each handler first checks that the toolbar is still attached and
    // unregisters both listeners when it is not.
    const onDocClick = (e) => {
      if (!document.contains(wrap)) {
        detach();
        return;
      }
      const target = e.target;
      const inside = target && typeof target.closest === 'function' && target.closest('#ck-tools');
      if (!inside) setOpen(false);
    };
    const onDocKeyDown = (e) => {
      if (!document.contains(wrap)) {
        detach();
        return;
      }
      if (e.key === 'Escape') setOpen(false);
    };
    const detach = () => {
      document.removeEventListener('click', onDocClick);
      document.removeEventListener('keydown', onDocKeyDown);
    };
    document.addEventListener('click', onDocClick);
    document.addEventListener('keydown', onDocKeyDown);
    // Builds one dropdown entry that runs `fn` and then closes the dropdown.
    const makeItem = (label, fn) => {
      const item = document.createElement('button');
      item.className = 'ck-item';
      item.textContent = label;
      item.onclick = () => {
        fn();
        setOpen(false);
      };
      return item;
    };
    const SKIPPED_TAGS = ['script', 'style', 'svg', 'noscript', 'button', 'iframe'];
    // Returns the list marker for an <li>: "N. " inside an <ol>, "- " otherwise.
    const listMarker = (li) => {
      const parent = li.parentNode;
      if (!parent || parent.nodeType !== 1 || parent.tagName.toLowerCase() !== 'ol') return '- ';
      const siblings = Array.from(parent.children).filter((el) => el.tagName.toLowerCase() === 'li');
      return (siblings.indexOf(li) + 1) + '. ';
    };
    // Recursively converts a DOM subtree into a rough Markdown string.
    const walk = (node) => {
      if (!node) return '';
      if (node.nodeType === 3) return node.textContent || '';
      if (node.nodeType !== 1) return '';
      const tag = node.tagName.toLowerCase();
      // Skip non-content elements and the injected toolbar itself.
      if (SKIPPED_TAGS.includes(tag) || node.id === 'ck-tools') return '';
      // Code blocks use the raw text so nested <code> does not add backticks.
      if (tag === 'pre') return '\n```\n' + node.textContent.trim() + '\n```\n\n';
      const text = Array.from(node.childNodes).map((child) => walk(child)).join('');
      const heading = /^h([1-6])$/.exec(tag);
      if (heading) return '\n' + '#'.repeat(Number(heading[1])) + ' ' + text.trim() + '\n\n';
      if (tag === 'p') return '\n' + text.trim() + '\n\n';
      if (tag === 'li') return listMarker(node) + text.trim() + '\n';
      if (tag === 'code') return '`' + text.trim() + '`';
      return text;
    };
    // Returns the page content as Markdown, with runs of blank lines collapsed.
    const getMarkdown = () => {
      const content = document.querySelector('.mdx-content') || document.getElementById('content-area') || document.body;
      return walk(content).replace(/\n\n\n+/g, '\n\n').trim();
    };
    let resetTimer;
    // Shows a temporary label on the main button, then restores "Copy page".
    // Any pending reset is cancelled so rapid clicks do not cut the label short.
    const flash = (label) => {
      mainBtn.textContent = label;
      clearTimeout(resetTimer);
      resetTimer = setTimeout(() => {
        mainBtn.textContent = 'Copy page';
      }, 2000);
    };
    // Fallback copy through a temporary off-screen textarea and execCommand.
    // Returns true when the browser reports the copy succeeded.
    const legacyCopy = (text) => {
      const area = document.createElement('textarea');
      area.value = text;
      area.setAttribute('readonly', '');
      area.style.position = 'fixed';
      area.style.opacity = '0';
      document.body.appendChild(area);
      let ok = false;
      try {
        area.select();
        ok = Boolean(document.execCommand('copy'));
      } catch (err) {
        ok = false;
      } finally {
        document.body.removeChild(area);
      }
      return ok;
    };
    // Copies the page Markdown. The async Clipboard API may be missing (for
    // example on insecure origins) or may reject; both cases fall back to
    // legacyCopy, and the button reports the final outcome.
    const copyMd = () => {
      const md = getMarkdown();
      const report = (ok) => flash(ok ? 'Copied!' : 'Copy failed');
      const clip = typeof navigator !== 'undefined' ? navigator.clipboard : undefined;
      if (clip && typeof clip.writeText === 'function') {
        clip.writeText(md).then(
          () => report(true),
          () => report(legacyCopy(md)),
        );
      } else {
        report(legacyCopy(md));
      }
    };
    // Opens the HTML-escaped Markdown as a monospace page in a new tab.
    const viewMd = () => {
      const safe = getMarkdown().replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
      const html = '<!DOCTYPE html><html><head><meta charset="utf-8"><style>body{font-family:monospace;max-width:860px;margin:40px auto;padding:0 24px;line-height:1.7;white-space:pre-wrap;word-wrap:break-word}</style></head><body>' + safe + '</body></html>';
      const url = URL.createObjectURL(new Blob([html], { type: 'text/html' }));
      window.open(url, '_blank');
      // Release the blob once the new tab has had ample time to load it.
      setTimeout(() => {
        URL.revokeObjectURL(url);
      }, 60000);
    };
    // Opens Claude with a prompt that references the current page URL.
    const openClaude = () => {
      const prompt = 'Can you read this Cekura docs page ' + window.location.href + ' so I can ask you questions?';
      // noopener: the third-party tab gets no handle back to this window.
      window.open('https://claude.ai/new?q=' + encodeURIComponent(prompt), '_blank', 'noopener');
    };
    mainBtn.onclick = copyMd;
    dd.appendChild(makeItem('Copy page', copyMd));
    dd.appendChild(makeItem('View as Markdown', viewMd));
    dd.appendChild(makeItem('Open in Claude', openClaude));
    row.appendChild(mainBtn);
    row.appendChild(divider);
    row.appendChild(chevron);
    wrap.appendChild(row);
    wrap.appendChild(dd);
    // The toolbar is absolutely positioned, so the anchor must be a positioning context.
    anchor.style.position = 'relative';
    anchor.insertBefore(wrap, anchor.firstChild);
  }, 50);
  return null;
};

<CopyPageButton />

## Overview

Building a robust testing suite for your AI voice agent doesn't have to be overwhelming. We've found that many successful users follow a simple, iterative approach that yields high-quality test cases in just one cycle.

## The Recommended Workflow

Many of our users get significant value from this three-step workflow:

<Steps>
  <Step title="Generate 10 test cases">
    Start by creating 10 diverse test cases that cover different scenarios your agent might encounter.

    **What to include:**

    * Common user requests (happy path scenarios)
    * Edge cases (unusual but valid requests)
    * Error conditions (invalid inputs, missing information)
    * Different user personalities and communication styles

    <Tip>
      Use Cekura's AI-powered scenario generation to quickly create varied test cases based on your agent's purpose.
    </Tip>
  </Step>

  <Step title="Run them">
    Execute all 10 test cases against your agent.

    **During the run:**

    * Let each conversation complete naturally

    This gives you a baseline understanding of how your agent performs across different scenarios.
  </Step>

  <Step title="Review the failed calls">
    Analyze the conversations where your agent didn't meet the expected outcome.

    **What to look for:**

    * Why did the conversation fail?
    * Did the agent misunderstand the request?
    * Was information missing or incorrect?
    * Did the agent handle edge cases poorly?
    * Were there technical issues (latency, interruptions)?
  </Step>
</Steps>

<Note>
  **If a call is marked as a failure but you believe it should be successful**, check these two things:

  1. **Is the expected outcome prompt correct and clear?**
     * If not: Edit the expected outcome prompt directly from inside the run
     * Re-evaluate the call until it passes
     * Hit **Save** to update the evaluator with the corrected expected outcome
  2. **Did the testing agent follow the instructions provided?**
     * If not: Review our [Evaluator Instructions Guide](/documentation/guides/prompting#scenario-examples) for best practices
     * Still having issues? Reach out to us at [support@cekura.ai](mailto:support@cekura.ai)
</Note>

## Why This Works

**After just one iteration of this exercise, you will have 10 very good test cases you can always rely on.**

Here's what makes this approach effective:

### 1. Real-World Validation

Your test cases are validated against actual agent behavior, not theoretical scenarios. You know exactly how your agent responds.

### 2. Failure-Driven Refinement

Failed calls help you:

* Refine your agent's prompts and logic
* Identify missing features or capabilities
* Improve error handling
* Adjust expected outcomes to be more realistic

### 3. Regression Testing Foundation

Once refined, these 10 test cases become your regression test suite. Run them after every agent update to ensure you haven't broken existing functionality.

### 4. Iterative Improvement

Each cycle of this workflow compounds your testing quality:

* Cycle 1: Establish baseline, fix obvious issues
* Cycle 2: Handle edge cases better
* Cycle 3: Optimize performance and user experience

## Expanding Your Test Suite

After your initial 10 test cases are solid, you can expand strategically:

<CardGroup cols={2}>
  <Card title="Add Personality Variations" icon="masks-theater" color="#A6A7EA">
    Test the same scenarios with different personalities and conditions (patient, frustrated, background noise)
  </Card>

  <Card title="Cover More Scenarios" icon="list-check" color="#A6A7EA">
    Generate additional test cases for less common but important use cases
  </Card>

  <Card title="Test Profile Variations" icon="id-card" color="#A6A7EA">
    Use different test profiles to validate identity verification flows
  </Card>

  <Card title="Stress Testing" icon="gauge-high" color="#A6A7EA">
    Add load testing to ensure your agent performs under high traffic
  </Card>
</CardGroup>

## Best Practices

### Start Simple

Don't try to cover every possible scenario on day one. Start with 10 good test cases and build from there.

### Be Specific with Expected Outcomes

Vague expected outcomes make it hard to evaluate success. Instead of "Agent handles the request well," use "Agent cancels the appointment and provides confirmation number."

### Use Realistic Instructions

Your evaluator instructions should mimic how real users would interact with your agent. Avoid overly scripted or robotic instructions.

### Review Passed Calls Too

Don't only focus on failures. Review successful calls to understand what your agent does well and ensure the success wasn't accidental.

### Maintain Your Test Suite

As your agent evolves, update your test cases and expected outcomes to reflect new capabilities and requirements.

## Example: Building Your First 10 Test Cases

Let's say you're testing a restaurant reservation AI agent. Here's a balanced set of 10 test cases:

| #  | Scenario Type       | Description                                                    |
| -- | ------------------- | -------------------------------------------------------------- |
| 1  | Happy Path          | Make a reservation for 2 people tonight at 7 PM                |
| 2  | Happy Path          | Make a reservation for 4 people next Friday at 6:30 PM         |
| 3  | Date Clarification  | "I want to book a table for Saturday" (this Saturday or next?) |
| 4  | Time Unavailable    | Request a time slot that's fully booked                        |
| 5  | Modification        | Change an existing reservation time                            |
| 6  | Cancellation        | Cancel an existing reservation                                 |
| 7  | Information Request | Ask about menu options or special dietary accommodations       |
| 8  | Large Party         | Request reservation for 10+ people                             |
| 9  | Interrupted User    | User with background noise and interruptions                   |
| 10 | Non-Native Speaker  | User with slower pace and accent                               |

This mix covers:

* **40%** standard scenarios (1, 2, 5, 6)
* **30%** clarification and error handling (3, 4, 7)
* **10%** edge cases (8)
* **20%** challenging conditions (9, 10)

## Measuring Success

After running your workflow, you should aim for:

* **70-80% pass rate** on first run (realistic baseline)
* **90-95% pass rate** after refining based on failures
* **95%+ pass rate** as your long-term regression suite

<Warning>
  **Don't aim for 100%**: Real-world conversations are unpredictable. Some variability is normal and healthy. Focus on consistency in core functionality.
</Warning>

## Next Steps

Once you have your reliable 10 test cases:

1. **Schedule Regular Runs**: Set up [cron jobs](/documentation/guides/cronjob) to run your tests automatically
2. **Monitor Metrics**: Track performance over time using [metrics](/documentation/key-concepts/metrics/overview)
3. **Iterate on Failures**: Continuously refine your agent based on test results
4. **Expand Coverage**: Gradually add more test cases for comprehensive coverage

***

## Related Resources

* [Creating Evaluators](/documentation/key-concepts/evaluators/overview)
* [Prompting Examples](/documentation/guides/prompting)
* [Custom Metrics](/documentation/key-concepts/metrics/custom-metrics)
* [Scheduling Tests](/documentation/guides/cronjob)
