> ## Documentation Index
> Fetch the complete documentation index at: https://docs.cekura.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Python Metric

> Write custom evaluation logic in Python to evaluate your AI agent's performance with complete control

export const CopyPageButton = () => {
  // Mounts a "Copy page" split button (copy / view as Markdown / open in Claude)
  // at the top-right of the docs content area. The component itself renders
  // nothing (always returns null); the UI is built imperatively about 50 ms
  // after render, and only when a `window` global exists.
  if (typeof window === 'undefined') {
    return null;
  }
  setTimeout(function () {
    // Already mounted (e.g. the component rendered again): nothing to do.
    if (document.getElementById('ck-tools')) return;
    const anchor = document.getElementById('content-area') || document.querySelector('.mdx-content');
    if (!anchor) return;
    // Inject the widget stylesheet once per document.
    if (!document.getElementById('ck-style')) {
      const styleEl = document.createElement('style');
      styleEl.id = 'ck-style';
      styleEl.textContent = [
        '#ck-tools{position:absolute;top:6px;right:0;z-index:100;font-family:inherit;}',
        '.ck-row{display:inline-flex;align-items:stretch;border:1px solid rgba(0,0,0,0.15);border-radius:8px;overflow:hidden;background:#fff;}',
        ':root.dark .ck-row{background:rgba(255,255,255,0.06);border-color:rgba(255,255,255,0.12);}',
        '.ck-btn{padding:5px 12px;border:none;background:none;cursor:pointer;font-size:13px;font-weight:500;font-family:inherit;color:#374151;}',
        ':root.dark .ck-btn{color:#d1d5db;}',
        '.ck-btn:hover{background:rgba(0,0,0,0.04);}',
        ':root.dark .ck-btn:hover{background:rgba(255,255,255,0.06);}',
        '.ck-chevron{padding:5px 8px;border:none;background:none;cursor:pointer;font-size:14px;font-family:inherit;color:#374151;}',
        ':root.dark .ck-chevron{color:#d1d5db;}',
        '.ck-chevron:hover{background:rgba(0,0,0,0.04);}',
        ':root.dark .ck-chevron:hover{background:rgba(255,255,255,0.06);}',
        '.ck-divider{width:1px;background:rgba(0,0,0,0.12);flex-shrink:0;}',
        ':root.dark .ck-divider{background:rgba(255,255,255,0.12);}',
        '.ck-dd{position:absolute;top:calc(100% + 4px);right:0;min-width:180px;background:#fff;border:1px solid rgba(0,0,0,0.12);border-radius:8px;box-shadow:0 4px 12px rgba(0,0,0,0.1);padding:4px;display:none;z-index:200;}',
        ':root.dark .ck-dd{background:#1f2937;border-color:rgba(255,255,255,0.1);box-shadow:0 4px 16px rgba(0,0,0,0.35);}',
        '.ck-item{display:block;width:100%;padding:7px 12px;border:none;background:none;border-radius:6px;cursor:pointer;font-size:13px;font-family:inherit;text-align:left;color:#374151;}',
        ':root.dark .ck-item{color:#d1d5db;}',
        '.ck-item:hover{background:rgba(0,0,0,0.05);}',
        ':root.dark .ck-item:hover{background:rgba(255,255,255,0.07);}',
      ].join('');
      document.head.appendChild(styleEl);
    }
    const wrap = document.createElement('div');
    wrap.id = 'ck-tools';
    const row = document.createElement('div');
    row.className = 'ck-row';
    const mainBtn = document.createElement('button');
    mainBtn.className = 'ck-btn';
    mainBtn.textContent = 'Copy page';
    const divider = document.createElement('span');
    divider.className = 'ck-divider';
    const chevron = document.createElement('button');
    chevron.className = 'ck-chevron';
    chevron.textContent = '▾';
    const dd = document.createElement('div');
    dd.className = 'ck-dd';
    function closeDD() {
      dd.style.display = 'none';
    }
    function openDD() {
      dd.style.display = 'block';
    }
    chevron.onclick = function (e) {
      // Keep this click away from the document-level "click outside" handler.
      e.stopPropagation();
      if (dd.style.display === 'block') {
        closeDD();
      } else {
        openDD();
      }
    };
    // Dismiss the dropdown on any click outside the widget, or on Escape.
    document.addEventListener('click', function (e) {
      if (!e.target.closest('#ck-tools')) {
        closeDD();
      }
    });
    document.addEventListener('keydown', function (e) {
      if (e.key === 'Escape') {
        closeDD();
      }
    });
    // Builds one dropdown entry that runs `fn` and then closes the dropdown.
    function makeItem(label, fn) {
      const item = document.createElement('button');
      item.className = 'ck-item';
      item.textContent = label;
      item.onclick = function () {
        fn();
        closeDD();
      };
      return item;
    }
    // Serialises the page content to a rough Markdown string: h1-h3, p, pre,
    // li and inline code are converted, every other element contributes only
    // its children's text, and non-content tags plus this widget are dropped.
    function getMarkdown() {
      const skippedTags = ['script', 'style', 'svg', 'noscript', 'button', 'iframe'];
      const walk = function (node) {
        if (!node) return '';
        if (node.nodeType === 3) return node.textContent || '';
        if (node.nodeType !== 1) return '';
        const tag = node.tagName.toLowerCase();
        if (skippedTags.indexOf(tag) !== -1) return '';
        if (node.id === 'ck-tools') return '';
        // <pre> is emitted verbatim from textContent, so its children are never walked.
        if (tag === 'pre') return '\n```\n' + node.textContent.trim() + '\n```\n\n';
        const inner = Array.from(node.childNodes).map(walk).join('');
        if (tag === 'h1') return '\n# ' + inner.trim() + '\n\n';
        if (tag === 'h2') return '\n## ' + inner.trim() + '\n\n';
        if (tag === 'h3') return '\n### ' + inner.trim() + '\n\n';
        if (tag === 'p') return '\n' + inner.trim() + '\n\n';
        if (tag === 'li') return '- ' + inner.trim() + '\n';
        if (tag === 'code') return '`' + inner.trim() + '`';
        return inner;
      };
      const content = document.querySelector('.mdx-content') || document.getElementById('content-area') || document.body;
      // Collapse runs of three or more newlines down to a single blank line.
      return walk(content).replace(/\n\n\n+/g, '\n\n').trim();
    }
    // Shows `text` on the main button, then restores the default label after 2 s.
    function flashLabel(text) {
      mainBtn.textContent = text;
      setTimeout(function () {
        mainBtn.textContent = 'Copy page';
      }, 2000);
    }
    function copyMd() {
      const md = getMarkdown();
      const clipboard = navigator.clipboard;
      // navigator.clipboard is only exposed in secure contexts; without this
      // guard the click handler would throw instead of giving feedback.
      if (!clipboard || typeof clipboard.writeText !== 'function') {
        flashLabel('Copy failed');
        return;
      }
      // writeText can reject (permission denied, document not focused), so the
      // rejection is handled rather than left as an unhandled promise rejection.
      clipboard.writeText(md).then(
        function () {
          flashLabel('Copied!');
        },
        function () {
          flashLabel('Copy failed');
        }
      );
    }
    function viewMd() {
      const md = getMarkdown();
      // Escape HTML metacharacters so the Markdown is displayed as literal text.
      const safe = md.split('&').join('&amp;').split('<').join('&lt;').split('>').join('&gt;');
      const html = '<!DOCTYPE html><html><head><meta charset="utf-8"><style>body{font-family:monospace;max-width:860px;margin:40px auto;padding:0 24px;line-height:1.7;white-space:pre-wrap;word-wrap:break-word}</style></head><body>' + safe + '</body></html>';
      window.open(URL.createObjectURL(new Blob([html], {
        type: 'text/html'
      })), '_blank');
    }
    function openClaude() {
      const prompt = 'Can you read this Cekura docs page ' + window.location.href + ' so I can ask you questions?';
      // noopener: the third-party tab must not get a window.opener handle to this page.
      window.open('https://claude.ai/new?q=' + encodeURIComponent(prompt), '_blank', 'noopener');
    }
    mainBtn.onclick = copyMd;
    dd.appendChild(makeItem('Copy page', copyMd));
    dd.appendChild(makeItem('View as Markdown', viewMd));
    dd.appendChild(makeItem('Open in Claude', openClaude));
    row.appendChild(mainBtn);
    row.appendChild(divider);
    row.appendChild(chevron);
    wrap.appendChild(row);
    wrap.appendChild(dd);
    // The widget is absolutely positioned, so the anchor must be its containing block.
    anchor.style.position = 'relative';
    anchor.insertBefore(wrap, anchor.firstChild);
  }, 50);
  return null;
};

<CopyPageButton />

## Python Metric

Python Metric allows you to write custom evaluation logic in Python to evaluate your AI agent's performance. This gives you complete control over the evaluation process and enables complex analysis that goes beyond simple prompt-based metrics.

<Note>
  The set of variables available in Python metrics (the `data` dict keys) is the same set used by LLM Judge metrics. For the canonical reference table — including which variables are available in **Simulation** vs. **Observability** — see [Metric Variables](/documentation/key-concepts/metrics/metric-variables). The detailed Python-specific accessors are documented below.
</Note>

### Overview

Custom code metrics are executed in a secure Python environment with access to call data including transcripts, metadata, and dynamic variables. Your code must set specific output variables to provide the evaluation result and explanation.

**Cost:** Python metrics do not consume credits. Unlike LLM judge or predefined metrics (which cost 0.2 credits per evaluation), Python metrics run free of charge regardless of evaluation volume.

### Available Data Variables

When writing your custom code, you have access to different variables depending on the evaluation context.

#### Quick Reference

| Variable                                                                                                | Simulation | Observability |
| ------------------------------------------------------------------------------------------------------- | ---------- | ------------- |
| <span title="Full conversation transcript as formatted string">**transcript**</span>                    | ✅          | ✅             |
| <span title="Structured transcript with timing and speaker info">**transcript\_json**</span>            | ✅          | ✅             |
| <span title="Call duration in seconds">**call\_duration**</span>                                        | ✅          | ✅             |
| <span title="Reason why the call ended">**call\_end\_reason**</span>                                    | ✅          | ✅             |
| <span title="URL to the voice recording file">**voice\_recording**</span>                               | ✅          | ✅             |
| <span title="Description of the AI agent">**agent\_description**</span>                                 | ✅          | ✅             |
| <span title="Current date in YYYY-MM-DD format">**date**</span>                                         | ✅          | ✅             |
| <span title="ISO 8601 formatted timestamp">**timestamp**</span>                                         | ✅          | ✅             |
| <span title="Additional context metadata dictionary">**metadata**</span>                                | ✅          | ✅             |
| <span title="Audio metadata and analysis results">**recording\_data**</span>                            | ✅          | ✅             |
| <span title="Results from evaluated metrics">**Metric Results**</span>                                  | ✅          | ✅             |
| <span title="Call topic/subject (Observability only)">**topic**</span>                                  | ❌          | ✅             |
| <span title="Average latency and detailed latency data">**Latency Metrics**</span>                      | ✅          | ✅             |
| <span title="Dynamic variables from API (Observability only)">**dynamic\_variables**</span>             | ❌          | ✅             |
| <span title="CallLog ID (Observability only)">**call\_log\_id**</span>                                  | ❌          | ✅             |
| <span title="Tags from scenario (Simulation only)">**tags**</span>                                      | ✅          | ❌             |
| <span title="Provider call identifier (Simulation only)">**provider\_call\_id**</span>                  | ✅          | ❌             |
| <span title="Provider-specific call details (Simulation only)">**provider\_call\_data**</span>          | ✅          | ❌             |
| <span title="Cekura transcript format (Simulation only)">**cekura\_transcript\_json**</span>            | ✅          | ❌             |
| <span title="Test scenario data (Simulation only)">**test\_profile**</span>                             | ✅          | ❌             |
| <span title="Run ID (Simulation only)">**run\_id**</span>                                               | ✅          | ❌             |
| <span title="Expected outcome value (Simulation only)">**expected\_outcome**</span>                     | ✅          | ❌             |
| <span title="Expected outcome explanations (Simulation only)">**expected\_outcome\_explanation**</span> | ✅          | ❌             |

#### Detailed Field Documentation

##### Available in Both Simulation & Observability

<AccordionGroup>
  <Accordion title="transcript" href="#transcript">
    **Availability:** ✅ Simulation | ✅ Observability

    Full conversation transcript as a formatted string with timestamps

    ```python theme={null}
    # Access the full transcript
    transcript = data["transcript"]
    # Output:
    # "[00:01] Main Agent: Hello.\n[00:12] Testing Agent: L m z o uh-huh.\n[00:14] Main Agent: Could you clarify your message or let me know how I can assist you?\n[00:22] Testing Agent: Hello? I'm Vicky.\n[00:27] Main Agent: Hi, Vicky. How can I help you today?..."
    ```
  </Accordion>

  <Accordion title="transcript_json" href="#transcript-json">
    **Availability:** ✅ Simulation | ✅ Observability

    Transcript as a structured list with detailed timing and speaker information

    ```python theme={null}
    # Access structured transcript with detailed timing
    transcript_json = data["transcript_json"]
    # Actual structure:
    # [
    #   {
    #     "role": "Main Agent",
    #     "time": "00:01", 
    #     "content": "Hello.",
    #     "end_time": 1.817,
    #     "start_time": 1.317
    #   },
    #   {
    #     "role": "Testing Agent",
    #     "time": "00:12",
    #     "content": "L m z o uh-huh.", 
    #     "end_time": 13.817,
    #     "start_time": 12.357
    #   }
    # ]

    ```
  </Accordion>

  <Accordion title="call_duration" href="#call-duration">
    **Availability:** ✅ Simulation | ✅ Observability

    Call duration in seconds as a float

    ```python theme={null}
    # Access call duration
    call_duration = data["call_duration"]
    # Output: 125.5 (seconds)

    ```
  </Accordion>

  <Accordion title="call_end_reason" href="#call-end-reason">
    **Availability:** ✅ Simulation | ✅ Observability

    Reason why the call ended

    ```python theme={null}
    # Access call end reason
    end_reason = data["call_end_reason"]
    # Example values: "main-agent-ended-call", "testing-agent-ended-call"

    ```
  </Accordion>

  <Accordion title="voice_recording" href="#voice-recording">
    **Availability:** ✅ Simulation | ✅ Observability

    URL to the voice recording file

    ```python theme={null}
    # Access voice recording URL
    recording_url = data["voice_recording"]
    # Output: "https://recordings.example.com/call_123.wav"

    ```
  </Accordion>

  <Accordion title="agent_description" href="#agent-description">
    **Availability:** ✅ Simulation | ✅ Observability

    Description of the AI agent used in the call

    ```python theme={null}
    # Access agent description
    agent_desc = data["agent_description"]
    # Output: "Customer service agent with product knowledge and billing expertise"

    ```
  </Accordion>

  <Accordion title="metadata" href="#metadata">
    **Availability:** ✅ Simulation | ✅ Observability

    Additional context metadata as a dictionary

    ```python theme={null}
    # Access metadata
    metadata = data["metadata"]
    # Observability example: {"customer_tier": "premium", "region": "US", "language": "en"}
    # Simulation example: {"ended_reason": "agent_ended_call", "test_mode": true}

    # Available for both Simulation and Observability evaluations
    # Structure varies based on what was stored with the call/run
    ```
  </Accordion>

  <Accordion title="date" href="#date">
    **Availability:** ✅ Simulation | ✅ Observability

    Current date in YYYY-MM-DD format

    ```python theme={null}
    # Access current date
    current_date = data["date"]
    # Output: "2026-01-31"

    ```
  </Accordion>

  <Accordion title="timestamp" href="#timestamp">
    **Availability:** ✅ Simulation | ✅ Observability

    ISO 8601 formatted timestamp of when the call/run occurred

    ```python theme={null}
    # Access timestamp
    timestamp = data["timestamp"]
    # Output: "2026-02-19T09:09:51.346633+00:00"

    # Parse timestamp for date/time analysis
    from datetime import datetime
    dt = datetime.fromisoformat(timestamp)

    # Example: Check if call was during business hours
    hour = dt.hour
    if 9 <= hour < 17:
        _result = "Business hours"
    ```
  </Accordion>

  <Accordion title="recording_data" href="#recording-data">
    **Availability:** ✅ Simulation | ✅ Observability

    Audio metadata and analysis results as a dictionary containing:

    * **`has_audio_data`** (boolean) - Whether audio data is available for analysis
    * **`sample_rate`** (integer) - Audio sample rate in Hz (e.g., 8000, 16000)
    * **`shape`** (list) - Audio dimensions as \[total\_samples, channels]
    * **`separable_channels`** (boolean) - Whether stereo channels can be separated into distinct speaker channels
    * **`total_duration`** (float) - Total audio duration in seconds
    * **`main_speaking`** (list) - Speaking segments for the main/agent channel as \[\[start, end], ...] in seconds
    * **`testing_speaking`** (list) - Speaking segments for the testing/user channel as \[\[start, end], ...] in seconds

    ```python theme={null}
    {
      "has_audio_data": True,
      "sample_rate": 8000,
      "shape": [561600, 2],
      "separable_channels": True,
      "total_duration": 70.2,
      "main_speaking": [[1.32, 8.39], [21.45, 23.6], [36.62, 37.05]],
      "testing_speaking": [[0.33, 8.44], [9.35, 9.5], [19.43, 20.29]]
    }
    ```
  </Accordion>
</AccordionGroup>

##### Observability Only

<AccordionGroup>
  <Accordion title="dynamic_variables" href="#dynamic-variables">
    **Availability:** ❌ Simulation | ✅ Observability

    Dynamic variables configured for the agent as a dictionary

    ```python theme={null}
    # Access dynamic variables
    variables = data["dynamic_variables"]
    # Output: {"customer_name": "John", "account_id": "ACC123", "plan_type": "premium"}

    ```
  </Accordion>

  <Accordion title="call_log_id" href="#call-log-id">
    **Availability:** ❌ Simulation | ✅ Observability

    CallLog ID for observability calls

    ```python theme={null}
    # Access call log ID
    call_log_id = data["call_log_id"]
    # Output: 12345

    ```
  </Accordion>

  <Accordion title="topic" href="#topic">
    **Availability:** ❌ Simulation | ✅ Observability

    Call topic/subject

    ```python theme={null}
    # Access call topic
    topic = data["topic"]
    # Output: "Billing inquiry and payment issues"

    ```
  </Accordion>
</AccordionGroup>

##### Simulation Only

<AccordionGroup>
  <Accordion title="tags" href="#tags">
    **Availability:** ✅ Simulation | ❌ Observability

    Tags associated with the scenario

    ```python theme={null}
    # Access tags
    tags = data["tags"]
    # Simulation with scenario: ["billing", "priority", "escalation"]
    # Without scenario: []

    # Tags are only available for Simulation evaluations with an associated scenario
    ```
  </Accordion>

  <Accordion title="provider_call_id" href="#provider-call-id">
    **Availability:** ✅ Simulation | ❌ Observability

    Provider-specific call identifier

    ```python theme={null}
    # Access provider call ID
    call_id = data.get("provider_call_id", "")
    # Output: "provider-call-id"

    ```
  </Accordion>

  <Accordion title="provider_call_data" href="#provider-call-data">
    **Availability:** ✅ Simulation | ❌ Observability

    Provider-specific call details as a dictionary

    ```python theme={null}
    # Access provider call data
    provider_data = data.get("provider_call_data", {})
    # Note: Structure varies by provider - contains provider-specific call details
    ```
  </Accordion>

  <Accordion title="cekura_transcript_json" href="#cekura-transcript-json">
    **Availability:** ✅ Simulation | ❌ Observability

    Cekura-specific transcript format

    ```python theme={null}
    # Access Cekura transcript format
    cekura_transcript = data.get("cekura_transcript_json", [])
    # Structure:
    # [
    #   {"role": "assistant", "content": "Hello", "timestamp": 1.0},
    #   {"role": "user", "content": "Hi there", "timestamp": 5.5}
    # ]

    # Provides alternative transcript format with simplified structure.
    # Note: this format uses "assistant"/"user" roles, unlike transcript_json
    # which uses "Main Agent"/"Testing Agent".
    ```
  </Accordion>

  <Accordion title="test_profile" href="#test-profile">
    **Availability:** ✅ Simulation | ❌ Observability

    Test scenario data configured for simulation runs

    ```python theme={null}
    # test_profile is only available if the run has an associated test profile
    test_profile = data.get("test_profile", {})
    # Example: {"company": "Cekura", "customer_type": "frustrated", "issue": "billing"}

    # Use .get() to safely access since test_profile may not always be present
    if test_profile:
        company = test_profile.get("company", "Unknown")
        customer_type = test_profile.get("customer_type", "")
        issue = test_profile.get("issue", "")
    ```
  </Accordion>

  <Accordion title="run_id" href="#run-id">
    **Availability:** ✅ Simulation | ❌ Observability

    Run ID for simulation runs

    ```python theme={null}
    # Access run ID
    run_id = data["run_id"]
    # Output: 456

    ```
  </Accordion>

  <Accordion title="expected_outcome" href="#expected-outcome">
    **Availability:** ✅ Simulation | ❌ Observability

    Expected outcome value for the test scenario

    ```python theme={null}
    # Access expected outcome
    expected_outcome = data["expected_outcome"]
    # Output: 4.2

    ```
  </Accordion>

  <Accordion title="expected_outcome_explanation" href="#expected-outcome-explanation">
    **Availability:** ✅ Simulation | ❌ Observability

    List of explanation strings for expected outcome

    ```python theme={null}
    # Access expected outcome explanations
    expected_explanations = data["expected_outcome_explanation"]
    # Example: ["Expected positive customer outcome", "Billing issue should be resolved"]

    ```
  </Accordion>
</AccordionGroup>

#### Metric Results Access

These results are available in **both Simulation and Observability** contexts:

<AccordionGroup>
  <Accordion title="Individual Metric Results" href="#individual-metrics">
    **Availability:** ✅ Simulation | ✅ Observability

    Access any evaluated metric result directly by name

    ```python theme={null}
    # Access individual metric results by name
    customer_satisfaction = data["Customer Satisfaction"]  # Could be: 4.5, "Excellent", 85
    response_time = data["Response Time"]  # Could be: 120 (seconds)
    product_knowledge = data["Product Knowledge"]  # Could be: 85, "Good", 4.2
    workflow_adherence = data["Workflow Adherence"]  # Could be: "Good", 0.8, 78

    ```
  </Accordion>

  <Accordion title="Metric Explanations" href="#metric-explanations">
    **Availability:** ✅ Simulation | ✅ Observability

    Dictionary mapping each metric name to its list of explanation strings

    ```python theme={null}
    # Access metric explanations
    explanations = data["explanation"]

    # Get explanations for specific metrics
    satisfaction_reasons = explanations["Customer Satisfaction"]
    # Example: ["Customer expressed satisfaction", "Positive tone detected", "Issue resolved"]

    response_reasons = explanations["Response Time"]
    # Example: ["Response was within acceptable range", "No long pauses detected"]

    ```
  </Accordion>

  <Accordion title="Latency Metrics" href="#latency-metrics">
    **Availability:** ✅ Simulation | ✅ Observability

    Latency metrics for performance analysis

    The `latency_data` list contains detailed information about each turn's latency:

    * **`latency`**: The duration of the latency in milliseconds.
    * **`speaker`**: The entity associated with the latency (e.g., "Main Agent").
    * **`start_time`**: The timestamp when the turn started, in seconds.

    ```python theme={null}
    # Access latency metrics
    avg_latency = data["Average Latency (in ms)"]  # Example: 1607.5
    latency_data = data["latency_data"]
    # Actual structure:
    # [
    #   {"latency": 1680.0, "speaker": "Main Agent", "start_time": 14.07},
    #   {"latency": 1240.0, "speaker": "Main Agent", "start_time": 26.51},
    #   {"latency": 1970.0, "speaker": "Main Agent", "start_time": 38.33},
    #   {"latency": 1540.0, "speaker": "Main Agent", "start_time": 51.68}
    # ]

    ```
  </Accordion>
</AccordionGroup>

### Required Output Variables

Your Python code must set these two variables:

* **`_result`** - The evaluation outcome (can be boolean, numeric, string, etc.)
* **`_explanation`** - A string explaining the reasoning behind the result

### Example Code

Here's a simple example that checks if the agent mentioned a specific product:

```python theme={null}
# Check if the agent mentioned "Premium Plan" in the conversation
transcript = data["transcript"].lower()
if "premium plan" in transcript:
    _result = True
    _explanation = "Agent successfully mentioned the Premium Plan during the conversation"
else:
    _result = False
    _explanation = "Agent did not mention the Premium Plan in the conversation"
```

### Evaluation Trigger (Custom Code)

A metric's **Evaluation Trigger** decides *whether* the metric runs on a given call (distinct from the metric body, which decides the score). When the trigger is set to **Custom** with **Custom Code**, you write a short Python snippet that runs in the same secure Python environment as a Python metric: it receives the same [`data`](#available-data-variables) dictionary and must set `_result` and `_explanation`.

* `_result` — a **boolean**: `True` to run the metric on this call, `False` to skip it.
* `_explanation` — a string explaining the decision.

<Warning>
  A trigger is **not** a function — there is no `return`. Set `_result` and `_explanation` as variables, exactly like a Python metric. The transcript is in `data["transcript_json"]` (there is no `messages` variable), and each turn's `role` is `"Main Agent"` or `"Testing Agent"` — not `"agent"`, `"assistant"`, or `"user"`.
</Warning>

**Example** — only run the metric when the Main Agent spoke more than 5 times:

```python theme={null}
main_messages = [m for m in data["transcript_json"] if m["role"] == "Main Agent"]
_result = len(main_messages) > 5
_explanation = f"Main Agent spoke {len(main_messages)} times"
```

**Example** — skip the metric when the customer hung up:

```python theme={null}
_result = True
_explanation = "Metric is relevant"
if data.get("call_end_reason") == "customer-hung-up":
    _result = False
    _explanation = "Customer hung up, metric not applicable"
```

### Latency Threshold Example

This example detects if latency is under a threshold in each turn.

```python theme={null}
import json 
THRESHOLD = 2000

latency_data = data["latency_data"]

# Use a standard loop to avoid the 'NameError'
violations = []
for item in latency_data:
    if item["latency"] > THRESHOLD:
        violations.append(item)

if not violations:
    _result = True
    _explanation = f"Every turn is under {THRESHOLD} milliseconds."
else:
    _result = False
    
    formatted_lines = []
    for v in violations:
        # Get the time in seconds (default to 0 if missing)
        total_seconds = v.get('start_time', 0)
        
        # Calculate Minutes and Seconds
        minutes = int(total_seconds // 60)
        seconds = int(total_seconds % 60)
        
        # Format as [MM:SS] with leading zeros (e.g., [01:05])
        timestamp_str = f"[{minutes:02d}:{seconds:02d}]"
        
        # Create the final line
        line = f"{timestamp_str} {json.dumps(v)}"
        formatted_lines.append(line)
    
    # Join with new lines
    _explanation = f"Items with latency > {THRESHOLD}:\n" + "\n".join(formatted_lines)
```

### Complete Data Reference

Here's the complete structure of data available to your custom Python code:

```python theme={null}
# Available Data 
{
  "transcript": "[00:01] Main Agent: Hello.\n[00:12] Testing Agent: L m z o uh-huh.\n[00:14] Main Agent: Could you clarify your message or let me know how I can assist you?...",

  "transcript_json": [
    {
      "role": "Main Agent",
      "time": "00:01",
      "content": "Hello.",
      "end_time": 1.817,
      "start_time": 1.317
    },
    {
      "role": "Testing Agent",
      "time": "00:12",
      "content": "L m z o uh-huh.",
      "end_time": 13.817,
      "start_time": 12.357
    }
  ],

  # ---------- Context Fields ----------
  "call_duration": 180.5,
  "call_end_reason": "main-agent-ended-call",
  "voice_recording": "https://recordings.example.com/call123.wav",
  "agent_description": "Customer service agent with product knowledge",
  "date": "2026-01-31",
  "timestamp": "2026-02-19T09:09:51.346633+00:00",
  "metadata": {
    "key": "value"
  },
  "dynamic_variables": {
    "customer_name": "John"
  },
  "tags": ["priority", "billing_inquiry"],

  # ---------- Recording Data ----------
  "recording_data": {
    "has_audio_data": True,
    "sample_rate": 8000,
    "shape": [561600, 2],
    "separable_channels": True,
    "total_duration": 70.2,
    "main_speaking": [[1.32, 8.39], [21.45, 23.6], [36.62, 37.05]],
    "testing_speaking": [[0.33, 8.44], [9.35, 9.5], [19.43, 20.29]]
  },

  # ---------- Metric Results ----------
  "Customer Satisfaction": 4.5,
  "Response Time": 120,
  "Product Knowledge": 85,

  "explanation": {
    "Customer Satisfaction": [
      "Customer expressed satisfaction",
      "Positive tone"
    ],
    "Response Time": [
      "Response was within acceptable range"
    ]
  },

  # ---------- Latency Data ----------
  "Average Latency (in ms)": 1607.5,

  "latency_data": [
    {
      "latency": 1680.0,
      "speaker": "Main Agent",
      "start_time": 14.07
    },
    {
      "latency": 1240.0,
      "speaker": "Main Agent",
      "start_time": 26.51
    },
    {
      "latency": 1970.0,
      "speaker": "Main Agent",
      "start_time": 38.33
    },
    {
      "latency": 1540.0,
      "speaker": "Main Agent",
      "start_time": 51.68
    }
  ],

  # ---------- Expected Outcome ----------
  "expected_outcome": 4.2,
  "expected_outcome_explanation": [
    "Expected positive outcome"
  ],

  # ---------- Call Log Context ----------
  "call_log_id": 123,
  "topic": "Billing inquiry",

  # ---------- Run / Simulation Context ----------
  "run_id": 456,
  "test_profile": {
    "company": "Cekura"
  },
  "provider_call_data": {
    # Provider-specific call data
  },
  "provider_call_id": "abc-123",
  "cekura_transcript_json": [
    {"role": "assistant", "content": "Hello", "timestamp": 1.0},
    {"role": "user", "content": "Hi there", "timestamp": 5.5}
  ]
}
```

### Data Flow and Execution Order

**Important:** Custom Python code metrics execute **after** all other metrics (Basic, Advanced, and pre-defined metrics). This means:

1. Non-custom metrics evaluate first
2. Results are structured and merged into the `data` dictionary
3. Custom code receives ALL previous results via direct dictionary access
4. Custom code can build upon or combine existing metric results

### Using Metric Results

You can access the results of other metrics that were evaluated for the same call directly by metric name using `data["Metric Name"]`. You can also access their explanations using `data["explanation"]["Metric Name"]`.

Example usage:

```python theme={null}
# Access metric results directly by name
customer_satisfaction = data["Customer Satisfaction"]
response_time = data["Response Time"]
product_knowledge = data["Product Knowledge"]

# Access metric explanations
satisfaction_reasons = data["explanation"]["Customer Satisfaction"]
response_reasons = data["explanation"]["Response Time"]

# Each metric result contains the evaluation outcome
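if isinstance(customer_satisfaction, (int, float)) and customer_satisfaction > 4.0 and response_time < 60:
    _result = "Excellent"
    _explanation = f"Customer was satisfied ({satisfaction_reasons[0]}) and response time was fast ({response_time}s)"
else:
    # Always set both output variables, even when the condition is not met
    _result = "Needs improvement"
    _explanation = f"Customer satisfaction was {customer_satisfaction} and response time was {response_time}s"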
```

### Calling LLM Judge Metrics from Python

#### Function Reference: `evaluate_llm_judge_metric`

The `evaluate_llm_judge_metric` function allows you to evaluate LLM Judge metrics directly from your Python code. This function sends your data and evaluation criteria to Cekura's LLM judge system and returns the evaluation result.

**Function Signature:**

```python theme={null}
def evaluate_llm_judge_metric(
    data: Dict,
    api_key: str,
    description: str,
    eval_type: str|None = None,
    enum_values: List[str]|None = None,
    audio: bool = False,
    audio_start_time: float|None = None,
    audio_end_time: float|None = None,
) -> Dict:
```

**Parameters:**

<ParamField path="data" type="Dict" required>
  This is the same `data` object available in your custom Python code with access to transcript, metadata, and other call data.
</ParamField>

<ParamField path="api_key" type="str" required>
  Your Cekura API key for authentication.
</ParamField>

<ParamField path="description" type="str" required>
  The evaluation prompt/description that guides the LLM judge on how to evaluate the metric.

  You can use context variables in the description using `{{variable}}` syntax (e.g., `{{metadata.instructions}}`). See [LLM Judge Available Variables](/documentation/key-concepts/metrics/metric-variables) for a complete list of available variables.
</ParamField>

<ParamField path="eval_type" type="str|None" default="None">
  The type of evaluation to perform. Supported values:

  * `"binary_workflow_adherence"` - Binary evaluation (returns 0 or 5)
  * `"binary_qualitative"` - Binary qualitative assessment (returns 0 or 5)
  * `"numeric"` - Numeric evaluation (returns integer or float)
  * `"continuous_qualitative"` - Continuous scale from 0 to 5
  * `"enum"` - Enumerated values (requires `enum_values` parameter)
</ParamField>

<ParamField path="enum_values" type="List[str]|None" default="None">
  List of possible values when using `eval_type="enum"`. Only applicable for ENUM type evaluations.

  Example: `["Excellent", "Good", "Fair", "Poor"]`
</ParamField>

<ParamField path="audio" type="bool" default="False">
  When `True`, the LLM judge analyzes the actual voice recording instead of (or in addition to) the transcript. Use this to evaluate speech delivery, tone, pacing, or other audio properties not captured in the transcript text. Requires `data["voice_recording"]` to be a valid URL.
</ParamField>

<ParamField path="audio_start_time" type="float|None" default="None">
  Clip start in seconds from the call start when `audio=True`. If omitted, the full recording is analyzed. Use `transcript_json` entry `start_time` values to locate specific utterances.
</ParamField>

<ParamField path="audio_end_time" type="float|None" default="None">
  Clip end in seconds from the call start when `audio=True`. Must be greater than `audio_start_time` if both are provided.
</ParamField>

**Return Value:**

Returns a dictionary with two keys:

```python theme={null}
{
  "result": <evaluation_result>,  # Type depends on eval_type
  "explanation": List[<string>]      # Explanation of the evaluation or error message
}
```

* `result`: The evaluated metric value (type depends on `eval_type`)
  * Binary types: `0` or `5`
  * Numeric: `int` or `float`
  * Continuous: `float` between 0 and 5
  * Enum: `string` from `enum_values`

* `explanation`: List of strings explaining the evaluation result (or containing the error message if the evaluation failed)

<Tip>
  **Using Context Variables:** You can make your LLM judge evaluations dynamic by using context variables in the `description` parameter. For example, use `{{metadata.instructions}}` to reference specific scenario steps the agent was supposed to follow. See [LLM Judge Available Variables](/documentation/key-concepts/metrics/metric-variables) for the complete list.
</Tip>

#### Example Usage

```python theme={null}
key = "<your_cekura_api_key>"

def get_not_early_end_call_description():
    """Return the prompt that tells the LLM judge how to detect a premature call end."""
    return """You are an AI quality assurance analyst tasked with evaluating customer service call transcripts. Your primary objective is to determine if a Main Agent terminated a call prematurely without valid reason. This analysis is crucial for maintaining high standards in customer service interactions."""

call_end_reason = data["call_end_reason"]

if "main" not in call_end_reason.lower():
    # The Main Agent did not end the call, so it cannot have ended it early:
    # report a pass without calling the LLM judge.
    _result = 5
    _explanation = "The call was ended by the Testing Agent or due to error."
else:
    # Only ask the LLM judge when the Main Agent was the one who ended the call
    description = get_not_early_end_call_description()

    response = evaluate_llm_judge_metric(data, key, description, "binary_workflow_adherence")
    _result = response.get("result")
    _explanation = response.get("explanation")
```

#### Example For Calling Basic Metrics (Deprecated)

<Warning>
  **Deprecated:** `evaluate_basic_metric` is deprecated in favor of `evaluate_llm_judge_metric`. Please use `evaluate_llm_judge_metric` for new implementations.
</Warning>

```python theme={null}
key = "<your_cekura_api_key>"

def get_not_early_end_call_description():
    """Return the prompt that tells the judge how to detect a premature call end."""
    return """You are an AI quality assurance analyst tasked with evaluating customer service call transcripts. Your primary objective is to determine if a Main Agent terminated a call prematurely without valid reason. This analysis is crucial for maintaining high standards in customer service interactions."""

call_end_reason = data["call_end_reason"]

if "main" not in call_end_reason.lower():
    # The Main Agent did not end the call, so it cannot have ended it early:
    # report a pass without calling the metric evaluator.
    _result = 5
    _explanation = "The call was ended by the Testing Agent or due to error."
else:
    # Only run the evaluation when the Main Agent was the one who ended the call
    description = get_not_early_end_call_description()

    response = evaluate_basic_metric(data, key, description, "binary_workflow_adherence")
    _result = response.get("result")
    _explanation = response.get("explanation")
```

#### Example For Calling Advanced Metrics (Deprecated)

<Warning>
  **Deprecated:** `evaluate_advance_metric` is deprecated in favor of `evaluate_llm_judge_metric`. Please use `evaluate_llm_judge_metric` for new implementations.
</Warning>

```python theme={null}

key = "<your_cekura_api_key>"

def get_not_early_end_call_prompt(transcript, call_end_reason):
    """Build the evaluation prompt, embedding the call transcript and the call end reason."""
    return f"""You are an AI quality assurance analyst tasked with evaluating customer service call transcripts. Your primary objective is to determine if a Main Agent terminated a call prematurely without valid reason. This analysis is crucial for maintaining high standards in customer service interactions.

Please review the following call transcript:

<call_transcript>
{transcript}
</call_transcript>

Now, consider the reason provided for why the call ended:

<call_end_reason>
{call_end_reason}
</call_end_reason>

Your task is to analyze the transcript and call end reason to determine if the Main Agent terminated the call early without justification. 
"""

# The checks form a single if/elif chain so that exactly one branch decides the
# outcome; a missing-data verdict is never overwritten by a later step.
if not data.get("transcript_json"):
    _result = None
    _explanation = "No transcript available"
elif not data.get("call_end_reason"):
    _result = None
    _explanation = "No call end reason available"
elif "main" not in data["call_end_reason"].lower():
    # The Main Agent did not end the call, so it cannot have ended it early
    _result = 5
    _explanation = "The call was ended by the Testing Agent or due to error."
else:
    prompt = get_not_early_end_call_prompt(data["transcript"], data["call_end_reason"])

    response = evaluate_advance_metric(data, key, prompt, "binary_workflow_adherence")
    _result = response.get("result")
    _explanation = response.get("explanation")
```

### Audio-Based Analysis

Custom Python metrics have access to the full voice recording and pre-computed audio segment timing, enabling you to evaluate speech delivery properties that go beyond what transcript text alone can capture. This is the right approach when no pre-defined metric covers the specific speech quality requirement you need to validate.

**Available audio data:**

* `data["voice_recording"]` — URL to the voice recording file
* `data["recording_data"]` — Pre-computed speaking segments: `main_speaking` and `testing_speaking` arrays of `[start, end]` timestamps in seconds
* `data["transcript_json"]` — Per-utterance data including `start_time` and `end_time` in seconds, useful for locating the audio window around specific spoken content

**Pattern: Locate relevant utterances, then evaluate the audio**

Use `transcript_json` to find when specific content was spoken, then reference those timestamps to identify the corresponding audio segment for evaluation:

```python theme={null}
def _is_target_utterance(entry):
    """True for Main Agent turns whose lowercased text contains the target content."""
    return entry["role"] == "Main Agent" and "<target content>" in entry["content"].lower()


# Keep only the transcript entries that are worth evaluating
target_utterances = [entry for entry in data["transcript_json"] if _is_target_utterance(entry)]

recording_url = data["voice_recording"]

if target_utterances:
    # start_time / end_time of the first match delimit the audio window to extract.
    # Pass that segment to evaluate_llm_judge_metric or analyze it programmatically.
    first_match = target_utterances[0]
    segment_start = first_match["start_time"]
    segment_end = first_match["end_time"]
    # ... audio extraction and evaluation logic ...
    _result = True
    _explanation = "Speech delivery met the expected pattern"
else:
    # Nothing matched, so there is no audio window to evaluate
    _result = False
    _explanation = "No relevant utterances found in the recording"
```

**Pattern: Pass audio directly to `evaluate_llm_judge_metric`**

Use the `audio=True` parameter along with `audio_start_time` / `audio_end_time` to let the LLM judge listen to the actual recording segment rather than reading the transcript:

```python theme={null}
key = "<your_cekura_api_key>"


def _mentions_target(entry):
    """True for Main Agent turns whose lowercased text contains the target content."""
    return entry["role"] == "Main Agent" and "<target content>" in entry["content"].lower()


# Keep only the utterances the judge should listen to
target_utterances = list(filter(_mentions_target, data["transcript_json"]))

if target_utterances:
    # Clip the recording to the first matching utterance and let the judge hear it
    first_match = target_utterances[0]

    response = evaluate_llm_judge_metric(
        data,
        key,
        description="Did the agent deliver this segment clearly, at an appropriate pace, and without unnatural pauses?",
        eval_type="binary_qualitative",
        audio=True,
        audio_start_time=first_match["start_time"],
        audio_end_time=first_match["end_time"],
    )
    _result = response["result"]
    _explanation = response["explanation"]
else:
    # Nothing matched, so there is no audio segment to send to the judge
    _result = False
    _explanation = "No relevant utterances found in the recording"
```

For custom signal processing instead of LLM judging, use `data["voice_recording"]` and `data["recording_data"]` directly. See [Calling LLM Judge Metrics from Python](#calling-llm-judge-metrics-from-python) for the full function reference including all audio parameters.

**Use cases:**

* Verifying that multi-part spoken content is delivered with appropriate pauses between segments
* Detecting whether specific terms or sequences are spoken with the required cadence
* Evaluating audio-level speech patterns not captured in the transcript (e.g., pacing, grouping, emphasis)

### Advanced Example

Here's a more complex example that checks whether the agent's responses are detailed enough, based on their average length:

```python theme={null}
import re
from datetime import datetime

# Get transcript data
transcript = data["transcript"]

# Collect what the agent said on every line that starts with the speaker label
AGENT_PREFIX = 'Agent:'
agent_responses = []

for line in transcript.split('\n'):
    stripped = line.strip()
    if stripped.startswith(AGENT_PREFIX):
        # Drop only the leading label; str.replace would also delete any later
        # "Agent:" inside the spoken text and skew the measured length
        agent_responses.append(stripped[len(AGENT_PREFIX):].strip())

# Calculate average response length
if agent_responses:
    avg_response_length = sum(len(response) for response in agent_responses) / len(agent_responses)

    # Check if responses are detailed enough (more than 50 characters average)
    if avg_response_length > 50:
        _result = True
        _explanation = f"Agent provided detailed responses with average length of {avg_response_length:.1f} characters"
    else:
        _result = False
        _explanation = f"Agent responses were too brief with average length of {avg_response_length:.1f} characters"
else:
    # No agent lines at all: report a failure instead of dividing by zero
    _result = False
    _explanation = "No agent responses found in transcript"
```

### Example Using Multiple Data Sources

Here's an example that combines multiple metric results with call metadata and tags:

```python theme={null}
# Combine earlier metric results with call metadata; any missing key is reported as "Incomplete"
try:
    # Results of previously evaluated metrics, looked up by metric name
    satisfaction = data["Customer Satisfaction"]
    response_time = data["Response Time"]

    # Call-level metadata
    call_duration = data["call_duration"]
    call_end_reason = data["call_end_reason"]
    tags = data["tags"]

    # A call counts as priority when it carries either of these tags
    is_priority = any(label in tags for label in ("priority", "vip"))

    if call_end_reason in ("timeout", "error"):
        # Abnormal endings are rated first; they can never satisfy the "hangup" branch below
        _result = "Poor"
        _explanation = f"Call ended unexpectedly due to {call_end_reason}, indicating technical issues"
    elif call_end_reason == "hangup" and isinstance(satisfaction, (int, float)) and satisfaction > 3.0 and response_time < 60:
        # Normal ending with a satisfied customer and a fast response
        if not is_priority:
            _result = "Good"
            _explanation = f"Customer was satisfied ({satisfaction}) with fast response time ({response_time}s) and completed the call normally"
        else:
            _result = "Excellent"
            _explanation = f"Priority customer was satisfied ({satisfaction}) with fast response time ({response_time}s) and completed the call normally"
    else:
        _result = "Needs Improvement"
        _explanation = f"Call performance needs improvement - satisfaction: {satisfaction}, response time: {response_time}s, ended reason: {call_end_reason}"

except KeyError as missing_key:
    _result = "Incomplete"
    _explanation = f"Required data not found: {str(missing_key)}"
```
