Skip to content

afnio.tellurio.run

afnio.tellurio.run.RunStatus

Bases: Enum

Represents the status of a Tellurio Run.

It can be one of the following values:

  • "RUNNING": The Run is active and accepting logs, metrics, and artifacts.
  • "COMPLETED": The run has completed successfully without any errors.
  • "CRASHED": The run has crashed due to an unhandled exception or error.
  • "FAILED": A safeguard/intermediate failure state used when a Run was left unfinished or superseded.
Source code in afnio/tellurio/run.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
class RunStatus(Enum):
    """
    Lifecycle state of a Tellurio [`Run`][..Run].

    Each member's value is the string spelling of its own name:

    - `"RUNNING"`: The Run is active and accepting logs, metrics, and artifacts.
    - `"CRASHED"`: The run has crashed due to an unhandled exception or error.
    - `"COMPLETED"`: The run has completed successfully without any errors.
    - `"FAILED"`: A safeguard/intermediate failure state used when a Run
        was left unfinished or superseded.
    """

    # Declaration order is also the iteration order of the enum; keep it stable.
    RUNNING = "RUNNING"
    CRASHED = "CRASHED"
    COMPLETED = "COMPLETED"
    FAILED = "FAILED"

afnio.tellurio.run.RunOrg

Represents a Tellurio Run's Organization.

Each Run belongs to a Project, and each Project belongs to a namespace, which can be either an Organization or a User (personal Organization).

Source code in afnio/tellurio/run.py
40
41
42
43
44
45
46
47
48
49
50
51
52
class RunOrg:
    """The Organization (namespace) that a Tellurio [`Run`][..Run] lives under.

    A [`Run`][..Run] is owned by a [`Project`][afnio.tellurio.project.Project];
    that [`Project`][afnio.tellurio.project.Project] in turn sits inside a
    namespace, which is either an Organization or a User (personal Organization).
    """

    def __init__(self, slug: str):
        # Slug of the owning namespace.
        self.slug = slug

    def __repr__(self):
        slug = self.slug
        return f"<Organization slug={slug}>"

afnio.tellurio.run.RunProject

Represents a Tellurio Run's Project.

Each Run belongs to a Project, and each Project belongs to a namespace, which can be either an Organization or a User (personal Organization).

Source code in afnio/tellurio/run.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
class RunProject:
    """
    The Project that a Tellurio [`Run`][..Run] is recorded under.

    A [`Run`][..Run] is owned by a [`Project`][afnio.tellurio.project.Project];
    that [`Project`][afnio.tellurio.project.Project] in turn sits inside a
    namespace, which is either an Organization or a User (personal Organization).
    """

    def __init__(self, uuid: str, display_name: str, slug: str):
        # Plain value holder: the three identifiers are stored as given.
        self.uuid, self.display_name, self.slug = uuid, display_name, slug

    def __repr__(self):
        # The slug is intentionally not part of the representation.
        uuid, display_name = self.uuid, self.display_name
        return f"<Project uuid={uuid} display_name={display_name}>"

afnio.tellurio.run.RunUser

Represents a Tellurio Run's User.

Each Run is associated with a user who created it.

Source code in afnio/tellurio/run.py
73
74
75
76
77
78
79
80
81
82
83
84
85
86
class RunUser:
    """
    The User attached to a Tellurio [`Run`][..Run].

    Every [`Run`][..Run] is associated with the user who created it.
    """

    def __init__(self, uuid: str, username: str, slug: str):
        # Plain value holder: the three identifiers are stored as given.
        self.uuid, self.username, self.slug = uuid, username, slug

    def __repr__(self):
        # The slug is intentionally not part of the representation.
        uuid, username = self.uuid, self.username
        return f"<User uuid={uuid} username={username}>"

afnio.tellurio.run.Run

Represents a Tellurio Run.

Each Run belongs to a Project, and each Project belongs to a namespace, which can be either an Organization or a User (personal Organization).

A Run represents a single execution or experiment of an AI agent or workflow, that you want to track and log metrics for. It provides methods to log metrics and artifacts, and to mark the Run as completed or crashed. It can also be used as a context manager to automatically handle completion and crashing based on exceptions.

Source code in afnio/tellurio/run.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
class Run:
    """
    Represents a Tellurio `Run`.

    Each [`Run`][.] belongs to a [`Project`][afnio.tellurio.project.Project],
    and each [`Project`][afnio.tellurio.project.Project] belongs to a namespace,
    which can be either an Organization or a User (personal Organization).

    A [`Run`][.] represents a single execution or experiment of an AI agent or
    workflow, that you want to track and log metrics for. It provides methods to log
    metrics and to finish the `Run` with a final status. It can also be used as a
    context manager, in which case the `Run` is finished as `"COMPLETED"` or
    `"CRASHED"` depending on whether the `with` block raised an exception.
    """

    def __init__(
        self,
        uuid: str,
        name: str,
        description: str,
        status: RunStatus,
        date_created: Optional[datetime] = None,
        date_updated: Optional[datetime] = None,
        organization: Optional[RunOrg] = None,
        project: Optional[RunProject] = None,
        user: Optional[RunUser] = None,
    ):
        self.uuid = uuid
        self.name = name
        self.description = description
        # Normalize through the enum so both members and their string values
        # are accepted; an unknown value raises ValueError here.
        self.status = RunStatus(status)
        self.date_created = date_created
        self.date_updated = date_updated
        self.organization = organization
        self.project = project
        self.user = user

    def __repr__(self):
        return (
            f"<Run uuid={self.uuid} name={self.name} status={self.status} "
            f"project={self.project.display_name if self.project else None}>"
        )

    def finish(
        self,
        client: Optional[TellurioClient] = None,
        status: Optional[RunStatus] = RunStatus.COMPLETED,
    ):
        """Sets the run's final status on the server by sending a PATCH request,
        and clears the active run.

        Log output is suppressed while `_IN_ATEXIT` is set.

        Args:
            client: The client to use for the request. If not provided,
                the default client will be used.
            status: The [`RunStatus`][...RunStatus] to set for the run. Defaults to
                `"COMPLETED"`; `None` is treated as `"COMPLETED"` as well.

        Raises:
            ValueError: If the run is missing its organization slug, project slug
                or UUID, or if `status` is not a valid
                [`RunStatus`][...RunStatus].
            Exception: If the PATCH request fails.
        """
        client = client or get_default_clients()[0]

        namespace_slug = self.organization.slug if self.organization else None
        project_slug = self.project.slug if self.project else None
        run_uuid = self.uuid

        if not (namespace_slug and project_slug and run_uuid):
            raise ValueError("Run object is missing required identifiers.")

        # The parameter is Optional: fall back to the documented default instead
        # of failing on `None.value`, and normalize the same way __init__ does.
        status = RunStatus.COMPLETED if status is None else RunStatus(status)

        endpoint = f"/api/v0/{namespace_slug}/projects/{project_slug}/runs/{run_uuid}/"
        payload = {"status": status.value}

        try:
            response = client.patch(endpoint, json=payload)
            if response.status_code == 200:
                self.status = status
                if not _IN_ATEXIT:
                    # Report the status that was actually sent, not always COMPLETED.
                    logger.info(f"Run {self.name!r} marked as {status.value}.")
            else:
                if not _IN_ATEXIT:
                    logger.error(
                        f"Failed to update run status: {response.status_code} - {response.text}"  # noqa: E501
                    )
                response.raise_for_status()
        except Exception as e:
            if not _IN_ATEXIT:
                logger.error(f"An error occurred while updating the run status: {e}")
            raise

        # Clear the active run after finishing. This is deliberately best-effort:
        # a failure here must not turn an already finished run into an error.
        try:
            set_active_run(None)
        except Exception:
            pass

        # Mark safeguard as finished
        _unregister_safeguard(self)

    def log(
        self,
        name: str,
        value: Any,
        step: Optional[int] = None,
        client: Optional[TellurioClient] = None,
    ):
        """Log a metric for this run.

        Args:
            name: Name of the metric.
            value: Value of the metric. Can be any type that is JSON serializable.
            step: Step number. If not provided, the backend will auto-compute it.
            client: The client to use for the request. If not provided,
                the default client will be used.

        Raises:
            ValueError: If the run is missing its organization slug, project slug
                or UUID.
            Exception: If the POST request fails.
        """
        client = client or get_default_clients()[0]

        namespace_slug = self.organization.slug if self.organization else None
        project_slug = self.project.slug if self.project else None
        run_uuid = self.uuid

        if not (namespace_slug and project_slug and run_uuid):
            raise ValueError("Run object is missing required identifiers.")

        endpoint = (
            f"/api/v0/{namespace_slug}/projects/{project_slug}/runs/{run_uuid}/metrics/"
        )
        payload = {
            "name": name,
            "value": value,
        }
        # Only send the step when the caller supplied one.
        if step is not None:
            payload["step"] = step

        try:
            response = client.post(endpoint, json=payload)
            if response.status_code == 201:
                logger.info(f"Logged metric '{name}'={value} for run '{self.name}'.")
            else:
                logger.error(
                    f"Failed to log metric: {response.status_code} - {response.text}"
                )
                response.raise_for_status()
        except Exception as e:
            logger.error(f"An error occurred while logging the metric: {e}")
            raise

    def __enter__(self):
        """Enter the runtime context related to this object.

        Registers the run via `_register_safeguard` and returns self so it can be
        used as a context manager.
        """
        _register_safeguard(self)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Exit the runtime context and finish the run.

        The run is finished with status `"CRASHED"` if the `with` block raised an
        exception, and with `"COMPLETED"` otherwise. Nothing is returned, so an
        exception raised inside the block keeps propagating.
        """
        status = RunStatus.CRASHED if exc_type else RunStatus.COMPLETED
        self.finish(status=status)

finish(client=None, status=RunStatus.COMPLETED)

Sets the run's status on the server (by default "COMPLETED") by sending a PATCH request, and clears the active run UUID.

Parameters:

Name Type Description Default
client TellurioClient | None

The client to use for the request. If not provided, the default client will be used.

None
status RunStatus | None

The RunStatus to set for the run.

COMPLETED

Raises:

Type Description
Exception

If the PATCH request fails.

Source code in afnio/tellurio/run.py
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
def finish(
    self,
    client: Optional[TellurioClient] = None,
    status: Optional[RunStatus] = RunStatus.COMPLETED,
):
    """Sets the run's final status on the server by sending a PATCH request,
    and clears the active run.

    Log output is suppressed while `_IN_ATEXIT` is set.

    Args:
        client: The client to use for the request. If not provided,
            the default client will be used.
        status: The [`RunStatus`][...RunStatus] to set for the run. Defaults to
            `"COMPLETED"`; `None` is treated as `"COMPLETED"` as well.

    Raises:
        ValueError: If the run is missing its organization slug, project slug
            or UUID, or if `status` is not a valid [`RunStatus`][...RunStatus].
        Exception: If the PATCH request fails.
    """
    client = client or get_default_clients()[0]

    namespace_slug = self.organization.slug if self.organization else None
    project_slug = self.project.slug if self.project else None
    run_uuid = self.uuid

    if not (namespace_slug and project_slug and run_uuid):
        raise ValueError("Run object is missing required identifiers.")

    # The parameter is Optional: fall back to the documented default instead
    # of failing on `None.value`, and accept plain status strings too.
    status = RunStatus.COMPLETED if status is None else RunStatus(status)

    endpoint = f"/api/v0/{namespace_slug}/projects/{project_slug}/runs/{run_uuid}/"
    payload = {"status": status.value}

    try:
        response = client.patch(endpoint, json=payload)
        if response.status_code == 200:
            self.status = status
            if not _IN_ATEXIT:
                # Report the status that was actually sent, not always COMPLETED.
                logger.info(f"Run {self.name!r} marked as {status.value}.")
        else:
            if not _IN_ATEXIT:
                logger.error(
                    f"Failed to update run status: {response.status_code} - {response.text}"  # noqa: E501
                )
            response.raise_for_status()
    except Exception as e:
        if not _IN_ATEXIT:
            logger.error(f"An error occurred while updating the run status: {e}")
        raise

    # Clear the active run after finishing. This is deliberately best-effort:
    # a failure here must not turn an already finished run into an error.
    try:
        set_active_run(None)
    except Exception:
        pass

    # Mark safeguard as finished
    _unregister_safeguard(self)

log(name, value, step=None, client=None)

Log a metric for this run.

Parameters:

Name Type Description Default
name str

Name of the metric.

required
value Any

Value of the metric. Can be any type that is JSON serializable.

required
step int | None

Step number. If not provided, the backend will auto-compute it.

None
client TellurioClient | None

The client to use for the request. If not provided, the default client will be used.

None
Source code in afnio/tellurio/run.py
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
def log(
    self,
    name: str,
    value: Any,
    step: Optional[int] = None,
    client: Optional[TellurioClient] = None,
):
    """Send one metric data point for this run to the backend.

    Args:
        name: Name of the metric.
        value: Value of the metric. Can be any type that is JSON serializable.
        step: Step number. When omitted it is left out of the request and the
            backend will auto-compute it.
        client: The client to use for the request. If not provided,
            the default client will be used.

    Raises:
        ValueError: If the run is missing its organization slug, project slug
            or UUID.
        Exception: Any error raised while sending the request is logged and
            re-raised.
    """
    client = client or get_default_clients()[0]

    org, project = self.organization, self.project
    namespace_slug = org.slug if org else None
    project_slug = project.slug if project else None
    run_uuid = self.uuid

    # All three identifiers are needed to address the metrics endpoint.
    if not all((namespace_slug, project_slug, run_uuid)):
        raise ValueError("Run object is missing required identifiers.")

    endpoint = (
        f"/api/v0/{namespace_slug}/projects/{project_slug}/runs/{run_uuid}/metrics/"
    )

    body = {"name": name, "value": value}
    if step is not None:
        body["step"] = step

    try:
        resp = client.post(endpoint, json=body)
        if resp.status_code != 201:
            logger.error(f"Failed to log metric: {resp.status_code} - {resp.text}")
            resp.raise_for_status()
        else:
            logger.info(f"Logged metric '{name}'={value} for run '{self.name}'.")
    except Exception as exc:
        logger.error(f"An error occurred while logging the metric: {exc}")
        raise

__enter__()

Enter the runtime context related to this object.

Returns self so it can be used as a context manager.

Source code in afnio/tellurio/run.py
234
235
236
237
238
239
240
def __enter__(self):
    """Start the `with` block for this run.

    Passes the run to `_register_safeguard` and then hands the run itself back,
    so it can be bound with `with ... as run`.
    """
    _register_safeguard(self)
    return self

__exit__(exc_type, exc_val, exc_tb)

Exit the runtime context and finish the run.

If an exception occurred inside the with block, the run is finished with status "CRASHED"; otherwise it is finished with status "COMPLETED".

Source code in afnio/tellurio/run.py
242
243
244
245
246
247
248
249
def __exit__(self, exc_type, exc_val, exc_tb):
    """Leave the `with` block and finish the run.

    The run is finished as `"CRASHED"` when the block raised an exception and
    as `"COMPLETED"` otherwise. Nothing is returned, so an exception raised
    inside the block keeps propagating.
    """
    if exc_type:
        final_status = RunStatus.CRASHED
    else:
        final_status = RunStatus.COMPLETED
    self.finish(status=final_status)

afnio.tellurio.run.init(namespace_slug, project_display_name, name=None, description=None, status=RunStatus.RUNNING, client=None)

Initializes a new Tellurio Run.

Parameters:

Name Type Description Default
namespace_slug str

The namespace slug where the project resides. It can be either an organization slug or a user slug.

required
project_display_name str

The display name of the project. This will be used to retrieve or create the project through its slugified version.

required
name str | None

The name of the run. If not provided, a random name is generated (e.g., "brave_pasta_123"). If the name is provided but already exists, an incremental number is appended to the name (e.g., "test_run_1", "test_run_2").

None
description str | None

A description of the run.

None
status RunStatus | None

The status of the run (default: "RUNNING").

RUNNING
client TellurioClient | None

An instance of TellurioClient. If not provided, the default client will be used.

None

Returns:

Type Description
Run

A Run object representing the created run.

Raises:

Type Description
Exception

If there is an error during the API requests to create the run or retrieve/create the Project.

Source code in afnio/tellurio/run.py
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
def init(
    namespace_slug: str,
    project_display_name: str,
    name: Optional[str] = None,
    description: Optional[str] = None,
    status: Optional[RunStatus] = RunStatus.RUNNING,
    client: Optional[TellurioClient] = None,
) -> Run:
    """Initializes a new Tellurio [`Run`][afnio.tellurio.run.Run].

    The project is looked up by the slugified `project_display_name` and created
    with `RESTRICTED` visibility when it does not exist yet. The new run is then
    set as the active run and registered with the safeguard before it is returned.

    Args:
        namespace_slug: The namespace slug where the project resides. It can be
            either an organization slug or a user slug.
        project_display_name: The display name of the project. This will be used
            to retrieve or create the project through its slugified version.
        name: The name of the run. If not provided, a random name is
            generated (e.g., "brave_pasta_123"). If the name is provided but already
            exists, an incremental number is appended to the name (e.g., "test_run_1",
            "test_run_2").
        description: A description of the run.
        status: The status of the run (default: "RUNNING"). If `None`, no status
            is sent with the request.
        client: An instance of TellurioClient. If not provided,
            the default client will be used.

    Returns:
        A Run object representing the created run.

    Raises:
        RuntimeError: If the server answers the create request with a status
            code that is neither 201 nor reported as an error by the response.
        Exception: If there is an error during the API requests to create the run or
            retrieve/create the [`Project`][afnio.tellurio.project.Project].
    """
    client = client or get_default_clients()[0]

    # Generate the project's slug from its name
    project_slug = slugify(project_display_name)

    # Ensure the project exists
    try:
        existing_project = get_project(
            namespace_slug=namespace_slug,
            project_slug=project_slug,
            client=client,
        )
        if existing_project is not None:
            logger.info(
                f"Project with slug {project_slug!r} already exists "
                f"in namespace {namespace_slug!r}."
            )
        else:
            logger.info(
                f"Project with slug {project_slug!r} does not exist "
                f"in namespace {namespace_slug!r}. "
                f"Creating it now with RESTRICTED visibility."
            )
            # The returned project is not needed: the run response below carries
            # the project details used to build the Run.
            create_project(
                namespace_slug=namespace_slug,
                display_name=project_display_name,
                visibility="RESTRICTED",
                client=client,
            )
    except Exception as e:
        logger.error(f"An error occurred while retrieving or creating the project: {e}")
        raise

    # Dynamically construct the payload to exclude None values
    payload = {}
    if name is not None:
        payload["name"] = name
    if description is not None:
        payload["description"] = description
    if status is not None:
        payload["status"] = status.value

    # Create the run
    endpoint = f"/api/v0/{namespace_slug}/projects/{project_slug}/runs/"

    try:
        response = client.post(endpoint, json=payload)

        if response.status_code != 201:
            logger.error(
                f"Failed to create run: {response.status_code} - {response.text}"
            )
            response.raise_for_status()
            # raise_for_status() may not raise for a non-error status (e.g. another
            # 2xx). Fail explicitly rather than silently returning None.
            raise RuntimeError(
                f"Unexpected status code {response.status_code} while creating the run."
            )

        data = response.json()
        base_url = os.getenv(
            "TELLURIO_BACKEND_HTTP_BASE_URL", "https://platform.tellurio.ai"
        )
        run_slug = slugify(data["name"])
        logger.info(
            f"Run {data['name']!r} created successfully at: "
            f"{base_url}/{namespace_slug}/projects/{project_slug}/runs/{run_slug}/"
        )

        # Parse date fields; a trailing "Z" is rewritten to an explicit UTC offset
        # before handing the string to datetime.fromisoformat().
        date_created = datetime.fromisoformat(
            data["date_created"].replace("Z", "+00:00")
        )
        date_updated = datetime.fromisoformat(
            data["date_updated"].replace("Z", "+00:00")
        )

        # Parse organization, project and user fields
        run_org = RunOrg(
            slug=namespace_slug,
        )
        run_project = RunProject(
            uuid=data["project"]["uuid"],
            display_name=data["project"]["display_name"],
            slug=data["project"]["slug"],
        )
        run_user = RunUser(
            uuid=data["user"]["uuid"],
            username=data["user"]["username"],
            slug=data["user"]["slug"],
        )

        # Create and return the Run object
        run = Run(
            uuid=data["uuid"],
            name=data["name"],
            description=data["description"],
            status=RunStatus(data["status"]),
            date_created=date_created,
            date_updated=date_updated,
            organization=run_org,
            project=run_project,
            user=run_user,
        )
        set_active_run(run)
        _register_safeguard(run)
        return run
    except Exception as e:
        logger.error(f"An error occurred while creating the run: {e}")
        raise