diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 9f795d2489..87504c94b1 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -2,11 +2,12 @@ import uuid import random import socket -from collections.abc import Mapping +from collections.abc import Mapping, Iterable from datetime import datetime, timezone from importlib import import_module from typing import TYPE_CHECKING, List, Dict, cast, overload import warnings +import json from sentry_sdk._compat import check_uwsgi_thread_support from sentry_sdk._metrics_batcher import MetricsBatcher @@ -30,6 +31,7 @@ ) from sentry_sdk.serializer import serialize from sentry_sdk.tracing import trace +from sentry_sdk.traces import SpanStatus from sentry_sdk.tracing_utils import has_span_streaming_enabled from sentry_sdk.transport import ( HttpTransportCore, @@ -38,6 +40,7 @@ ) from sentry_sdk.consts import ( SPANDATA, + SPANSTATUS, DEFAULT_MAX_VALUE_LENGTH, DEFAULT_OPTIONS, INSTRUMENTER, @@ -56,6 +59,8 @@ ) from sentry_sdk.scrubber import EventScrubber from sentry_sdk.monitor import Monitor +from sentry_sdk.envelope import Item, PayloadRef +from sentry_sdk.utils import datetime_from_isoformat if TYPE_CHECKING: from typing import Any @@ -66,7 +71,15 @@ from typing import Union from typing import TypeVar - from sentry_sdk._types import Event, Hint, SDKInfo, Log, Metric, EventDataCategory + from sentry_sdk._types import ( + Event, + Hint, + SDKInfo, + Log, + Metric, + EventDataCategory, + SerializedAttributeValue, + ) from sentry_sdk.integrations import Integration from sentry_sdk.scope import Scope from sentry_sdk.session import Session @@ -89,6 +102,197 @@ } +def _serialized_v1_attribute_to_serialized_v2_attribute( + attribute_value: "Any", +) -> "Optional[SerializedAttributeValue]": + if isinstance(attribute_value, bool): + return { + "value": attribute_value, + "type": "boolean", + } + + if isinstance(attribute_value, int): + return { + "value": attribute_value, + "type": "integer", + } + + if 
isinstance(attribute_value, float): + return { + "value": attribute_value, + "type": "double", + } + + if isinstance(attribute_value, str): + return { + "value": attribute_value, + "type": "string", + } + + if isinstance(attribute_value, list): + if not attribute_value: + return {"value": [], "type": "array"} + + ty = type(attribute_value[0]) + if ty in (int, str, bool, float) and all( + type(v) is ty for v in attribute_value + ): + return { + "value": attribute_value, + "type": "array", + } + + # Types returned when the serializer for V1 span attributes recurses into some container types. + if isinstance(attribute_value, (dict, list)): + return { + "value": json.dumps(attribute_value), + "type": "string", + } + + if attribute_value is None: + return { + "value": "None", + "type": "string", + } + + return None + + +def _serialized_v1_span_to_serialized_v2_span( + span: "dict[str, Any]", event: "Event" +) -> "dict[str, Any]": + # See SpanBatcher._to_transport_format() for analogous population of all entries except "attributes". 
+ res: "dict[str, Any]" = { + "status": SpanStatus.OK.value, + "is_segment": False, + } + + if "trace_id" in span: + res["trace_id"] = span["trace_id"] + + if "span_id" in span: + res["span_id"] = span["span_id"] + + if "description" in span: + res["name"] = span["description"] + + if "start_timestamp" in span: + start_timestamp = None + try: + start_timestamp = datetime_from_isoformat(span["start_timestamp"]) + except Exception: + pass + + if start_timestamp is not None: + res["start_timestamp"] = start_timestamp.timestamp() + + if "timestamp" in span: + end_timestamp = None + try: + end_timestamp = datetime_from_isoformat(span["timestamp"]) + except Exception: + pass + + if end_timestamp is not None: + res["end_timestamp"] = end_timestamp.timestamp() + + if "parent_span_id" in span: + res["parent_span_id"] = span["parent_span_id"] + + if "status" in span and span["status"] != SPANSTATUS.OK: + res["status"] = "error" + + attributes: "Dict[str, Any]" = {} + + if "op" in span: + attributes["sentry.op"] = span["op"] + if "origin" in span: + attributes["sentry.origin"] = span["origin"] + + span_data = span.get("data") + if isinstance(span_data, dict): + attributes.update(span_data) + + span_tags = span.get("tags") + if isinstance(span_tags, dict): + attributes.update(span_tags) + + # See Scope._apply_user_attributes_to_telemetry() for user attributes. + user = event.get("user") + if isinstance(user, dict): + if "id" in user: + attributes["user.id"] = user["id"] + if "username" in user: + attributes["user.name"] = user["username"] + if "email" in user: + attributes["user.email"] = user["email"] + + # See Scope.set_global_attributes() for release, environment, and SDK metadata. 
+ if "release" in event: + attributes["sentry.release"] = event["release"] + if "environment" in event: + attributes["sentry.environment"] = event["environment"] + if "transaction" in event: + attributes["sentry.segment.name"] = event["transaction"] + + trace_context = event.get("contexts", {}).get("trace", {}) + if "span_id" in trace_context: + attributes["sentry.segment.id"] = trace_context["span_id"] + + sdk_info = event.get("sdk") + if isinstance(sdk_info, dict): + if "name" in sdk_info: + attributes["sentry.sdk.name"] = sdk_info["name"] + if "version" in sdk_info: + attributes["sentry.sdk.version"] = sdk_info["version"] + + if not attributes: + return res + + res["attributes"] = {} + for key, value in attributes.items(): + converted_value = _serialized_v1_attribute_to_serialized_v2_attribute(value) + if converted_value is None: + continue + + res["attributes"][key] = converted_value + + # Remove redundant attribute, as status is stored in the status field. + if "status" in res["attributes"]: + del res["attributes"]["status"] + + return res + + +def _split_gen_ai_spans( + event_opt: "Event", +) -> "Optional[tuple[List[Dict[str, object]], List[Dict[str, object]]]]": + if "spans" not in event_opt: + return None + + spans: "Any" = event_opt["spans"] + if isinstance(spans, AnnotatedValue): + spans = spans.value + + if not isinstance(spans, Iterable): + return None + + non_gen_ai_spans = [] + gen_ai_spans = [] + for span in spans: + if not isinstance(span, dict): + non_gen_ai_spans.append(span) + continue + + span_op = span.get("op") + if isinstance(span_op, str) and span_op.startswith("gen_ai."): + gen_ai_spans.append(span) + else: + non_gen_ai_spans.append(span) + + return non_gen_ai_spans, gen_ai_spans + + def _get_options(*args: "Optional[str]", **kwargs: "Any") -> "Dict[str, Any]": if args and (isinstance(args[0], (bytes, str)) or args[0] is None): dsn: "Optional[str]" = args[0] @@ -909,10 +1113,42 @@ def capture_event( envelope = Envelope(headers=headers) - if 
is_transaction: - if isinstance(profile, Profile): - envelope.add_profile(profile.to_json(event_opt, self.options)) + if is_transaction and isinstance(profile, Profile): + envelope.add_profile(profile.to_json(event_opt, self.options)) + + if is_transaction and not self.options["_experiments"].get( + "gen_ai_as_v2_spans", False + ): envelope.add_transaction(event_opt) + elif is_transaction: + split_spans = _split_gen_ai_spans(event_opt) + if split_spans is None or not split_spans[1]: + envelope.add_transaction(event_opt) + else: + non_gen_ai_spans, gen_ai_spans = split_spans + + event_opt["spans"] = non_gen_ai_spans + envelope.add_transaction(event_opt) + + converted_gen_ai_spans = [ + _serialized_v1_span_to_serialized_v2_span(span, event_opt) + for span in gen_ai_spans + if isinstance(span, dict) + ] + + envelope.add_item( + Item( + type=SpanBatcher.TYPE, + content_type=SpanBatcher.CONTENT_TYPE, + headers={ + "item_count": len(converted_gen_ai_spans), + }, + payload=PayloadRef( + json={"items": converted_gen_ai_spans}, + ), + ) + ) + elif is_checkin: envelope.add_checkin(event_opt) else: diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 73e5a6d9cb..82107b49ee 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -86,6 +86,7 @@ class CompressionAlgo(Enum): "trace_lifecycle": Optional[Literal["static", "stream"]], "ignore_spans": Optional[IgnoreSpansConfig], "suppress_asgi_chained_exceptions": Optional[bool], + "gen_ai_as_v2_spans": Optional[bool], }, total=False, ) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index e86f7e1fa9..b19cca9347 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -91,14 +91,15 @@ async def __call__(self, *args, **kwargs): ], ) def test_nonstreaming_create_message( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): 
sentry_init( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -120,37 +121,38 @@ def test_nonstreaming_create_message( assert usage.input_tokens == 10 assert usage.output_tokens == 20 - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] @pytest.mark.asyncio @@ -164,14 +166,15 @@ def test_nonstreaming_create_message( ], ) async def test_nonstreaming_create_message_async( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -193,36 +196,37 @@ async def test_nonstreaming_create_message_async( assert usage.input_tokens == 10 assert usage.output_tokens == 20 - assert len(events) == 1 - (event,) = events - - assert event["type"] == 
"transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.parametrize( @@ -236,7 +240,7 @@ async def test_nonstreaming_create_message_async( ) def test_streaming_create_message( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -285,8 +289,9 @@ def test_streaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -308,42 +313,45 @@ def test_streaming_create_message( for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + 
span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] def test_streaming_create_message_close( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -390,8 +398,9 @@ def test_streaming_create_message_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -415,31 +424,34 @@ def test_streaming_create_message_close( messages.close() - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in 
items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.skipif( @@ -448,7 +460,7 @@ def test_streaming_create_message_close( ) def test_streaming_create_message_api_error( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -490,8 +502,9 @@ def 
test_streaming_create_message_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -513,34 +526,36 @@ def test_streaming_create_message_api_error( for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "internal_error" - assert span["tags"]["status"] == "internal_error" + assert span["status"] == "error" assert event["contexts"]["trace"]["status"] == "internal_error" @@ -555,7 +570,7 @@ def test_streaming_create_message_api_error( ) def test_stream_messages( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -604,8 +619,9 @@ def test_stream_messages( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -628,42 +644,45 @@ def test_stream_messages( for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - 
assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert 
span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] def test_stream_messages_close( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -710,8 +729,9 @@ def test_stream_messages_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -740,31 +760,34 @@ def test_stream_messages_close( stream.close() - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.skipif( @@ -773,7 +796,7 @@ def test_stream_messages_close( ) def test_stream_messages_api_error( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -815,8 +838,9 @@ def test_stream_messages_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -839,34 +863,36 @@ def test_stream_messages_api_error( for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert 
span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "internal_error" - assert span["tags"]["status"] == "internal_error" + assert span["status"] == "error" assert event["contexts"]["trace"]["status"] == "internal_error" @@ -882,7 +908,7 @@ def test_stream_messages_api_error( ) async def test_streaming_create_message_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -935,8 +961,9 @@ async def test_streaming_create_message_async( traces_sample_rate=1.0, default_integrations=False, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = 
capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -958,44 +985,45 @@ async def test_streaming_create_message_async( async for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] @pytest.mark.asyncio async def test_streaming_create_message_async_close( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -1045,8 +1073,9 @@ async def test_streaming_create_message_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1069,31 +1098,34 @@ async def test_streaming_create_message_async_close( await messages.__anext__() await messages.close() - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span 
= next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.skipif( @@ -1103,7 +1135,7 @@ async def test_streaming_create_message_async_close( @pytest.mark.asyncio async def test_streaming_create_message_async_api_error( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -1148,8 +1180,9 @@ async def test_streaming_create_message_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1171,34 +1204,36 @@ async def test_streaming_create_message_async_api_error( async for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert 
span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "internal_error" - assert span["tags"]["status"] == "internal_error" + assert span["status"] == "error" assert event["contexts"]["trace"]["status"] == "internal_error" @@ -1214,7 +1249,7 @@ async def test_streaming_create_message_async_api_error( ) async def test_stream_message_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -1266,8 +1301,9 @@ async def test_stream_message_async( 
integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1290,37 +1326,38 @@ async def test_stream_message_async( async for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.skipif( @@ -1330,7 +1367,7 @@ async def test_stream_message_async( @pytest.mark.asyncio async def test_stream_messages_async_api_error( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -1375,8 +1412,9 @@ async def test_stream_messages_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1399,41 +1437,43 @@ async def test_stream_messages_async_api_error( async for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if 
item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "internal_error" - assert span["tags"]["status"] == "internal_error" + assert span["status"] == "error" assert event["contexts"]["trace"]["status"] == "internal_error" @pytest.mark.asyncio async def test_stream_messages_async_close( sentry_init, - 
capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -1483,8 +1523,9 @@ async def test_stream_messages_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1515,31 +1556,34 @@ async def test_stream_messages_async_close( await stream.close() - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.skipif( @@ -1557,7 +1601,7 @@ async def test_stream_messages_async_close( ) def test_streaming_create_message_with_input_json_delta( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -1636,8 +1680,9 @@ def test_streaming_create_message_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1659,38 +1704,36 @@ def test_streaming_create_message_with_input_json_delta( for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert 
span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' ) assert ( - span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == '{"location": "San Francisco, CA"}' ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.skipif( @@ -1708,7 +1751,7 @@ def test_streaming_create_message_with_input_json_delta( ) def test_stream_messages_with_input_json_delta( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -1787,8 +1830,9 @@ def test_stream_messages_with_input_json_delta( 
integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1811,38 +1855,36 @@ def test_stream_messages_with_input_json_delta( for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' ) assert ( - span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == '{"location": "San Francisco, CA"}' ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert 
span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.asyncio @@ -1861,7 +1903,7 @@ def test_stream_messages_with_input_json_delta( ) async def test_streaming_create_message_with_input_json_delta_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -1946,8 +1988,9 @@ async def test_streaming_create_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1969,39 +2012,37 @@ async def test_streaming_create_message_with_input_json_delta_async( async for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert 
span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' ) assert ( - span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == '{"location": "San Francisco, CA"}' ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.asyncio @@ -2020,7 +2061,7 @@ async def test_streaming_create_message_with_input_json_delta_async( ) async def test_stream_message_with_input_json_delta_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -2105,8 +2146,9 @@ async def test_stream_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = 
capture_items("transaction", "span") messages = [ { @@ -2129,44 +2171,46 @@ async def test_stream_message_with_input_json_delta_async( async for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' ) assert ( - span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == '{"location": "San Francisco, CA"}' ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert 
span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True -def test_exception_message_create(sentry_init, capture_events): - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() +def test_exception_message_create(sentry_init, capture_items): + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) + items = capture_items("event", "transaction") client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -2179,14 +2223,20 @@ def test_exception_message_create(sentry_init, capture_events): max_tokens=1024, ) - (event, transaction) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" + + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["contexts"]["trace"]["status"] == "internal_error" -def test_span_status_error(sentry_init, capture_events): - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() +def test_span_status_error(sentry_init, capture_items): + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) + items = capture_items("event", "span") with start_transaction(name="anthropic"): client = Anthropic(api_key="z") @@ -2200,18 +2250,23 @@ def test_span_status_error(sentry_init, capture_events): max_tokens=1024, ) - (error, transaction) = events + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - assert transaction["spans"][0]["status"] == "internal_error" - assert 
transaction["spans"][0]["tags"]["status"] == "internal_error" - assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" @pytest.mark.asyncio -async def test_span_status_error_async(sentry_init, capture_events): - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() +async def test_span_status_error_async(sentry_init, capture_items): + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) + items = capture_items("event", "span") with start_transaction(name="anthropic"): client = AsyncAnthropic(api_key="z") @@ -2225,18 +2280,23 @@ async def test_span_status_error_async(sentry_init, capture_events): max_tokens=1024, ) - (error, transaction) = events + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - assert transaction["spans"][0]["status"] == "internal_error" - assert transaction["spans"][0]["tags"]["status"] == "internal_error" - assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" @pytest.mark.asyncio -async def test_exception_message_create_async(sentry_init, capture_events): - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() +async 
def test_exception_message_create_async(sentry_init, capture_items): + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) + items = capture_items("event", "transaction") client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock( @@ -2249,17 +2309,20 @@ async def test_exception_message_create_async(sentry_init, capture_events): max_tokens=1024, ) - (event, transaction) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" + + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["contexts"]["trace"]["status"] == "internal_error" -def test_span_origin(sentry_init, capture_events): +def test_span_origin(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -2274,21 +2337,23 @@ def test_span_origin(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.anthropic" - assert event["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert event["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" @pytest.mark.asyncio -async def 
test_span_origin_async(sentry_init, capture_events): +async def test_span_origin_async(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -2303,12 +2368,13 @@ async def test_span_origin_async(sentry_init, capture_events): with start_transaction(name="anthropic"): await client.messages.create(max_tokens=1024, messages=messages, model="model") - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.anthropic" - assert event["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert event["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" @pytest.mark.skipif( @@ -2349,6 +2415,7 @@ def test_set_output_data_with_input_json_delta(sentry_init): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with start_transaction(name="test"): @@ -2392,15 +2459,16 @@ def test_set_output_data_with_input_json_delta(sentry_init): ], ) def test_anthropic_message_role_mapping( - sentry_init, capture_events, test_message, expected_role + sentry_init, capture_items, test_message, expected_role ): """Test that Anthropic integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], 
traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -2425,29 +2493,29 @@ def mock_messages_create(*args, **kwargs): model="claude-3-opus", max_tokens=10, messages=test_messages ) - (event,) = events - span = event["spans"][0] + span = next(item.payload for item in items if item.type == "span") # Verify that the span was created correctly - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] # Parse the stored messages - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert stored_messages[0]["role"] == expected_role -def test_anthropic_message_truncation(sentry_init, capture_events): +def test_anthropic_message_truncation(sentry_init, capture_items): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -2466,21 +2534,18 @@ def test_anthropic_message_truncation(sentry_init, capture_events): with start_transaction(): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) > 0 - tx = 
events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + span for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT ] assert len(chat_spans) > 0 chat_span = chat_spans[0] - assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -2488,18 +2553,20 @@ def test_anthropic_message_truncation(sentry_init, capture_events): assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) + tx = next(item.payload for item in items if item.type == "transaction") assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @pytest.mark.asyncio -async def test_anthropic_message_truncation_async(sentry_init, capture_events): +async def test_anthropic_message_truncation_async(sentry_init, capture_items): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") client.messages._post = mock.AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -2518,21 +2585,18 @@ async def 
test_anthropic_message_truncation_async(sentry_init, capture_events): with start_transaction(): await client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + span for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT ] assert len(chat_spans) > 0 chat_span = chat_spans[0] - assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -2540,6 +2604,7 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_events): assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) + tx = next(item.payload for item in items if item.type == "transaction") assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @@ -2553,15 +2618,16 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_events): ], ) def test_nonstreaming_create_message_with_system_prompt( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """Test that system prompts are properly captured in GEN_AI_REQUEST_MESSAGES.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=include_prompts)], 
traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -2586,46 +2652,46 @@ def test_nonstreaming_create_message_with_system_prompt( assert usage.input_tokens == 10 assert usage.output_tokens == 20 - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + 
stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] @pytest.mark.asyncio @@ -2639,15 +2705,16 @@ def test_nonstreaming_create_message_with_system_prompt( ], ) async def test_nonstreaming_create_message_with_system_prompt_async( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """Test that system prompts are properly captured in GEN_AI_REQUEST_MESSAGES (async).""" sentry_init( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, 
send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -2672,46 +2739,46 @@ async def test_nonstreaming_create_message_with_system_prompt_async( assert usage.input_tokens == 10 assert usage.output_tokens == 20 - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = 
json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] @pytest.mark.parametrize( @@ -2725,7 +2792,7 @@ async def test_nonstreaming_create_message_with_system_prompt_async( ) def test_streaming_create_message_with_system_prompt( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -2775,8 +2842,9 @@ def test_streaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events 
= capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -2802,46 +2870,46 @@ def test_streaming_create_message_with_system_prompt( for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert 
span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.parametrize( @@ -2855,7 +2923,7 @@ def test_streaming_create_message_with_system_prompt( ) def test_stream_messages_with_system_prompt( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -2905,8 +2973,9 @@ def test_stream_messages_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -2930,46 +2999,46 @@ def test_stream_messages_with_system_prompt( for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) 
== 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.asyncio @@ -2984,7 +3053,7 @@ def test_stream_messages_with_system_prompt( ) async def test_stream_message_with_system_prompt_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -3037,8 +3106,9 @@ async def test_stream_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -3062,46 +3132,46 @@ async def test_stream_message_with_system_prompt_async( async for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 
1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.asyncio @@ -3116,7 +3186,7 @@ async def test_stream_message_with_system_prompt_async( ) async def test_streaming_create_message_with_system_prompt_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -3169,8 +3239,9 @@ async def test_streaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -3196,56 +3267,57 @@ async def test_streaming_create_message_with_system_prompt_async( async for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if 
item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True -def test_system_prompt_with_complex_structure(sentry_init, capture_events): +def test_system_prompt_with_complex_structure(sentry_init, capture_items): """Test that complex system prompt structures (list of text blocks) are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3268,17 +3340,18 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_events): ) assert response == EXAMPLE_MESSAGE - assert len(events) == 1 - (event,) = events - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" 
+ assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] - system_instructions = json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] + system_instructions = json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) # System content should be a list of text blocks assert isinstance(system_instructions, list) @@ -3287,8 +3360,8 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_events): {"type": "text", "content": "Be concise and clear."}, ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" @@ -3490,14 +3563,15 @@ def test_transform_message_content_list_anthropic(): # Integration tests for binary data in messages -def test_message_with_base64_image(sentry_init, capture_events): +def test_message_with_base64_image(sentry_init, capture_items): """Test that messages with base64 images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3521,12 +3595,11 @@ def test_message_with_base64_image(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + 
spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" @@ -3541,14 +3614,15 @@ def test_message_with_base64_image(sentry_init, capture_events): } -def test_message_with_url_image(sentry_init, capture_events): +def test_message_with_url_image(sentry_init, capture_items): """Test that messages with URL-referenced images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3571,11 +3645,10 @@ def test_message_with_url_image(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "uri", @@ -3585,14 +3658,15 @@ def test_message_with_url_image(sentry_init, capture_events): } -def test_message_with_file_image(sentry_init, capture_events): +def test_message_with_file_image(sentry_init, capture_items): """Test that messages with file_id-referenced images are properly captured.""" sentry_init( 
integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3616,11 +3690,10 @@ def test_message_with_file_image(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "file", @@ -3630,14 +3703,15 @@ def test_message_with_file_image(sentry_init, capture_events): } -def test_message_with_base64_pdf(sentry_init, capture_events): +def test_message_with_base64_pdf(sentry_init, capture_items): """Test that messages with base64-encoded PDF documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3651,7 +3725,7 @@ def test_message_with_base64_pdf(sentry_init, capture_events): "source": { "type": "base64", "media_type": "application/pdf", - "data": "JVBERi0xLjQKJeLj...base64pdfdata", + "data": "JVBERi0xLjQKJeLj...base64pdfdata", }, }, ], @@ -3661,11 +3735,10 @@ def test_message_with_base64_pdf(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, 
messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "blob", @@ -3675,14 +3748,15 @@ def test_message_with_base64_pdf(sentry_init, capture_events): } -def test_message_with_url_pdf(sentry_init, capture_events): +def test_message_with_url_pdf(sentry_init, capture_items): """Test that messages with URL-referenced PDF documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3705,11 +3779,10 @@ def test_message_with_url_pdf(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "uri", @@ -3719,14 +3792,15 @@ def test_message_with_url_pdf(sentry_init, capture_events): } -def test_message_with_file_document(sentry_init, capture_events): +def test_message_with_file_document(sentry_init, capture_items): """Test that messages with file_id-referenced documents are properly captured.""" sentry_init( 
integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3750,11 +3824,10 @@ def test_message_with_file_document(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "file", @@ -3764,14 +3837,15 @@ def test_message_with_file_document(sentry_init, capture_events): } -def test_message_with_mixed_content(sentry_init, capture_events): +def test_message_with_mixed_content(sentry_init, capture_items): """Test that messages with mixed content (text, images, documents) are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3785,7 +3859,7 @@ def test_message_with_mixed_content(sentry_init, capture_events): "source": { "type": "base64", "media_type": "image/png", - "data": "iVBORw0KGgo...base64imagedata", + "data": "iVBORw0KGgo...base64imagedata", }, }, { @@ -3800,7 +3874,7 @@ def test_message_with_mixed_content(sentry_init, capture_events): "source": { "type": "base64", "media_type": "application/pdf", - "data": 
"JVBERi0xLjQK...base64pdfdata", + "data": "JVBERi0xLjQK...base64pdfdata", }, }, {"type": "text", "text": "Please provide a detailed analysis."}, @@ -3811,11 +3885,10 @@ def test_message_with_mixed_content(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert len(content) == 5 @@ -3847,14 +3920,15 @@ def test_message_with_mixed_content(sentry_init, capture_events): } -def test_message_with_multiple_images_different_formats(sentry_init, capture_events): +def test_message_with_multiple_images_different_formats(sentry_init, capture_items): """Test that messages with multiple images of different source types are handled.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3867,7 +3941,7 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_eve "source": { "type": "base64", "media_type": "image/jpeg", - "data": "base64data1...", + "data": "base64data1...", }, }, { @@ -3893,11 +3967,10 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_eve with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if 
item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert len(content) == 4 @@ -3922,14 +3995,15 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_eve assert content[3] == {"type": "text", "text": "Compare these three images."} -def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_events): +def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_items): """Test that binary content is not stored when send_default_pii is False.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3943,7 +4017,7 @@ def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_events "source": { "type": "base64", "media_type": "image/jpeg", - "data": "base64encodeddatahere...", + "data": "base64encodeddatahere...", }, }, ], @@ -3953,22 +4027,22 @@ def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_events with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans # Messages should not be stored - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] -def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_events): +def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_items): 
"""Test that binary content is not stored when include_prompts is False.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3982,7 +4056,7 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_ev "source": { "type": "base64", "media_type": "image/jpeg", - "data": "base64encodeddatahere...", + "data": "base64encodeddatahere...", }, }, ], @@ -3992,18 +4066,21 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_ev with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans # Messages should not be stored - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] -def test_cache_tokens_nonstreaming(sentry_init, capture_events): +def test_cache_tokens_nonstreaming(sentry_init, capture_items): """Test cache read/write tokens are tracked for non-streaming responses.""" - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4029,16 +4106,16 @@ def test_cache_tokens_nonstreaming(sentry_init, capture_events): model="claude-3-5-sonnet-20241022", ) - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # 
input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 50 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 50 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 -def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_events): +def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_items): """ Test that gen_ai.usage.input_tokens includes cache_write tokens (non-streaming). 
@@ -4050,8 +4127,12 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_even Usage(input_tokens=19, output_tokens=14, cache_creation_input_tokens=2846, cache_read_input_tokens=0) """ - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4077,16 +4158,16 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_even model="claude-sonnet-4-20250514", ) - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens should be total: 19 (non-cached) + 2846 (cache_write) = 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 0 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 0 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 2846 -def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_events): +def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_items): """ Test that gen_ai.usage.input_tokens includes cache_read tokens (non-streaming). 
@@ -4098,8 +4179,12 @@ def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_event Usage(input_tokens=19, output_tokens=14, cache_creation_input_tokens=0, cache_read_input_tokens=2846) """ - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4125,18 +4210,18 @@ def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_event model="claude-sonnet-4-20250514", ) - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens should be total: 19 (non-cached) + 2846 (cache_read) = 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 def test_input_tokens_include_cache_read_streaming( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -4175,8 +4260,12 @@ def test_input_tokens_include_cache_read_streaming( ) ) - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) + items = capture_items("transaction", "span") with 
mock.patch.object( client._client, @@ -4192,18 +4281,18 @@ def test_input_tokens_include_cache_read_streaming( ): pass - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens should be total: 19 + 2846 = test_stream_messages_input_tokens_include_cache_read_streaming - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 def test_stream_messages_input_tokens_include_cache_read_streaming( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -4241,8 +4330,12 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( ) ) - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) + items = capture_items("transaction", "span") with mock.patch.object( client._client, @@ -4258,24 +4351,28 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( for event in stream: pass - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens should be total: 19 + 2846 = 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 
- assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 -def test_input_tokens_unchanged_without_caching(sentry_init, capture_events): +def test_input_tokens_unchanged_without_caching(sentry_init, capture_items): """ Test that input_tokens is unchanged when there are no cached tokens. Real Anthropic response (from E2E test, simple call without caching): Usage(input_tokens=20, output_tokens=12) """ - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4299,15 +4396,15 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_events): model="claude-sonnet-4-20250514", ) - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 32 # 20 + 12 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 32 # 20 + 12 def test_cache_tokens_streaming( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -4342,8 +4439,12 @@ def test_cache_tokens_streaming( ) ) - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + 
_experiments={"gen_ai_as_v2_spans": True}, + ) + items = capture_items("transaction", "span") with mock.patch.object( client._client, @@ -4359,17 +4460,17 @@ def test_cache_tokens_streaming( ): pass - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 def test_stream_messages_cache_tokens( - sentry_init, capture_events, get_model_response, server_side_event_chunks + sentry_init, capture_items, get_model_response, server_side_event_chunks ): """Test cache tokens are tracked for streaming responses.""" client = Anthropic(api_key="z") @@ -4402,8 +4503,12 @@ def test_stream_messages_cache_tokens( ) ) - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) + items = capture_items("transaction", "span") with mock.patch.object( client._client, @@ -4419,10 +4524,10 @@ def test_stream_messages_cache_tokens( for event in stream: pass - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens normalized: 100 + 80 (cache_read) + 
20 (cache_write) = 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 6e91ba6634..ae31fe565b 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -124,14 +124,15 @@ def create_test_config( ], ) def test_nonstreaming_generate_content( - sentry_init, capture_events, send_default_pii, include_prompts, mock_genai_client + sentry_init, capture_items, send_default_pii, include_prompts, mock_genai_client ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the HTTP response at the _api_client.request() level mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -146,38 +147,37 @@ def test_nonstreaming_generate_content( mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents="Tell me a joke", config=config ) - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == 
"transaction") assert event["transaction"] == "google_genai" - assert len(event["spans"]) == 1 - chat_span = event["spans"][0] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + chat_span = next(item.payload for item in items if item.type == "span") # Check chat span - assert chat_span["op"] == OP.GEN_AI_CHAT - assert chat_span["description"] == "chat gemini-1.5-flash" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" - assert chat_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" + assert chat_span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert chat_span["name"] == "chat gemini-1.5-flash" + assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" if send_default_pii and include_prompts: # Response text is stored as a JSON array - response_text = chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] # Parse the JSON array response_texts = json.loads(response_text) assert response_texts == ["Hello! 
How can I help you today?"] else: - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_span["attributes"] # Check token usage - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 # Output tokens now include reasoning tokens: candidates_token_count (20) + thoughts_token_count (3) = 23 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 23 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 23 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 @pytest.mark.parametrize("generate_content_config", (False, True)) @@ -210,7 +210,7 @@ def test_nonstreaming_generate_content( ) def test_generate_content_with_system_instruction( sentry_init, - capture_events, + capture_items, mock_genai_client, generate_content_config, system_instructions, @@ -220,8 +220,9 @@ def test_generate_content_with_system_instruction( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -243,16 +244,15 @@ def test_generate_content_with_system_instruction( config=config, ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") if expected_texts is None: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in 
invoke_span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["attributes"] return # (PII is enabled and include_prompts is True in this test) system_instructions = json.loads( - invoke_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + invoke_span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ @@ -260,12 +260,13 @@ def test_generate_content_with_system_instruction( ] -def test_generate_content_with_tools(sentry_init, capture_events, mock_genai_client): +def test_generate_content_with_tools(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") # Create a mock tool function def get_weather(location: str) -> str: @@ -319,18 +320,17 @@ def get_weather(location: str) -> str: model="gemini-1.5-flash", contents="What's the weather?", config=config ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") # Check that tools are recorded (data is serialized as a string) - tools_data_str = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + tools_data_str = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] # Parse the JSON string to verify content tools_data = json.loads(tools_data_str) assert len(tools_data) == 2 # The order of tools may not be guaranteed, so sort by name and description for comparison sorted_tools = sorted( - tools_data, key=lambda t: (t.get("name", ""), t.get("description", "")) + tools_data, key=lambda t: (t.get("name", ""), t.get("description", "")) ) # The function tool @@ -342,13 +342,14 @@ def get_weather(location: str) -> str: assert sorted_tools[1]["description"] == "Get weather information (tool object)" -def test_tool_execution(sentry_init, capture_events): +def test_tool_execution(sentry_init, capture_items): 
sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") # Create a mock tool function def get_weather(location: str) -> str: @@ -366,25 +367,26 @@ def get_weather(location: str) -> str: assert result == "The weather in San Francisco is sunny" - (event,) = events - assert len(event["spans"]) == 1 - tool_span = event["spans"][0] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + tool_span = next(item.payload for item in items if item.type == "span") - assert tool_span["op"] == OP.GEN_AI_EXECUTE_TOOL - assert tool_span["description"] == "execute_tool get_weather" - assert tool_span["data"][SPANDATA.GEN_AI_TOOL_NAME] == "get_weather" + assert tool_span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL + assert tool_span["name"] == "execute_tool get_weather" + assert tool_span["attributes"][SPANDATA.GEN_AI_TOOL_NAME] == "get_weather" assert ( - tool_span["data"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] + tool_span["attributes"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] == "Get the weather for a location" ) -def test_error_handling(sentry_init, capture_events, mock_genai_client): +def test_error_handling(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("event", "transaction") # Mock an error at the HTTP level with mock.patch.object( @@ -399,8 +401,8 @@ def test_error_handling(sentry_init, capture_events, mock_genai_client): ) # Should have both transaction and error events - assert len(events) == 2 - error_event, transaction_event = events + assert len([item for item in items if item.type == "transaction"]) == 1 + (error_event,) = (item.payload for item in items if item.type == "event") assert 
error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "Exception" @@ -408,14 +410,15 @@ def test_error_handling(sentry_init, capture_events, mock_genai_client): assert error_event["exception"]["values"][0]["mechanism"]["type"] == "google_genai" -def test_streaming_generate_content(sentry_init, capture_events, mock_genai_client): +def test_streaming_generate_content(sentry_init, capture_items, mock_genai_client): """Test streaming with generate_content_stream, verifying chunk accumulation.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") # Create streaming chunks - simulating a multi-chunk response # Chunk 1: First part of text with partial usage metadata @@ -497,40 +500,42 @@ def test_streaming_generate_content(sentry_init, capture_events, mock_genai_clie assert collected_chunks[1].candidates[0].content.parts[0].text == "How can I " assert collected_chunks[2].candidates[0].content.parts[0].text == "help you today?" - (event,) = events - - assert len(event["spans"]) == 1 - chat_span = event["spans"][0] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + chat_span = next(item.payload for item in items if item.type == "span") # Check that streaming flag is set on both spans - assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True # Verify accumulated response text (all chunks combined) expected_full_text = "Hello! How can I help you today?" 
# Response text is stored as a JSON string - chat_response_text = json.loads(chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]) + chat_response_text = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + ) assert chat_response_text == [expected_full_text] # Verify finish reasons (only the final chunk has a finish reason) # When there's a single finish reason, it's stored as a plain string (not JSON) - assert SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS in chat_span["data"] - assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == "STOP" - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 25 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 + assert SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS in chat_span["attributes"] + assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == "STOP" + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 25 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 # Verify model name - assert chat_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" + assert chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" -def test_span_origin(sentry_init, capture_events, mock_genai_client): +def test_span_origin(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span", 
"transaction") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -543,22 +548,22 @@ def test_span_origin(sentry_init, capture_events, mock_genai_client): model="gemini-1.5-flash", contents="Test origin", config=config ) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - for span in event["spans"]: - assert span["origin"] == "auto.ai.google_genai" + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" -def test_response_without_usage_metadata( - sentry_init, capture_events, mock_genai_client -): + +def test_response_without_usage_metadata(sentry_init, capture_items, mock_genai_client): """Test handling of responses without usage metadata""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") # Response without usage metadata response_json = { @@ -584,23 +589,23 @@ def test_response_without_usage_metadata( model="gemini-1.5-flash", contents="Test", config=config ) - (event,) = events - chat_span = event["spans"][0] + chat_span = next(item.payload for item in items if item.type == "span") # Usage data should not be present - assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in chat_span["data"] - assert SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS not in chat_span["data"] - assert SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS not in chat_span["data"] + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in chat_span["attributes"] + assert SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS not in chat_span["attributes"] + assert SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS not in chat_span["attributes"] -def test_multiple_candidates(sentry_init, capture_events, mock_genai_client): +def test_multiple_candidates(sentry_init, capture_items, mock_genai_client): 
"""Test handling of multiple response candidates""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") # Response with multiple candidates multi_candidate_json = { @@ -638,12 +643,11 @@ def test_multiple_candidates(sentry_init, capture_events, mock_genai_client): model="gemini-1.5-flash", contents="Generate multiple", config=config ) - (event,) = events - chat_span = event["spans"][0] + chat_span = next(item.payload for item in items if item.type == "span") # Should capture all responses # Response text is stored as a JSON string when there are multiple responses - response_text = chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] if isinstance(response_text, str) and response_text.startswith("["): # It's a JSON array response_list = json.loads(response_text) @@ -654,18 +658,19 @@ def test_multiple_candidates(sentry_init, capture_events, mock_genai_client): # Finish reasons are serialized as JSON finish_reasons = json.loads( - chat_span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] ) assert finish_reasons == ["STOP", "MAX_TOKENS"] -def test_all_configuration_parameters(sentry_init, capture_events, mock_genai_client): +def test_all_configuration_parameters(sentry_init, capture_items, mock_genai_client): """Test that all configuration parameters are properly recorded""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -686,26 +691,26 @@ def test_all_configuration_parameters(sentry_init, capture_events, mock_genai_cl model="gemini-1.5-flash", 
contents="Test all params", config=config ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") # Check all parameters are recorded - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.8 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.95 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_TOP_K] == 40 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 2048 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_SEED] == 12345 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.8 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.95 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_K] == 40 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 2048 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_SEED] == 12345 -def test_empty_response(sentry_init, capture_events, mock_genai_client): +def test_empty_response(sentry_init, capture_items, mock_genai_client): """Test handling of minimal response with no content""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") # Minimal response with empty candidates array minimal_response_json = {"candidates": []} @@ -723,20 +728,21 @@ def test_empty_response(sentry_init, capture_events, mock_genai_client): assert response is not None assert len(response.candidates) == 0 - (event,) = events # Should still create spans even with empty candidates - assert 
len(event["spans"]) == 1 + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 def test_response_with_different_id_fields( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test handling of different response ID field names""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") # Response with response_id and model_version response_json = { @@ -763,20 +769,22 @@ def test_response_with_different_id_fields( model="gemini-1.5-flash", contents="Test", config=create_test_config() ) - (event,) = events - chat_span = event["spans"][0] + chat_span = next(item.payload for item in items if item.type == "span") - assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "resp-456" - assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gemini-1.5-flash-001" + assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] == "resp-456" + assert ( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] + == "gemini-1.5-flash-001" + ) -def test_tool_with_async_function(sentry_init, capture_events): +def test_tool_with_async_function(sentry_init): """Test that async tool functions are properly wrapped""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - capture_events() # Create an async tool function async def async_tool(param: str) -> str: @@ -792,14 +800,15 @@ async def async_tool(param: str) -> str: assert hasattr(wrapped_async_tool, "__wrapped__") # Should preserve original -def test_contents_as_none(sentry_init, capture_events, mock_genai_client): +def test_contents_as_none(sentry_init, capture_items, mock_genai_client): """Test handling when contents parameter is None""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, 
send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -811,22 +820,22 @@ def test_contents_as_none(sentry_init, capture_events, mock_genai_client): model="gemini-1.5-flash", contents=None, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") # Should handle None contents gracefully - messages = invoke_span["data"].get(SPANDATA.GEN_AI_REQUEST_MESSAGES, []) + messages = invoke_span["attributes"].get(SPANDATA.GEN_AI_REQUEST_MESSAGES, []) # Should only have system message if any, not user message assert all(msg["role"] != "user" or msg["content"] is not None for msg in messages) -def test_tool_calls_extraction(sentry_init, capture_events, mock_genai_client): +def test_tool_calls_extraction(sentry_init, capture_items, mock_genai_client): """Test extraction of tool/function calls from response""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") # Response with function calls function_call_response_json = { @@ -875,14 +884,17 @@ def test_tool_calls_extraction(sentry_init, capture_events, mock_genai_client): config=create_test_config(), ) - (event,) = events - chat_span = event["spans"][0] # The chat span + chat_span = next( + item.payload for item in items if item.type == "span" + ) # The chat span # Check that tool calls are extracted and stored - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_span["attributes"] # Parse the JSON string to verify content - tool_calls = json.loads(chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS]) + tool_calls = json.loads( + 
chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + ) assert len(tool_calls) == 2 @@ -902,16 +914,15 @@ def test_tool_calls_extraction(sentry_init, capture_events, mock_genai_client): assert json.loads(tool_calls[1]["arguments"]) == {"timezone": "PST"} -def test_google_genai_message_truncation( - sentry_init, capture_events, mock_genai_client -): +def test_google_genai_message_truncation(sentry_init, capture_items, mock_genai_client): """Test that large messages are truncated properly in Google GenAI integration.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") large_content = ( "This is a very long message that will exceed our size limits. " * 1000 @@ -930,11 +941,10 @@ def test_google_genai_message_truncation( config=create_test_config(), ) - (event,) = events - invoke_span = event["spans"][0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + invoke_span = next(item.payload for item in items if item.type == "span") + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -980,14 +990,15 @@ def test_google_genai_message_truncation( ], ) def test_embed_content( - sentry_init, capture_events, send_default_pii, include_prompts, mock_genai_client + sentry_init, capture_items, send_default_pii, include_prompts, mock_genai_client ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the HTTP response 
at the _api_client.request() level mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) @@ -1006,47 +1017,50 @@ def test_embed_content( ], ) - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "google_genai_embeddings" # Should have 1 span for embeddings - assert len(event["spans"]) == 1 - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (embed_span,) = spans # Check embeddings span - assert embed_span["op"] == OP.GEN_AI_EMBEDDINGS - assert embed_span["description"] == "embeddings text-embedding-004" - assert embed_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert embed_span["data"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" - assert embed_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" + assert embed_span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert embed_span["name"] == "embeddings text-embedding-004" + assert embed_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert embed_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert ( + embed_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" + ) # Check input texts if PII is allowed if send_default_pii and include_prompts: - input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) + input_texts = json.loads( + embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) assert input_texts == [ "What is your name?", "What is your favorite color?", ] else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] # Check usage data (sum of token counts from statistics: 10 + 15 = 25) # Note: Only available in newer versions with ContentEmbeddingStatistics - if 
SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 -def test_embed_content_string_input(sentry_init, capture_events, mock_genai_client): +def test_embed_content_string_input(sentry_init, capture_items, mock_genai_client): """Test embed_content with a single string instead of list.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") # Mock response with single embedding single_embed_response = { @@ -1074,25 +1088,26 @@ def test_embed_content_string_input(sentry_init, capture_events, mock_genai_clie contents="Single text input", ) - (event,) = events - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans # Check that single string is handled correctly - input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) + input_texts = json.loads(embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) assert input_texts == ["Single text input"] # Should use token_count from statistics (5), not billable_character_count (10) # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 -def test_embed_content_error_handling(sentry_init, capture_events, mock_genai_client): +def test_embed_content_error_handling(sentry_init, capture_items, mock_genai_client): """Test error handling in embed_content.""" sentry_init( 
integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "event") # Mock an error at the HTTP level with mock.patch.object( @@ -1108,8 +1123,8 @@ def test_embed_content_error_handling(sentry_init, capture_events, mock_genai_cl ) # Should have both transaction and error events - assert len(events) == 2 - error_event, _ = events + assert len([item for item in items if item.type == "transaction"]) == 1 + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "Exception" @@ -1118,14 +1133,15 @@ def test_embed_content_error_handling(sentry_init, capture_events, mock_genai_cl def test_embed_content_without_statistics( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test embed_content response without statistics (older package versions).""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") # Response without statistics (typical for older google-genai versions) # Embeddings exist but don't have the statistics field @@ -1150,21 +1166,22 @@ def test_embed_content_without_statistics( contents=["Test without statistics", "Another test"], ) - (event,) = events - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans # No usage tokens since there are no statistics in older versions # This is expected and the integration should handle it gracefully - assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["data"] + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["attributes"] -def test_embed_content_span_origin(sentry_init, capture_events, mock_genai_client): +def 
test_embed_content_span_origin(sentry_init, capture_items, mock_genai_client): """Test that embed_content spans have correct origin.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) @@ -1177,11 +1194,12 @@ def test_embed_content_span_origin(sentry_init, capture_events, mock_genai_clien contents=["Test origin"], ) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - for span in event["spans"]: - assert span["origin"] == "auto.ai.google_genai" + + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" @pytest.mark.asyncio @@ -1195,15 +1213,16 @@ def test_embed_content_span_origin(sentry_init, capture_events, mock_genai_clien ], ) async def test_async_embed_content( - sentry_init, capture_events, send_default_pii, include_prompts, mock_genai_client + sentry_init, capture_items, send_default_pii, include_prompts, mock_genai_client ): """Test async embed_content method.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the async HTTP response mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) @@ -1222,50 +1241,53 @@ async def test_async_embed_content( ], ) - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "google_genai_embeddings_async" # Should have 1 
span for embeddings - assert len(event["spans"]) == 1 - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (embed_span,) = spans # Check embeddings span - assert embed_span["op"] == OP.GEN_AI_EMBEDDINGS - assert embed_span["description"] == "embeddings text-embedding-004" - assert embed_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert embed_span["data"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" - assert embed_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" + assert embed_span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert embed_span["name"] == "embeddings text-embedding-004" + assert embed_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert embed_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert ( + embed_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" + ) # Check input texts if PII is allowed if send_default_pii and include_prompts: - input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) + input_texts = json.loads( + embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) assert input_texts == [ "What is your name?", "What is your favorite color?", ] else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] # Check usage data (sum of token counts from statistics: 10 + 15 = 25) # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 @pytest.mark.asyncio async def test_async_embed_content_string_input( - sentry_init, capture_events, mock_genai_client + sentry_init, 
capture_items, mock_genai_client ): """Test async embed_content with a single string instead of list.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") # Mock response with single embedding single_embed_response = { @@ -1293,28 +1315,29 @@ async def test_async_embed_content_string_input( contents="Single text input", ) - (event,) = events - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans # Check that single string is handled correctly - input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) + input_texts = json.loads(embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) assert input_texts == ["Single text input"] # Should use token_count from statistics (5), not billable_character_count (10) # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 @pytest.mark.asyncio async def test_async_embed_content_error_handling( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test error handling in async embed_content.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "event") # Mock an error at the HTTP level with mock.patch.object( @@ -1330,8 +1353,8 @@ async def test_async_embed_content_error_handling( ) # Should have both transaction and error events - assert len(events) == 2 - error_event, _ = events + assert 
len([item for item in items if item.type == "transaction"]) == 1 + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "Exception" @@ -1341,14 +1364,15 @@ async def test_async_embed_content_error_handling( @pytest.mark.asyncio async def test_async_embed_content_without_statistics( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test async embed_content response without statistics (older package versions).""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") # Response without statistics (typical for older google-genai versions) # Embeddings exist but don't have the statistics field @@ -1373,24 +1397,25 @@ async def test_async_embed_content_without_statistics( contents=["Test without statistics", "Another test"], ) - (event,) = events - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans # No usage tokens since there are no statistics in older versions # This is expected and the integration should handle it gracefully - assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["data"] + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["attributes"] @pytest.mark.asyncio async def test_async_embed_content_span_origin( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test that async embed_content spans have correct origin.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) @@ -1403,24 +1428,26 @@ async def 
test_async_embed_content_span_origin( contents=["Test origin"], ) - (event,) = events - + (event,) = [item.payload for item in items if item.type == "transaction"] assert event["contexts"]["trace"]["origin"] == "manual" - for span in event["spans"]: - assert span["origin"] == "auto.ai.google_genai" + + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" # Integration tests for generate_content with different input message formats def test_generate_content_with_content_object( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with Content object input.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1437,10 +1464,9 @@ def test_generate_content_with_content_object( model="gemini-1.5-flash", contents=content, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [ @@ -1449,15 +1475,16 @@ def test_generate_content_with_content_object( def test_generate_content_with_dict_format( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with dict format input (ContentDict).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + 
_experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1472,10 +1499,9 @@ def test_generate_content_with_dict_format( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [ @@ -1483,16 +1509,15 @@ def test_generate_content_with_dict_format( ] -def test_generate_content_with_file_data( - sentry_init, capture_events, mock_genai_client -): +def test_generate_content_with_file_data(sentry_init, capture_items, mock_genai_client): """Test generate_content with file_data (external file reference).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1516,10 +1541,9 @@ def test_generate_content_with_file_data( model="gemini-1.5-flash", contents=content, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert len(messages[0]["content"]) == 2 @@ -1534,15 +1558,16 @@ def test_generate_content_with_file_data( def test_generate_content_with_inline_data( - 
sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with inline_data (binary data).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1565,10 +1590,9 @@ def test_generate_content_with_inline_data( model="gemini-1.5-flash", contents=content, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert len(messages[0]["content"]) == 2 @@ -1580,15 +1604,16 @@ def test_generate_content_with_inline_data( def test_generate_content_with_function_response( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with function_response (tool result).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1622,10 +1647,9 @@ def test_generate_content_with_function_response( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = 
json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # First message is user message assert messages[0]["role"] == "tool" @@ -1635,15 +1659,16 @@ def test_generate_content_with_function_response( def test_generate_content_with_mixed_string_and_content( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with mixed string and Content objects in list.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1668,10 +1693,9 @@ def test_generate_content_with_mixed_string_and_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # User message assert messages[0]["role"] == "user" @@ -1679,15 +1703,16 @@ def test_generate_content_with_mixed_string_and_content( def test_generate_content_with_part_object_directly( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with Part object directly (not wrapped in Content).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1702,17 +1727,16 @@ def test_generate_content_with_part_object_directly( 
model="gemini-1.5-flash", contents=part, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [{"text": "Direct Part object", "type": "text"}] def test_generate_content_with_list_of_dicts( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """ Test generate_content with list of dict format inputs. @@ -1725,8 +1749,9 @@ def test_generate_content_with_list_of_dicts( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1745,25 +1770,25 @@ def test_generate_content_with_list_of_dicts( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [{"text": "Second user message", "type": "text"}] def test_generate_content_with_dict_inline_data( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with dict format containing inline_data.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + 
_experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1784,10 +1809,9 @@ def test_generate_content_with_dict_inline_data( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert len(messages[0]["content"]) == 2 @@ -1801,14 +1825,15 @@ def test_generate_content_with_dict_inline_data( def test_generate_content_without_parts_property_inline_data( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1825,10 +1850,9 @@ def test_generate_content_without_parts_property_inline_data( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 @@ -1845,14 +1869,15 @@ def test_generate_content_without_parts_property_inline_data( def test_generate_content_without_parts_property_inline_data_and_binary_data_within_string( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, 
mock_genai_client ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1874,10 +1899,9 @@ def test_generate_content_without_parts_property_inline_data_and_binary_data_wit model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" @@ -2162,7 +2186,9 @@ def test_extract_contents_messages_dict_inline_data(): """Test extract_contents_messages with dict containing inline_data""" content_dict = { "role": "user", - "parts": [{"inline_data": {"data": b"binary_data", "mime_type": "image/gif"}}], + "parts": [ + {"inline_data": {"attributes": b"binary_data", "mime_type": "image/gif"}} + ], } result = extract_contents_messages(content_dict) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 9dd15ca4b5..eaac8c1ab1 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -471,7 +471,7 @@ def mock_hf_chat_completion_api_streaming_tools(httpx_mock): @pytest.mark.parametrize("include_prompts", [True, False]) def test_text_generation( sentry_init: "Any", - capture_events: "Any", + capture_items: "Any", send_default_pii: "Any", include_prompts: "Any", mock_hf_text_generation_api: "Any", @@ -480,8 +480,9 @@ def test_text_generation( traces_sample_rate=1.0, send_default_pii=send_default_pii, 
integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = InferenceClient(model="test-model") @@ -492,23 +493,22 @@ def test_text_generation( details=True, ) - (transaction,) = events - + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.text_completion" - assert span["description"] == "text_completion test-model" - assert span["origin"] == "auto.ai.huggingface_hub" + assert span["attributes"]["sentry.op"] == "gen_ai.text_completion" + assert span["name"] == "text_completion test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "text_completion", @@ -516,6 +516,14 @@ def test_text_generation( "gen_ai.response.finish_reasons": "length", "gen_ai.response.streaming": False, "gen_ai.usage.total_tokens": 10, + "sentry.environment": "production", + "sentry.op": "gen_ai.text_completion", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": "2.58.0", + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -528,10 +536,10 @@ def test_text_generation( assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data - assert span["data"] == expected_data + assert 
span["attributes"] == expected_data # text generation does not set the response model - assert "gen_ai.response.model" not in span["data"] + assert "gen_ai.response.model" not in span["attributes"] @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @@ -539,7 +547,7 @@ def test_text_generation( @pytest.mark.parametrize("include_prompts", [True, False]) def test_text_generation_streaming( sentry_init: "Any", - capture_events: "Any", + capture_items: "Any", send_default_pii: "Any", include_prompts: "Any", mock_hf_text_generation_api_streaming: "Any", @@ -548,8 +556,9 @@ def test_text_generation_streaming( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = InferenceClient(model="test-model") @@ -561,23 +570,22 @@ def test_text_generation_streaming( ): pass - (transaction,) = events - + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.text_completion" - assert span["description"] == "text_completion test-model" - assert span["origin"] == "auto.ai.huggingface_hub" + assert span["attributes"]["sentry.op"] == "gen_ai.text_completion" + assert span["name"] == "text_completion test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "text_completion", @@ -585,6 +593,14 @@ def 
test_text_generation_streaming( "gen_ai.response.finish_reasons": "length", "gen_ai.response.streaming": True, "gen_ai.usage.total_tokens": 10, + "sentry.environment": "production", + "sentry.op": "gen_ai.text_completion", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -597,10 +613,10 @@ def test_text_generation_streaming( assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data - assert span["data"] == expected_data + assert span["attributes"] == expected_data # text generation does not set the response model - assert "gen_ai.response.model" not in span["data"] + assert "gen_ai.response.model" not in span["attributes"] @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @@ -608,7 +624,7 @@ def test_text_generation_streaming( @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion( sentry_init: "Any", - capture_events: "Any", + capture_items: "Any", send_default_pii: "Any", include_prompts: "Any", mock_hf_chat_completion_api: "Any", @@ -617,8 +633,9 @@ def test_chat_completion( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = get_hf_provider_inference_client() @@ -628,23 +645,22 @@ def test_chat_completion( stream=False, ) - (transaction,) = events - + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there 
should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.chat" - assert span["description"] == "chat test-model" - assert span["origin"] == "auto.ai.huggingface_hub" + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "chat", @@ -655,6 +671,14 @@ def test_chat_completion( "gen_ai.usage.input_tokens": 10, "gen_ai.usage.output_tokens": 8, "gen_ai.usage.total_tokens": 18, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -671,7 +695,7 @@ def test_chat_completion( assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data - assert span["data"] == expected_data + assert span["attributes"] == expected_data @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @@ -679,7 +703,7 @@ def test_chat_completion( @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_streaming( sentry_init: "Any", - capture_events: "Any", + capture_items: "Any", send_default_pii: "Any", include_prompts: "Any", mock_hf_chat_completion_api_streaming: "Any", @@ -688,8 +712,9 @@ def test_chat_completion_streaming( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = 
get_hf_provider_inference_client() @@ -701,23 +726,22 @@ def test_chat_completion_streaming( ) ) - (transaction,) = events - + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.chat" - assert span["description"] == "chat test-model" - assert span["origin"] == "auto.ai.huggingface_hub" + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "chat", @@ -725,6 +749,14 @@ def test_chat_completion_streaming( "gen_ai.response.finish_reasons": "stop", "gen_ai.response.model": "test-model-123", "gen_ai.response.streaming": True, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -744,15 +776,15 @@ def test_chat_completion_streaming( assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data - assert span["data"] == expected_data + assert span["attributes"] == expected_data @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) def test_chat_completion_api_error( - sentry_init: "Any", capture_events: "Any", mock_hf_api_with_errors: "Any" + sentry_init: "Any", capture_items: 
"Any", mock_hf_api_with_errors: "Any" ) -> None: - sentry_init(traces_sample_rate=1.0) - events = capture_events() + sentry_init(traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) + items = capture_items("event", "transaction", "span") client = get_hf_provider_inference_client() @@ -762,32 +794,29 @@ def test_chat_completion_api_error( messages=[{"role": "user", "content": "Hello!"}], ) - ( - error, - transaction, - ) = events - + (error,) = (item.payload for item in items if item.type == "event") assert error["exception"]["values"][0]["mechanism"]["type"] == "huggingface_hub" assert not error["exception"]["values"][0]["mechanism"]["handled"] + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.chat" - assert span["description"] == "chat test-model" - assert span["origin"] == "auto.ai.huggingface_hub" - assert span["status"] == "internal_error" - assert span.get("tags", {}).get("status") == "internal_error" + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + assert span["status"] == "error" + (transaction,) = (item.payload for item in items if item.type == "transaction") assert ( error["contexts"]["trace"]["trace_id"] == transaction["contexts"]["trace"]["trace_id"] @@ -795,18 +824,26 @@ def test_chat_completion_api_error( expected_data = { "gen_ai.operation.name": "chat", "gen_ai.request.model": "test-model", + 
"sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", "thread.id": mock.ANY, "thread.name": mock.ANY, } - assert span["data"] == expected_data + assert span["attributes"] == expected_data @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) def test_span_status_error( - sentry_init: "Any", capture_events: "Any", mock_hf_api_with_errors: "Any" + sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" ) -> None: - sentry_init(traces_sample_rate=1.0) - events = capture_events() + sentry_init(traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) + items = capture_items("event", "transaction", "span") client = get_hf_provider_inference_client() @@ -816,22 +853,22 @@ def test_span_status_error( messages=[{"role": "user", "content": "Hello!"}], ) - (error, transaction) = events + (error,) = [item.payload for item in items if item.type == "event"] assert error["level"] == "error" + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["status"] == "internal_error" - assert span["tags"]["status"] == "internal_error" + assert span["status"] == "error" @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @@ -839,7 +876,7 @@ def test_span_status_error( @pytest.mark.parametrize("include_prompts", [True, False]) def 
test_chat_completion_with_tools( sentry_init: "Any", - capture_events: "Any", + capture_items: "Any", send_default_pii: "Any", include_prompts: "Any", mock_hf_chat_completion_api_tools: "Any", @@ -848,8 +885,9 @@ def test_chat_completion_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = get_hf_provider_inference_client() @@ -875,23 +913,22 @@ def test_chat_completion_with_tools( tool_choice="auto", ) - (transaction,) = events - + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.chat" - assert span["description"] == "chat test-model" - assert span["origin"] == "auto.ai.huggingface_hub" + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "chat", @@ -902,6 +939,14 @@ def test_chat_completion_with_tools( "gen_ai.usage.input_tokens": 10, "gen_ai.usage.output_tokens": 8, "gen_ai.usage.total_tokens": 18, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", 
"thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -919,7 +964,7 @@ def test_chat_completion_with_tools( assert "gen_ai.response.text" not in expected_data assert "gen_ai.response.tool_calls" not in expected_data - assert span["data"] == expected_data + assert span["attributes"] == expected_data @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @@ -927,7 +972,7 @@ def test_chat_completion_with_tools( @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_streaming_with_tools( sentry_init: "Any", - capture_events: "Any", + capture_items: "Any", send_default_pii: "Any", include_prompts: "Any", mock_hf_chat_completion_api_streaming_tools: "Any", @@ -936,8 +981,9 @@ def test_chat_completion_streaming_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = get_hf_provider_inference_client() @@ -966,23 +1012,22 @@ def test_chat_completion_streaming_with_tools( ) ) - (transaction,) = events - + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.chat" - assert span["description"] == "chat test-model" - assert span["origin"] == "auto.ai.huggingface_hub" + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == 
"auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "chat", @@ -991,6 +1036,14 @@ def test_chat_completion_streaming_with_tools( "gen_ai.response.finish_reasons": "tool_calls", "gen_ai.response.model": "test-model-123", "gen_ai.response.streaming": True, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -1014,4 +1067,4 @@ def test_chat_completion_streaming_with_tools( assert "gen_ai.response.text" not in expected_data assert "gen_ai.response.tool_calls" not in expected_data - assert span["data"] == expected_data + assert span["attributes"] == expected_data diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 498a5d6f4a..ef27d45767 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -97,7 +97,7 @@ def _llm_type(self) -> str: def test_langchain_text_completion( sentry_init, - capture_events, + capture_items, get_model_response, ): sentry_init( @@ -108,8 +108,9 @@ def test_langchain_text_completion( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") model_response = get_model_response( Completion( @@ -149,25 +150,29 @@ def test_langchain_text_completion( input_text = "What is the capital of France?" 
model.invoke(input_text, config={"run_name": "my-snazzy-pipeline"}) - tx = events[0] + tx = next(item.payload for item in items if item.type == "transaction") assert tx["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] llm_spans = [ span - for span in tx.get("spans", []) - if span.get("op") == "gen_ai.text_completion" + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.text_completion" ] assert len(llm_spans) > 0 llm_span = llm_spans[0] - assert llm_span["description"] == "text_completion gpt-3.5-turbo" - assert llm_span["data"]["gen_ai.system"] == "openai" - assert llm_span["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" - assert llm_span["data"]["gen_ai.request.model"] == "gpt-3.5-turbo" - assert llm_span["data"]["gen_ai.response.text"] == "The capital of France is Paris." - assert llm_span["data"]["gen_ai.usage.total_tokens"] == 25 - assert llm_span["data"]["gen_ai.usage.input_tokens"] == 10 - assert llm_span["data"]["gen_ai.usage.output_tokens"] == 15 + assert llm_span["name"] == "text_completion gpt-3.5-turbo" + assert llm_span["attributes"]["gen_ai.system"] == "openai" + assert llm_span["attributes"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" + assert llm_span["attributes"]["gen_ai.request.model"] == "gpt-3.5-turbo" + assert ( + llm_span["attributes"]["gen_ai.response.text"] + == "The capital of France is Paris." 
+ ) + assert llm_span["attributes"]["gen_ai.usage.total_tokens"] == 25 + assert llm_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert llm_span["attributes"]["gen_ai.usage.output_tokens"] == 15 @pytest.mark.skipif( @@ -196,7 +201,7 @@ def test_langchain_text_completion( ) def test_langchain_create_agent( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, system_instructions_content, @@ -212,8 +217,9 @@ def test_langchain_create_agent( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") model_response = get_model_response( nonstreaming_responses_model_response, @@ -250,22 +256,23 @@ def test_langchain_create_agent( }, ) - tx = events[0] + tx = next(item.payload for item in items if item.type == "transaction") assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" - chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") assert len(chat_spans) == 1 - assert chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[0]["data"]["gen_ai.system"] == "openai-chat" - assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 10 - assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 20 - assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 30 + assert chat_spans[0]["attributes"]["gen_ai.system"] == "openai-chat" + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 30 if send_default_pii and include_prompts: assert ( - 
chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hello, how can I help you?" ) @@ -276,7 +283,9 @@ def test_langchain_create_agent( "type": "text", "content": "You are very powerful assistant, but don't know current events", } - ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) else: assert [ { @@ -287,11 +296,17 @@ def test_langchain_create_agent( "type": "text", "content": "Be concise and clear.", }, - ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) @pytest.mark.skipif( @@ -309,7 +324,7 @@ def test_langchain_create_agent( ) def test_tool_execution_span( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -323,8 +338,9 @@ def test_tool_execution_span( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") responses = responses_tool_call_model_responses( tool_name="get_word_length", @@ -400,60 +416,71 @@ def test_tool_execution_span( }, ) - tx = events[0] + tx = next(item.payload for item in items if item.type == "transaction") assert tx["type"] == "transaction" assert 
tx["contexts"]["trace"]["origin"] == "manual" - chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") assert len(chat_spans) == 2 - tool_exec_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") + tool_exec_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" + ) assert len(tool_exec_spans) == 1 tool_exec_span = tool_exec_spans[0] - assert chat_spans[0]["origin"] == "auto.ai.langchain" - assert chat_spans[1]["origin"] == "auto.ai.langchain" - assert tool_exec_span["origin"] == "auto.ai.langchain" + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 - assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 - assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 - assert chat_spans[0]["data"]["gen_ai.system"] == "openai-chat" + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 + assert chat_spans[0]["attributes"]["gen_ai.system"] == "openai-chat" - assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 - assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 - assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 - assert chat_spans[1]["data"]["gen_ai.system"] == "openai-chat" + assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 
+ assert chat_spans[1]["attributes"]["gen_ai.system"] == "openai-chat" if send_default_pii and include_prompts: - assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] + assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] - assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "5" in chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] # Verify tool calls are recorded when PII is enabled - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get("data", {}), ( + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get( + "attributes", {} + ), ( "Tool calls should be recorded when send_default_pii=True and include_prompts=True" ) - tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + tool_calls_data = chat_spans[0]["attributes"][ + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS + ] assert isinstance(tool_calls_data, str) assert "get_word_length" in tool_calls_data else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {}) - assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {}) - assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("attributes", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("attributes", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("attributes", {}) # Verify tool calls are NOT recorded when PII is disabled 
assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( - "data", {} + "attributes", {} ), ( f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " f"and include_prompts={include_prompts}" ) assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( - "data", {} + "attributes", {} ), ( f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " f"and include_prompts={include_prompts}" @@ -461,7 +488,7 @@ def test_tool_execution_span( # Verify that available tools are always recorded regardless of PII settings for chat_span in chat_spans: - tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + tools_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] assert "get_word_length" in tools_data @@ -488,7 +515,7 @@ def test_tool_execution_span( ) def test_langchain_openai_tools_agent( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, system_instructions_content, @@ -504,8 +531,9 @@ def test_langchain_openai_tools_agent( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -700,40 +728,47 @@ def test_langchain_openai_tools_agent( with start_transaction(): list(agent_executor.stream({"input": "How many letters in the word eudca"})) - tx = events[0] + tx = next(item.payload for item in items if item.type == "transaction") assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" - invoke_agent_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.invoke_agent") - chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") - tool_exec_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + x for x in spans if 
x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" + ) + chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") + tool_exec_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" + ) assert len(chat_spans) == 2 - assert invoke_agent_span["origin"] == "auto.ai.langchain" - assert chat_spans[0]["origin"] == "auto.ai.langchain" - assert chat_spans[1]["origin"] == "auto.ai.langchain" - assert tool_exec_span["origin"] == "auto.ai.langchain" + assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" # We can't guarantee anything about the "shape" of the langchain execution graph - assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 + assert ( + len(list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat")) > 0 + ) # Token usage is only available in newer versions of langchain (v0.2+) # where usage_metadata is supported on AIMessageChunk - if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: - assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 - assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 - assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 + if "gen_ai.usage.input_tokens" in chat_spans[0]["attributes"]: + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 - if "gen_ai.usage.input_tokens" in chat_spans[1]["data"]: - assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 - assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 - assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 
117 + if "gen_ai.usage.input_tokens" in chat_spans[1]["attributes"]: + assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 if send_default_pii and include_prompts: - assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] - assert 5 == int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + assert "5" in chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] + assert 5 == int(tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_OUTPUT]) param_id = request.node.callspec.id if "string" in param_id: @@ -742,7 +777,9 @@ def test_langchain_openai_tools_agent( "type": "text", "content": "You are very powerful assistant, but don't know current events", } - ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) else: assert [ { @@ -753,15 +790,21 @@ def test_langchain_openai_tools_agent( "type": "text", "content": "Be concise and clear.", }, - ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) - assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "5" in chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] # Verify tool calls are recorded when PII is enabled - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get("data", {}), ( + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get( + "attributes", {} + ), ( "Tool calls should be recorded when send_default_pii=True and include_prompts=True" ) - tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + 
tool_calls_data = chat_spans[0]["attributes"][ + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS + ] assert isinstance(tool_calls_data, (list, str)) # Could be serialized if isinstance(tool_calls_data, str): assert "get_word_length" in tool_calls_data @@ -770,45 +813,55 @@ def test_langchain_openai_tools_agent( tool_call_str = str(tool_calls_data) assert "get_word_length" in tool_call_str else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get("data", {}) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {}) - assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {}) - assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("data", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("attributes", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("attributes", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("attributes", {}) # Verify tool calls are NOT recorded when PII is disabled assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( - "data", {} + "attributes", {} ), ( f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " f"and include_prompts={include_prompts}" ) assert 
SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( - "data", {} + "attributes", {} ), ( f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " f"and include_prompts={include_prompts}" ) # Verify finish_reasons is always an array of strings - assert chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + assert chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ "function_call" ] - assert chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["stop"] + assert chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "stop" + ] # Verify that available tools are always recorded regardless of PII settings for chat_span in chat_spans: - tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + tools_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] assert tools_data is not None, ( "Available tools should always be recorded regardless of PII settings" ) assert "get_word_length" in tools_data -def test_langchain_error(sentry_init, capture_events): +def test_langchain_error(sentry_init, capture_items): global llm_type llm_type = "acme-llm" @@ -816,8 +869,9 @@ def test_langchain_error(sentry_init, capture_events): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("event", "transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -843,19 +897,20 @@ def test_langchain_error(sentry_init, capture_events): with start_transaction(), pytest.raises(ValueError): list(agent_executor.stream({"input": "How many letters in the word eudca"})) - error = events[0] + error = next(item.payload for item in items if item.type == "event") assert error["level"] == "error" -def test_span_status_error(sentry_init, capture_events): +def test_span_status_error(sentry_init, capture_items): global 
llm_type llm_type = "acme-llm" sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("event", "transaction", "span") with start_transaction(name="test"): prompt = ChatPromptTemplate.from_messages( @@ -884,10 +939,13 @@ def test_span_status_error(sentry_init, capture_events): with pytest.raises(ValueError): list(agent_executor.stream({"input": "How many letters in the word eudca"})) - (error, transaction) = events + error = next(item.payload for item in items if item.type == "event") assert error["level"] == "error" - assert transaction["spans"][0]["status"] == "internal_error" - assert transaction["spans"][0]["tags"]["status"] == "internal_error" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["contexts"]["trace"]["status"] == "internal_error" @@ -935,7 +993,9 @@ def _llm_type(self): def _identifying_params(self): return {} - sentry_init(integrations=[LangchainIntegration()]) + sentry_init( + integrations=[LangchainIntegration()], _experiments={"gen_ai_as_v2_spans": True} + ) # Create a manual SentryLangchainCallback manual_callback = SentryLangchainCallback( @@ -976,6 +1036,7 @@ def test_langchain_callback_manager(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) local_manager = BaseCallbackManager(handlers=[]) @@ -1008,6 +1069,7 @@ def test_langchain_callback_manager_with_sentry_callback(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) sentry_callback = SentryLangchainCallback(0, False) local_manager = BaseCallbackManager(handlers=[sentry_callback]) @@ -1040,6 +1102,7 @@ def 
test_langchain_callback_list(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) local_callbacks = [] @@ -1072,6 +1135,7 @@ def test_langchain_callback_list_existing_callback(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) sentry_callback = SentryLangchainCallback(0, False) local_callbacks = [sentry_callback] @@ -1100,7 +1164,7 @@ def test_langchain_callback_list_existing_callback(sentry_init): assert handler is sentry_callback -def test_langchain_message_role_mapping(sentry_init, capture_events): +def test_langchain_message_role_mapping(sentry_init, capture_items): """Test that message roles are properly normalized in langchain integration.""" global llm_type llm_type = "openai-chat" @@ -1109,8 +1173,9 @@ def test_langchain_message_role_mapping(sentry_init, capture_events): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -1146,19 +1211,18 @@ def test_langchain_message_role_mapping(sentry_init, capture_events): with start_transaction(): list(agent_executor.stream({"input": test_input})) - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find spans with gen_ai operation that should have message data gen_ai_spans = [ - span for span in tx.get("spans", []) if span.get("op", "").startswith("gen_ai") + span + for span in spans + if span["attributes"].get("sentry.op", "").startswith("gen_ai") ] # Check if any span has message data with normalized roles message_data_found = False for span in gen_ai_spans: - span_data = span.get("data", {}) + span_data = span.get("attributes", {}) 
if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data: message_data_found = True messages_data = span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] @@ -1239,7 +1303,7 @@ def test_langchain_message_role_normalization_units(): assert normalized[5] == "string message" # String message unchanged -def test_langchain_message_truncation(sentry_init, capture_events): +def test_langchain_message_truncation(sentry_init, capture_items): """Test that large messages are truncated properly in Langchain integration.""" from langchain_core.outputs import LLMResult, Generation @@ -1247,8 +1311,9 @@ def test_langchain_message_truncation(sentry_init, capture_events): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -1291,23 +1356,23 @@ def test_langchain_message_truncation(sentry_init, capture_events): ) callback.on_llm_end(response=response, run_id=run_id) - assert len(events) > 0 - tx = events[0] + tx = next(item.payload for item in items if item.type == "transaction") assert tx["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] llm_spans = [ span - for span in tx.get("spans", []) - if span.get("op") == "gen_ai.text_completion" + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.text_completion" ] assert len(llm_spans) > 0 llm_span = llm_spans[0] - assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" - assert llm_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "my_pipeline" + assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" + assert llm_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "my_pipeline" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["data"] - messages_data = llm_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + 
assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["attributes"] + messages_data = llm_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -1327,7 +1392,7 @@ def test_langchain_message_truncation(sentry_init, capture_events): ], ) def test_langchain_embeddings_sync( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """Test that sync embedding methods (embed_documents, embed_query) are properly traced.""" try: @@ -1339,8 +1404,9 @@ def test_langchain_embeddings_sync( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the actual API call with mock.patch.object( @@ -1362,27 +1428,28 @@ def test_langchain_embeddings_sync( assert len(result) == 2 mock_embed_documents.assert_called_once() - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings span embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] - assert embeddings_span["description"] == "embeddings text-embedding-ada-002" - assert embeddings_span["origin"] == "auto.ai.langchain" - assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" - assert embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert embeddings_span["name"] == "embeddings text-embedding-ada-002" + assert embeddings_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert 
embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["attributes"]["gen_ai.request.model"] + == "text-embedding-ada-002" + ) # Check if input is captured based on PII settings if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] - input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] + input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] # Could be serialized as string if isinstance(input_data, str): assert "Hello world" in input_data @@ -1391,7 +1458,9 @@ def test_langchain_embeddings_sync( assert "Hello world" in input_data assert "Test document" in input_data else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get("data", {}) + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "attributes", {} + ) @pytest.mark.parametrize( @@ -1402,7 +1471,7 @@ def test_langchain_embeddings_sync( ], ) def test_langchain_embeddings_embed_query( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """Test that embed_query method is properly traced.""" try: @@ -1414,8 +1483,9 @@ def test_langchain_embeddings_embed_query( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the actual API call with mock.patch.object( @@ -1436,32 +1506,35 @@ def test_langchain_embeddings_embed_query( assert len(result) == 3 mock_embed_query.assert_called_once() - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings span 
embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] - assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" - assert embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["attributes"]["gen_ai.request.model"] + == "text-embedding-ada-002" + ) # Check if input is captured based on PII settings if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] - input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] + input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] # Could be serialized as string if isinstance(input_data, str): assert "What is the capital of France?" in input_data else: assert "What is the capital of France?" 
in input_data else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get("data", {}) + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "attributes", {} + ) @pytest.mark.parametrize( @@ -1473,7 +1546,7 @@ def test_langchain_embeddings_embed_query( ) @pytest.mark.asyncio async def test_langchain_embeddings_async( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """Test that async embedding methods (aembed_documents, aembed_query) are properly traced.""" try: @@ -1485,8 +1558,9 @@ async def test_langchain_embeddings_async( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") async def mock_aembed_documents(self, texts): return [[0.1, 0.2, 0.3] for _ in texts] @@ -1512,38 +1586,41 @@ async def mock_aembed_documents(self, texts): assert len(result) == 2 mock_aembed.assert_called_once() - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings span embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] - assert embeddings_span["description"] == "embeddings text-embedding-ada-002" - assert embeddings_span["origin"] == "auto.ai.langchain" - assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" - assert embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert embeddings_span["name"] == "embeddings text-embedding-ada-002" + assert embeddings_span["attributes"]["sentry.origin"] 
== "auto.ai.langchain" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["attributes"]["gen_ai.request.model"] + == "text-embedding-ada-002" + ) # Check if input is captured based on PII settings if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] - input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] + input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] # Could be serialized as string if isinstance(input_data, str): assert "Async hello" in input_data or "Async test document" in input_data else: assert "Async hello" in input_data or "Async test document" in input_data else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get("data", {}) + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "attributes", {} + ) @pytest.mark.asyncio -async def test_langchain_embeddings_aembed_query(sentry_init, capture_events): +async def test_langchain_embeddings_aembed_query(sentry_init, capture_items): """Test that aembed_query method is properly traced.""" try: from langchain_openai import OpenAIEmbeddings @@ -1554,8 +1631,9 @@ async def test_langchain_embeddings_aembed_query(sentry_init, capture_events): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") async def mock_aembed_query(self, text): return [0.1, 0.2, 0.3] @@ -1579,24 +1657,25 @@ async def mock_aembed_query(self, text): assert len(result) == 3 mock_aembed.assert_called_once() - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings span 
embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] - assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" - assert embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["attributes"]["gen_ai.request.model"] + == "text-embedding-ada-002" + ) # Check if input is captured - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] - input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] + input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] # Could be serialized as string if isinstance(input_data, str): assert "Async query test" in input_data @@ -1604,7 +1683,7 @@ async def mock_aembed_query(self, text): assert "Async query test" in input_data -def test_langchain_embeddings_no_model_name(sentry_init, capture_events): +def test_langchain_embeddings_no_model_name(sentry_init, capture_items): """Test embeddings when model name is not available.""" try: from langchain_openai import OpenAIEmbeddings @@ -1614,8 +1693,9 @@ def test_langchain_embeddings_no_model_name(sentry_init, capture_events): sentry_init( integrations=[LangchainIntegration(include_prompts=False)], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the actual API call and remove model attribute with mock.patch.object( @@ -1635,28 +1715,26 @@ def test_langchain_embeddings_no_model_name(sentry_init, capture_events): with start_transaction(name="test_embeddings_no_model"): embeddings.embed_documents(["Test"]) - # Check captured 
events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings span embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] - assert embeddings_span["description"] == "embeddings" - assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + assert embeddings_span["name"] == "embeddings" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" # Model name should not be set if not available assert ( - "gen_ai.request.model" not in embeddings_span["data"] - or embeddings_span["data"]["gen_ai.request.model"] is None + "gen_ai.request.model" not in embeddings_span["attributes"] + or embeddings_span["attributes"]["gen_ai.request.model"] is None ) -def test_langchain_embeddings_integration_disabled(sentry_init, capture_events): +def test_langchain_embeddings_integration_disabled(sentry_init, capture_items): """Test that embeddings are not traced when integration is disabled.""" try: from langchain_openai import OpenAIEmbeddings @@ -1664,8 +1742,8 @@ def test_langchain_embeddings_integration_disabled(sentry_init, capture_events): pytest.skip("langchain_openai not installed") # Initialize without LangchainIntegration - sentry_init(traces_sample_rate=1.0) - events = capture_events() + sentry_init(traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) + items = capture_items("transaction", "span") with mock.patch.object( OpenAIEmbeddings, @@ -1680,18 +1758,17 @@ def test_langchain_embeddings_integration_disabled(sentry_init, capture_events): embeddings.embed_documents(["Test"]) # Check that no embeddings spans were created - if events: - tx = events[0] - embeddings_spans = [ - span - for span in tx.get("spans", 
[]) - if span.get("op") == "gen_ai.embeddings" - ] - # Should be empty since integration is disabled - assert len(embeddings_spans) == 0 + spans = [item.payload for item in items if item.type == "span"] + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] + # Should be empty since integration is disabled + assert len(embeddings_spans) == 0 -def test_langchain_embeddings_multiple_providers(sentry_init, capture_events): +def test_langchain_embeddings_multiple_providers(sentry_init, capture_items): """Test that embeddings work with different providers.""" try: from langchain_openai import OpenAIEmbeddings, AzureOpenAIEmbeddings @@ -1702,8 +1779,9 @@ def test_langchain_embeddings_multiple_providers(sentry_init, capture_events): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock both providers with mock.patch.object( @@ -1731,26 +1809,24 @@ def test_langchain_embeddings_multiple_providers(sentry_init, capture_events): openai_embeddings.embed_documents(["OpenAI test"]) azure_embeddings.embed_documents(["Azure test"]) - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings spans embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] # Should have 2 spans, one for each provider assert len(embeddings_spans) == 2 # Verify both spans have proper data for span in embeddings_spans: - assert span["data"]["gen_ai.operation.name"] == "embeddings" - assert span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in 
span["data"] + assert span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert span["attributes"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] -def test_langchain_embeddings_error_handling(sentry_init, capture_events): +def test_langchain_embeddings_error_handling(sentry_init, capture_items): """Test that errors in embeddings are properly captured.""" try: from langchain_openai import OpenAIEmbeddings @@ -1761,8 +1837,9 @@ def test_langchain_embeddings_error_handling(sentry_init, capture_events): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the API call to raise an error with mock.patch.object( @@ -1781,15 +1858,16 @@ def test_langchain_embeddings_error_handling(sentry_init, capture_events): with pytest.raises(ValueError): embeddings.embed_documents(["Test"]) - # The error should be captured - assert len(events) >= 1 - # We should have both the transaction and potentially an error event - [e for e in events if e.get("level") == "error"] + [ + item.payload + for item in items + if item.type == "event" and item.payload.get("level") == "error" + ] # Note: errors might not be auto-captured depending on SDK settings, # but the span should still be created -def test_langchain_embeddings_multiple_calls(sentry_init, capture_events): +def test_langchain_embeddings_multiple_calls(sentry_init, capture_items): """Test that multiple embeddings calls within a transaction are all traced.""" try: from langchain_openai import OpenAIEmbeddings @@ -1800,8 +1878,9 @@ def test_langchain_embeddings_multiple_calls(sentry_init, capture_events): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = 
capture_events() + items = capture_items("transaction", "span") # Mock the actual API calls with mock.patch.object( @@ -1828,32 +1907,31 @@ def test_langchain_embeddings_multiple_calls(sentry_init, capture_events): # Call embed_documents again embeddings.embed_documents(["Third batch"]) - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings spans - should have 3 (2 embed_documents + 1 embed_query) embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] assert len(embeddings_spans) == 3 # Verify all spans have proper data for span in embeddings_spans: - assert span["data"]["gen_ai.operation.name"] == "embeddings" - assert span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["data"] + assert span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert span["attributes"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] # Verify the input data is different for each span input_data_list = [ - span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] for span in embeddings_spans + span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + for span in embeddings_spans ] # They should all be different (different inputs) assert len(set(str(data) for data in input_data_list)) == 3 -def test_langchain_embeddings_span_hierarchy(sentry_init, capture_events): +def test_langchain_embeddings_span_hierarchy(sentry_init, capture_items): """Test that embeddings spans are properly nested within parent spans.""" try: from langchain_openai import OpenAIEmbeddings @@ -1864,8 +1942,9 @@ def test_langchain_embeddings_span_hierarchy(sentry_init, capture_events): 
integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the actual API call with mock.patch.object( @@ -1884,15 +1963,15 @@ def test_langchain_embeddings_span_hierarchy(sentry_init, capture_events): with sentry_sdk.start_span(op="custom", name="custom operation"): embeddings.embed_documents(["Test within custom span"]) - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find all spans embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] + + tx = next(item.payload for item in items if item.type == "transaction") custom_spans = [span for span in tx.get("spans", []) if span.get("op") == "custom"] assert len(embeddings_spans) == 1 @@ -1902,11 +1981,11 @@ def test_langchain_embeddings_span_hierarchy(sentry_init, capture_events): embeddings_span = embeddings_spans[0] custom_span = custom_spans[0] - assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" assert custom_span["description"] == "custom operation" -def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_events): +def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_items): """Test that embeddings correctly handle both list and string inputs.""" try: from langchain_openai import OpenAIEmbeddings @@ -1917,8 +1996,9 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_e integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + 
_experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the actual API calls with mock.patch.object( @@ -1943,21 +2023,19 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_e # embed_query takes a string embeddings.embed_query("Single string query") - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings spans embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] assert len(embeddings_spans) == 2 # Both should have input data captured as lists for span in embeddings_spans: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["data"] - input_data = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] + input_data = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] # Input should be normalized to list format if isinstance(input_data, str): # If serialized, should contain the input text @@ -1975,7 +2053,7 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_e ) def test_langchain_response_model_extraction( sentry_init, - capture_events, + capture_items, response_metadata_model, expected_model, ): @@ -1983,8 +2061,9 @@ def test_langchain_response_model_extraction( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -2009,25 +2088,22 @@ def test_langchain_response_model_extraction( response = Mock(generations=[[generation]]) 
callback.on_llm_end(response=response, run_id=run_id) - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] llm_spans = [ span - for span in tx.get("spans", []) - if span.get("op") == "gen_ai.text_completion" + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.text_completion" ] assert len(llm_spans) > 0 llm_span = llm_spans[0] - assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" + assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" if expected_model is not None: - assert SPANDATA.GEN_AI_RESPONSE_MODEL in llm_span["data"] - assert llm_span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == expected_model + assert SPANDATA.GEN_AI_RESPONSE_MODEL in llm_span["attributes"] + assert llm_span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == expected_model else: - assert SPANDATA.GEN_AI_RESPONSE_MODEL not in llm_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_MODEL not in llm_span.get("attributes", {}) # Tests for multimodal content transformation functions @@ -2286,13 +2362,14 @@ def test_transform_google_file_data(self): ], ) def test_langchain_ai_system_detection( - sentry_init, capture_events, ai_type, expected_system + sentry_init, capture_items, ai_type, expected_system ): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -2312,23 +2389,20 @@ def test_langchain_ai_system_detection( response = Mock(generations=[[generation]]) callback.on_llm_end(response=response, run_id=run_id) - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] llm_spans = [ span - for span in tx.get("spans", []) - if 
span.get("op") == "gen_ai.text_completion" + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.text_completion" ] assert len(llm_spans) > 0 llm_span = llm_spans[0] if expected_system is not None: - assert llm_span["data"][SPANDATA.GEN_AI_SYSTEM] == expected_system + assert llm_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == expected_system else: - assert SPANDATA.GEN_AI_SYSTEM not in llm_span.get("data", {}) + assert SPANDATA.GEN_AI_SYSTEM not in llm_span.get("attributes", {}) class TestTransformLangchainMessageContent: diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index 2a385d8a78..b70889548f 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -147,15 +147,16 @@ def test_langgraph_integration_init(): ], ) def test_state_graph_compile( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """Test StateGraph.compile() wrapper creates proper create_agent span.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") graph = MockStateGraph() def original_compile(self, *args, **kwargs): @@ -171,21 +172,23 @@ def original_compile(self, *args, **kwargs): assert compiled_graph is not None assert compiled_graph.name == "test_graph" - tx = events[0] - assert tx["type"] == "transaction" - - agent_spans = [span for span in tx["spans"] if span["op"] == OP.GEN_AI_CREATE_AGENT] + spans = [item.payload for item in items if item.type == "span"] + agent_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_CREATE_AGENT + ] assert len(agent_spans) == 1 agent_span = agent_spans[0] - assert agent_span["description"] == 
"create_agent test_graph" - assert agent_span["origin"] == "auto.ai.langgraph" - assert agent_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "create_agent" - assert agent_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" - assert agent_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "test-model" - assert SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS in agent_span["data"] - - tools_data = agent_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert agent_span["name"] == "create_agent test_graph" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + assert agent_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "create_agent" + assert agent_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" + assert agent_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "test-model" + assert SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS in agent_span["attributes"] + + tools_data = agent_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] assert tools_data == ["search_tool", "calculator"] assert len(tools_data) == 2 assert "search_tool" in tools_data @@ -201,14 +204,15 @@ def original_compile(self, *args, **kwargs): (False, False), ], ) -def test_pregel_invoke(sentry_init, capture_events, send_default_pii, include_prompts): +def test_pregel_invoke(sentry_init, capture_items, send_default_pii, include_prompts): """Test Pregel.invoke() wrapper creates proper invoke_agent span.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -245,26 +249,26 @@ def original_invoke(self, *args, **kwargs): assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if 
span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert invoke_span["description"] == "invoke_agent test_graph" - assert invoke_span["origin"] == "auto.ai.langgraph" - assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" - assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" - assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" + assert invoke_span["name"] == "invoke_agent test_graph" + assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] - request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + request_messages = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] if isinstance(request_messages, str): import json @@ -273,11 +277,11 @@ def original_invoke(self, *args, **kwargs): assert len(request_messages) == 1 assert request_messages[0]["content"] == "Of course! How can I assist you?" 
- response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] assert response_text == expected_assistant_response - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] - tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] + tool_calls_data = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] if isinstance(tool_calls_data, str): import json @@ -287,9 +291,11 @@ def original_invoke(self, *args, **kwargs): assert tool_calls_data[0]["id"] == "call_test_123" assert tool_calls_data[0]["function"]["name"] == "search_tool" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("attributes", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("attributes", {}) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( + "attributes", {} + ) @pytest.mark.parametrize( @@ -301,14 +307,15 @@ def original_invoke(self, *args, **kwargs): (False, False), ], ) -def test_pregel_ainvoke(sentry_init, capture_events, send_default_pii, include_prompts): +def test_pregel_ainvoke(sentry_init, capture_items, send_default_pii, include_prompts): """Test Pregel.ainvoke() async wrapper creates proper invoke_agent span.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("What's the weather like?", name="user")]} pregel = MockPregelInstance("async_graph") @@ -341,30 +348,30 @@ 
async def run_test(): result = asyncio.run(run_test()) assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert invoke_span["description"] == "invoke_agent async_graph" - assert invoke_span["origin"] == "auto.ai.langgraph" - assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" - assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "async_graph" - assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "async_graph" + assert invoke_span["name"] == "invoke_agent async_graph" + assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "async_graph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "async_graph" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] - response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] assert response_text == expected_assistant_response - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] - tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] + tool_calls_data = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] if 
isinstance(tool_calls_data, str): import json @@ -374,19 +381,22 @@ async def run_test(): assert tool_calls_data[0]["id"] == "call_weather_456" assert tool_calls_data[0]["function"]["name"] == "get_weather" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("attributes", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("attributes", {}) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( + "attributes", {} + ) -def test_pregel_invoke_error(sentry_init, capture_events): +def test_pregel_invoke_error(sentry_init, capture_items): """Test error handling during graph execution.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("This will fail")]} pregel = MockPregelInstance("error_graph") @@ -397,25 +407,27 @@ def original_invoke(self, *args, **kwargs): wrapped_invoke = _wrap_pregel_invoke(original_invoke) wrapped_invoke(pregel, test_state) - tx = events[0] + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert invoke_span.get("status") == "internal_error" - assert invoke_span.get("tags", {}).get("status") == "internal_error" + assert invoke_span.get("status") == "error" -def test_pregel_ainvoke_error(sentry_init, capture_events): +def test_pregel_ainvoke_error(sentry_init, capture_items): """Test error 
handling during async graph execution.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("This will fail async")]} pregel = MockPregelInstance("async_error_graph") @@ -431,24 +443,26 @@ async def run_error_test(): asyncio.run(run_error_test()) - tx = events[0] + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert invoke_span.get("status") == "internal_error" - assert invoke_span.get("tags", {}).get("status") == "internal_error" + assert invoke_span.get("status") == "error" -def test_span_origin(sentry_init, capture_events): +def test_span_origin(sentry_init, capture_items): """Test that span origins are correctly set.""" sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") graph = MockStateGraph() @@ -461,24 +475,26 @@ def original_compile(self, *args, **kwargs): wrapped_compile = _wrap_state_graph_compile(original_compile) wrapped_compile(graph) - tx = events[0] + tx = next(item.payload for item in items if item.type == "transaction") assert tx["contexts"]["trace"]["origin"] == "manual" - for span in tx["spans"]: - assert span["origin"] == "auto.ai.langgraph" + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.langgraph" @pytest.mark.parametrize("graph_name", ["my_graph", None, ""]) def test_pregel_invoke_with_different_graph_names( - sentry_init, 
capture_events, graph_name + sentry_init, capture_items, graph_name ): """Test Pregel.invoke() with different graph name scenarios.""" sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") pregel = MockPregelInstance(graph_name) if graph_name else MockPregelInstance() if not graph_name: @@ -492,25 +508,27 @@ def original_invoke(self, *args, **kwargs): wrapped_invoke = _wrap_pregel_invoke(original_invoke) wrapped_invoke(pregel, {"messages": []}) - tx = events[0] + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] if graph_name and graph_name.strip(): - assert invoke_span["description"] == "invoke_agent my_graph" - assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == graph_name - assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == graph_name + assert invoke_span["name"] == "invoke_agent my_graph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == graph_name + assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == graph_name else: - assert invoke_span["description"] == "invoke_agent" - assert SPANDATA.GEN_AI_PIPELINE_NAME not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_AGENT_NAME not in invoke_span.get("data", {}) + assert invoke_span["name"] == "invoke_agent" + assert SPANDATA.GEN_AI_PIPELINE_NAME not in invoke_span.get("attributes", {}) + assert SPANDATA.GEN_AI_AGENT_NAME not in invoke_span.get("attributes", {}) -def test_pregel_invoke_span_includes_usage_data(sentry_init, capture_events): +def test_pregel_invoke_span_includes_usage_data(sentry_init, capture_items): """ Test that invoke_agent spans 
include aggregated usage data from context_wrapper. This verifies the new functionality added to track token usage in invoke_agent spans. @@ -518,8 +536,9 @@ def test_pregel_invoke_span_includes_usage_data(sentry_init, capture_events): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -564,29 +583,29 @@ def original_invoke(self, *args, **kwargs): assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span has usage data - assert invoke_agent_span["description"] == "invoke_agent test_graph" - assert "gen_ai.usage.input_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] + assert invoke_agent_span["name"] == "invoke_agent test_graph" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] # The usage should match the mock_usage values (aggregated across all calls) - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert 
invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 -def test_pregel_ainvoke_span_includes_usage_data(sentry_init, capture_events): +def test_pregel_ainvoke_span_includes_usage_data(sentry_init, capture_items): """ Test that invoke_agent spans include aggregated usage data from context_wrapper. This verifies the new functionality added to track token usage in invoke_agent spans. @@ -594,8 +613,9 @@ def test_pregel_ainvoke_span_includes_usage_data(sentry_init, capture_events): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -643,29 +663,29 @@ async def run_test(): result = asyncio.run(run_test()) assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span has usage data - assert invoke_agent_span["description"] == "invoke_agent test_graph" - assert "gen_ai.usage.input_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] + assert invoke_agent_span["name"] == "invoke_agent test_graph" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] # The usage should match the mock_usage values (aggregated across all calls) - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] 
== 20 - assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 -def test_pregel_invoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_events): +def test_pregel_invoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_items): """ Test that invoke_agent spans show aggregated usage across multiple LLM calls (e.g., when tools are used and multiple API calls are made). @@ -673,8 +693,9 @@ def test_pregel_invoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_e sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -730,23 +751,23 @@ def original_invoke(self, *args, **kwargs): assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span has aggregated usage from both API calls # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 30 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 50 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 
50 -def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_events): +def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_items): """ Test that invoke_agent spans show aggregated usage across multiple LLM calls (e.g., when tools are used and multiple API calls are made). @@ -754,8 +775,9 @@ def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_ sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -814,23 +836,23 @@ async def run_test(): result = asyncio.run(run_test()) assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span has aggregated usage from both API calls # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 30 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 50 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 -def test_pregel_invoke_span_includes_response_model(sentry_init, capture_events): +def test_pregel_invoke_span_includes_response_model(sentry_init, capture_items): """ Test that invoke_agent spans include the response model. 
When an agent makes multiple LLM calls, it should report the last model used. @@ -838,8 +860,9 @@ def test_pregel_invoke_span_includes_response_model(sentry_init, capture_events) sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -884,23 +907,25 @@ def original_invoke(self, *args, **kwargs): assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span has response model - assert invoke_agent_span["description"] == "invoke_agent test_graph" - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert invoke_agent_span["name"] == "invoke_agent test_graph" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) -def test_pregel_ainvoke_span_includes_response_model(sentry_init, capture_events): +def test_pregel_ainvoke_span_includes_response_model(sentry_init, capture_items): """ Test that invoke_agent spans include the response model. When an agent makes multiple LLM calls, it should report the last model used. 
@@ -908,8 +933,9 @@ def test_pregel_ainvoke_span_includes_response_model(sentry_init, capture_events sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -957,23 +983,25 @@ async def run_test(): result = asyncio.run(run_test()) assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span has response model - assert invoke_agent_span["description"] == "invoke_agent test_graph" - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert invoke_agent_span["name"] == "invoke_agent test_graph" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) -def test_pregel_invoke_span_uses_last_response_model(sentry_init, capture_events): +def test_pregel_invoke_span_uses_last_response_model(sentry_init, capture_items): """ Test that when an agent makes multiple LLM calls (e.g., with tools), the invoke_agent span reports the last response model used. 
@@ -981,8 +1009,9 @@ def test_pregel_invoke_span_uses_last_response_model(sentry_init, capture_events sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -1040,22 +1069,24 @@ def original_invoke(self, *args, **kwargs): assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span uses the LAST response model - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) -def test_pregel_ainvoke_span_uses_last_response_model(sentry_init, capture_events): +def test_pregel_ainvoke_span_uses_last_response_model(sentry_init, capture_items): """ Test that when an agent makes multiple LLM calls (e.g., with tools), the invoke_agent span reports the last response model used. 
@@ -1063,8 +1094,9 @@ def test_pregel_ainvoke_span_uses_last_response_model(sentry_init, capture_event sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -1125,19 +1157,21 @@ async def run_test(): result = asyncio.run(run_test()) assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span uses the LAST response model - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) def test_complex_message_parsing(): @@ -1187,14 +1221,15 @@ def test_complex_message_parsing(): assert result[2]["function_call"]["name"] == "search" -def test_extraction_functions_complex_scenario(sentry_init, capture_events): +def test_extraction_functions_complex_scenario(sentry_init, capture_items): """Test extraction functions with complex scenarios including multiple messages and edge cases.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") pregel = MockPregelInstance("complex_graph") test_state = {"messages": [MockMessage("Complex request", name="user")]} @@ -1235,21 +1270,23 @@ def original_invoke(self, *args, 
**kwargs): assert result is not None - tx = events[0] + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] - response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] + response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] assert response_text == "Final response" - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] import json - tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + tool_calls_data = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] if isinstance(tool_calls_data, str): tool_calls_data = json.loads(tool_calls_data) @@ -1260,14 +1297,15 @@ def original_invoke(self, *args, **kwargs): assert tool_calls_data[1]["function"]["name"] == "calculate" -def test_langgraph_message_role_mapping(sentry_init, capture_events): +def test_langgraph_message_role_mapping(sentry_init, capture_items): """Test that Langgraph integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock a langgraph message with mixed roles class MockMessage: @@ -1297,17 +1335,18 @@ def __init__(self, content, message_type="human"): ) wrapped_invoke(pregel, state_data) - (event,) = events - span = event["spans"][0] + span = next(item.payload for item in items if item.type == "span") # Verify that the span was created 
correctly - assert span["op"] == "gen_ai.invoke_agent" + assert span["attributes"]["sentry.op"] == "gen_ai.invoke_agent" # If messages were captured, verify role mapping - if SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]: + if SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"]: import json - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) # Find messages with specific content to verify role mapping ai_message = next( @@ -1331,7 +1370,7 @@ def __init__(self, content, message_type="human"): assert "ai" not in roles -def test_langgraph_message_truncation(sentry_init, capture_events): +def test_langgraph_message_truncation(sentry_init, capture_items): """Test that large messages are truncated properly in Langgraph integration.""" import json @@ -1339,8 +1378,9 @@ def test_langgraph_message_truncation(sentry_init, capture_events): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -1365,23 +1405,25 @@ def original_invoke(self, *args, **kwargs): result = wrapped_invoke(pregel, test_state) assert result is not None - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) > 0 invoke_span = invoke_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) + + (tx,) = (item.payload for item in items if item.type == "transaction") assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index a8df5891ce..b9365e7008 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -142,7 +142,7 @@ def __init__( def test_nonstreaming_chat_completion( reset_litellm_executor, sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -152,8 +152,9 @@ def test_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ 
-179,37 +180,36 @@ def test_nonstreaming_chat_completion( litellm_utils.executor.shutdown(wait=True) - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "litellm test" + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat gpt-3.5-turbo" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["attributes"] else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert 
span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 @pytest.mark.asyncio(loop_scope="session") @@ -224,7 +224,7 @@ def test_nonstreaming_chat_completion( ) async def test_async_nonstreaming_chat_completion( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -234,8 +234,9 @@ async def test_async_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -262,37 +263,36 @@ async def test_async_nonstreaming_chat_completion( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "litellm test" + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat gpt-3.5-turbo" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" - assert 
span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["attributes"] else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 @pytest.mark.parametrize( @@ -307,7 +307,7 @@ async def test_async_nonstreaming_chat_completion( def test_streaming_chat_completion( reset_litellm_executor, sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -318,8 +318,9 @@ def test_streaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = 
capture_items("span") messages = [{"role": "user", "content": "Hello!"}] @@ -350,20 +351,18 @@ def test_streaming_chat_completion( streaming_handler.executor.shutdown(wait=True) - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - assert span["op"] == OP.GEN_AI_CHAT - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.asyncio(loop_scope="session") @@ -378,7 +377,7 @@ def test_streaming_chat_completion( ) async def test_async_streaming_chat_completion( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -390,8 +389,9 @@ async def test_async_streaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -425,25 +425,23 @@ async def test_async_streaming_chat_completion( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == 
"auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - assert span["op"] == OP.GEN_AI_CHAT - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True def test_embeddings_create( sentry_init, - capture_events, + capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, @@ -458,8 +456,9 @@ def test_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="test-key") @@ -485,32 +484,34 @@ def test_embeddings_create( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(spans) == 1 span = spans[0] - assert span["op"] == OP.GEN_AI_EMBEDDINGS - assert span["description"] == "embeddings text-embedding-ada-002" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-ada-002" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert span["name"] == "embeddings text-embedding-ada-002" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + assert ( + 
span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == "text-embedding-ada-002" + ) # Check that embeddings input is captured (it's JSON serialized) - embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] assert json.loads(embeddings_input) == ["Hello, world!"] @pytest.mark.asyncio(loop_scope="session") async def test_async_embeddings_create( sentry_init, - capture_events, + capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, @@ -525,8 +526,9 @@ async def test_async_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="test-key") @@ -553,31 +555,33 @@ async def test_async_embeddings_create( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(spans) == 1 span = spans[0] - assert span["op"] == OP.GEN_AI_EMBEDDINGS - assert span["description"] == "embeddings text-embedding-ada-002" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-ada-002" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert span["name"] == "embeddings text-embedding-ada-002" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == 
"embeddings" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == "text-embedding-ada-002" + ) # Check that embeddings input is captured (it's JSON serialized) - embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] assert json.loads(embeddings_input) == ["Hello, world!"] def test_embeddings_create_with_list_input( sentry_init, - capture_events, + capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, @@ -587,8 +591,9 @@ def test_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="test-key") @@ -614,22 +619,21 @@ def test_embeddings_create_with_list_input( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(spans) == 1 span = spans[0] - assert span["op"] == OP.GEN_AI_EMBEDDINGS - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" # Check that list of embeddings input is captured (it's JSON serialized) - embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + embeddings_input = 
span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] assert json.loads(embeddings_input) == [ "First text", "Second text", @@ -640,7 +644,7 @@ def test_embeddings_create_with_list_input( @pytest.mark.asyncio(loop_scope="session") async def test_async_embeddings_create_with_list_input( sentry_init, - capture_events, + capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, @@ -650,8 +654,9 @@ async def test_async_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="test-key") @@ -678,22 +683,21 @@ async def test_async_embeddings_create_with_list_input( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(spans) == 1 span = spans[0] - assert span["op"] == OP.GEN_AI_EMBEDDINGS - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" # Check that list of embeddings input is captured (it's JSON serialized) - embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] assert json.loads(embeddings_input) == [ "First text", "Second text", @@ -703,7 +707,7 @@ async def test_async_embeddings_create_with_list_input( def 
test_embeddings_no_pii( sentry_init, - capture_events, + capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, @@ -713,8 +717,9 @@ def test_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="test-key") @@ -740,27 +745,26 @@ def test_embeddings_no_pii( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(spans) == 1 span = spans[0] - assert span["op"] == OP.GEN_AI_EMBEDDINGS + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS # Check that embeddings input is NOT captured when PII is disabled - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] @pytest.mark.asyncio(loop_scope="session") async def test_async_embeddings_no_pii( sentry_init, - capture_events, + capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, @@ -770,8 +774,9 @@ async def test_async_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="test-key") @@ -798,31 +803,31 @@ async def test_async_embeddings_no_pii( # Response is processed by 
litellm, so just check it exists assert response is not None - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(spans) == 1 span = spans[0] - assert span["op"] == OP.GEN_AI_EMBEDDINGS + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS # Check that embeddings input is NOT captured when PII is disabled - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] def test_exception_handling( - reset_litellm_executor, sentry_init, capture_events, get_rate_limit_model_response + reset_litellm_executor, sentry_init, capture_items, get_rate_limit_model_response ): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("event") messages = [{"role": "user", "content": "Hello!"}] @@ -843,22 +848,25 @@ def test_exception_handling( client=client, ) - # Should have error event and transaction - assert len(events) >= 1 # Find the error event - error_events = [e for e in events if e.get("level") == "error"] + error_events = [ + item.payload + for item in items + if item.type == "event" and item.payload.get("level") == "error" + ] assert len(error_events) == 1 @pytest.mark.asyncio(loop_scope="session") async def test_async_exception_handling( - sentry_init, capture_events, get_rate_limit_model_response + sentry_init, capture_items, get_rate_limit_model_response ): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = 
capture_items("event") messages = [{"role": "user", "content": "Hello!"}] @@ -879,25 +887,28 @@ async def test_async_exception_handling( client=client, ) - # Should have error event and transaction - assert len(events) >= 1 # Find the error event - error_events = [e for e in events if e.get("level") == "error"] + error_events = [ + item.payload + for item in items + if item.type == "event" and item.payload.get("level") == "error" + ] assert len(error_events) == 1 def test_span_origin( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -923,16 +934,17 @@ def test_span_origin( litellm_utils.executor.shutdown(wait=True) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.litellm" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.litellm" def test_multiple_providers( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, nonstreaming_anthropic_model_response, @@ -942,8 +954,9 @@ def test_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction") messages = [{"role": "user", "content": "Hello!"}] @@ -1015,18 +1028,19 @@ def test_multiple_providers( litellm_utils.executor.shutdown(wait=True) + events = [item.payload for item in items if item.type == "transaction"] assert len(events) 
== 3 - for i in range(3): - span = events[i]["spans"][0] + spans = [item.payload for item in items if item.type == "span"] + for span in spans: # The provider should be detected by litellm.get_llm_provider - assert SPANDATA.GEN_AI_SYSTEM in span["data"] + assert SPANDATA.GEN_AI_SYSTEM in span["attributes"] @pytest.mark.asyncio(loop_scope="session") async def test_async_multiple_providers( sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, nonstreaming_anthropic_model_response, @@ -1036,8 +1050,9 @@ async def test_async_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -1112,18 +1127,19 @@ async def test_async_multiple_providers( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) + events = [item.payload for item in items if item.type == "transaction"] assert len(events) == 3 - for i in range(3): - span = events[i]["spans"][0] + spans = [item.payload for item in items if item.type == "span"] + for span in spans: # The provider should be detected by litellm.get_llm_provider - assert SPANDATA.GEN_AI_SYSTEM in span["data"] + assert SPANDATA.GEN_AI_SYSTEM in span["attributes"] def test_additional_parameters( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1131,8 +1147,9 @@ def test_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] client = OpenAI(api_key="test-key") @@ -1162,26 +1179,27 @@ def test_additional_parameters( litellm_utils.executor.shutdown(wait=True) - 
(event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 @pytest.mark.asyncio(loop_scope="session") async def test_async_additional_parameters( sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1189,8 +1207,9 @@ async def test_async_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] client = AsyncOpenAI(api_key="test-key") @@ -1221,34 +1240,36 @@ async def test_async_additional_parameters( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if 
x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 def test_no_integration( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] client = OpenAI(api_key="test-key") @@ -1273,13 +1294,12 @@ def test_no_integration( litellm_utils.executor.shutdown(wait=True) - (event,) = events - # Should still have the transaction, but no child spans since integration is off - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 0 @@ -1287,15 +1307,16 @@ def test_no_integration( 
@pytest.mark.asyncio(loop_scope="session") async def test_async_no_integration( sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] client = AsyncOpenAI(api_key="test-key") @@ -1321,24 +1342,24 @@ async def test_async_no_integration( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - (event,) = events - # Should still have the transaction, but no child spans since integration is off - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 0 -def test_response_without_usage(sentry_init, capture_events): +def test_response_without_usage(sentry_init, capture_items): """Test handling of responses without usage information.""" sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -1366,12 +1387,11 @@ def test_response_without_usage(sentry_init, capture_events): datetime.now(), ) - (event,) = events - (span,) = event["spans"] + (span,) = (item.payload for item in items if item.type == "span") # Span should still be created even without usage info - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat gpt-3.5-turbo" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert 
span["name"] == "chat gpt-3.5-turbo" def test_integration_setup(sentry_init): @@ -1379,6 +1399,7 @@ def test_integration_setup(sentry_init): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Check that callbacks are registered @@ -1387,14 +1408,15 @@ def test_integration_setup(sentry_init): assert _failure_callback in (litellm.failure_callback or []) -def test_litellm_message_truncation(sentry_init, capture_events): +def test_litellm_message_truncation(sentry_init, capture_items): """Test that large messages are truncated properly in LiteLLM integration.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") large_content = ( "This is a very long message that will exceed our size limits. " * 1000 @@ -1422,25 +1444,24 @@ def test_litellm_message_truncation(sentry_init, capture_events): datetime.now(), ) - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + span for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT ] assert len(chat_spans) > 0 chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) + + tx = next(item.payload for item in items if item.type == 
"transaction") assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @@ -1452,7 +1473,7 @@ def test_litellm_message_truncation(sentry_init, capture_events): def test_binary_content_encoding_image_url( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1460,8 +1481,9 @@ def test_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1498,15 +1520,16 @@ def test_binary_content_encoding_image_url( litellm_utils.executor.shutdown(wait=True) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) blob_item = next( ( @@ -1530,7 +1553,7 @@ def test_binary_content_encoding_image_url( @pytest.mark.asyncio(loop_scope="session") async def test_async_binary_content_encoding_image_url( sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1538,8 +1561,9 @@ async def test_async_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1577,15 +1601,16 @@ async def 
test_async_binary_content_encoding_image_url( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) blob_item = next( ( @@ -1609,7 +1634,7 @@ async def test_async_binary_content_encoding_image_url( def test_binary_content_encoding_mixed_content( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1617,8 +1642,9 @@ def test_binary_content_encoding_mixed_content( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1656,15 +1682,16 @@ def test_binary_content_encoding_mixed_content( litellm_utils.executor.shutdown(wait=True) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content_items = [ item for msg in messages_data if "content" in msg for item in msg["content"] @@ 
-1676,7 +1703,7 @@ def test_binary_content_encoding_mixed_content( @pytest.mark.asyncio(loop_scope="session") async def test_async_binary_content_encoding_mixed_content( sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1684,8 +1711,9 @@ async def test_async_binary_content_encoding_mixed_content( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1724,15 +1752,16 @@ async def test_async_binary_content_encoding_mixed_content( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content_items = [ item for msg in messages_data if "content" in msg for item in msg["content"] @@ -1744,7 +1773,7 @@ async def test_async_binary_content_encoding_mixed_content( def test_binary_content_encoding_uri_type( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1752,8 +1781,9 @@ def test_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1789,15 +1819,16 @@ def 
test_binary_content_encoding_uri_type( litellm_utils.executor.shutdown(wait=True) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) uri_item = next( ( @@ -1816,7 +1847,7 @@ def test_binary_content_encoding_uri_type( @pytest.mark.asyncio(loop_scope="session") async def test_async_binary_content_encoding_uri_type( sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1824,8 +1855,9 @@ async def test_async_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1862,15 +1894,16 @@ async def test_async_binary_content_encoding_uri_type( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) uri_item = next( ( diff --git a/tests/integrations/openai/test_openai.py 
b/tests/integrations/openai/test_openai.py index ada2e633de..c4d77db5c8 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -132,14 +132,15 @@ async def __call__(self, *args, **kwargs): ], ) def test_nonstreaming_chat_completion_no_prompts( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -163,27 +164,26 @@ def test_nonstreaming_chat_completion_no_prompts( ) assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert 
span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.parametrize( @@ -229,13 +229,14 @@ def test_nonstreaming_chat_completion_no_prompts( ), ], ) -def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, request): +def test_nonstreaming_chat_completion(sentry_init, capture_items, messages, request): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -256,30 +257,29 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, req ) assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - - 
assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 param_id = request.node.callspec.id if "blocks" in param_id: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", } ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", @@ -290,12 +290,12 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, req }, ] - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in 
span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.asyncio @@ -308,14 +308,15 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, req ], ) async def test_nonstreaming_chat_completion_async_no_prompts( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") client.chat.completions._post = mock.AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -336,27 +337,26 @@ async def test_nonstreaming_chat_completion_async_no_prompts( response = response.choices[0].message.content assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + span = next(item.payload for item in items if item.type == "span") + assert 
span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.asyncio @@ -404,14 +404,15 @@ async def test_nonstreaming_chat_completion_async_no_prompts( ], ) async def test_nonstreaming_chat_completion_async( - sentry_init, capture_events, messages, request + sentry_init, capture_items, messages, request ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -429,30 +430,29 @@ 
async def test_nonstreaming_chat_completion_async( response = response.choices[0].message.content assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 param_id = request.node.callspec.id if "blocks" in param_id: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", } ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", 
@@ -463,12 +463,12 @@ async def test_nonstreaming_chat_completion_async( }, ] - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 def tiktoken_encoding_if_installed(): @@ -491,7 +491,7 @@ def tiktoken_encoding_if_installed(): ) def test_streaming_chat_completion_no_prompts( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -506,8 +506,9 @@ def test_streaming_chat_completion_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -581,32 +582,31 @@ def test_streaming_chat_completion_no_prompts( ) assert response_string == "hello world" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert 
span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" - - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - assert span["data"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -617,7 +617,7 @@ def test_streaming_chat_completion_no_prompts( ) def test_streaming_chat_completion_with_usage_in_stream( sentry_init, - 
capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -626,8 +626,9 @@ def test_streaming_chat_completion_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -684,13 +685,11 @@ def test_streaming_chat_completion_with_usage_in_stream( for _ in response_stream: pass - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.skipif( @@ -699,7 +698,7 @@ def test_streaming_chat_completion_with_usage_in_stream( ) def test_streaming_chat_completion_empty_content_preserves_token_usage( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -708,8 +707,9 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -747,13 +747,11 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( for _ in response_stream: pass - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - 
assert span["op"] == "gen_ai.chat" - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert "gen_ai.usage.output_tokens" not in span["data"] - assert span["data"]["gen_ai.usage.total_tokens"] == 20 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert "gen_ai.usage.output_tokens" not in span["attributes"] + assert span["attributes"]["gen_ai.usage.total_tokens"] == 20 @pytest.mark.skipif( @@ -763,7 +761,7 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( @pytest.mark.asyncio async def test_streaming_chat_completion_empty_content_preserves_token_usage_async( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -773,8 +771,9 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -814,13 +813,11 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy async for _ in response_stream: pass - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert "gen_ai.usage.output_tokens" not in span["data"] - assert span["data"]["gen_ai.usage.total_tokens"] == 20 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert "gen_ai.usage.output_tokens" not in span["attributes"] + assert span["attributes"]["gen_ai.usage.total_tokens"] == 20 
@pytest.mark.skipif( @@ -830,7 +827,7 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy @pytest.mark.asyncio async def test_streaming_chat_completion_async_with_usage_in_stream( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -840,8 +837,9 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -900,13 +898,11 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( async for _ in response_stream: pass - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 # noinspection PyTypeChecker @@ -955,7 +951,7 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( ) def test_streaming_chat_completion( sentry_init, - capture_events, + capture_items, messages, request, get_model_response, @@ -970,8 +966,9 @@ def test_streaming_chat_completion( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -1041,30 +1038,29 @@ def 
test_streaming_chat_completion( map(lambda x: x.choices[0].delta.content, response_stream) ) assert response_string == "hello world" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 param_id = request.node.callspec.id if "blocks" in param_id: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", } ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", @@ 
-1075,22 +1071,22 @@ def test_streaming_chat_completion( }, ] - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import if "blocks" in param_id: - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - assert span["data"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 else: - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 12 - assert span["data"]["gen_ai.usage.total_tokens"] == 14 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -1107,7 +1103,7 @@ def test_streaming_chat_completion( ) async def test_streaming_chat_completion_async_no_prompts( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -1123,8 +1119,9 @@ async def test_streaming_chat_completion_async_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -1201,32 +1198,31 @@ 
async def test_streaming_chat_completion_async_no_prompts( response_string += x.choices[0].delta.content assert response_string == "hello world" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" - - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert 
SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - assert span["data"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -1279,7 +1275,7 @@ async def test_streaming_chat_completion_async_no_prompts( ) async def test_streaming_chat_completion_async( sentry_init, - capture_events, + capture_items, messages, request, get_model_response, @@ -1295,8 +1291,9 @@ async def test_streaming_chat_completion_async( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") @@ -1371,32 +1368,31 @@ async def test_streaming_chat_completion_async( response_string += x.choices[0].delta.content assert response_string == "hello world" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + + span = next(item.payload for item in 
items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" param_id = request.node.callspec.id if "blocks" in param_id: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", } ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", @@ -1407,28 +1403,32 @@ async def test_streaming_chat_completion_async( }, ] - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import if "blocks" in param_id: - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - assert span["data"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert 
span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 else: - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 12 - assert span["data"]["gen_ai.usage.total_tokens"] == 14 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly -def test_bad_chat_completion(sentry_init, capture_events): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - events = capture_events() +def test_bad_chat_completion(sentry_init, capture_items): + sentry_init( + integrations=[OpenAIIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) + items = capture_items("event") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( @@ -1440,13 +1440,17 @@ def test_bad_chat_completion(sentry_init, capture_events): messages=[{"role": "system", "content": "hello"}], ) - (event,) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" -def test_span_status_error(sentry_init, capture_events): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - events = capture_events() +def test_span_status_error(sentry_init, capture_items): + sentry_init( + integrations=[OpenAIIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) + items = capture_items("event", "transaction", "span") with start_transaction(name="test"): client = OpenAI(api_key="z") @@ -1458,17 +1462,24 @@ def test_span_status_error(sentry_init, capture_events): model="some-model", messages=[{"role": "system", "content": "hello"}] ) - (error, transaction) = events - assert error["level"] == "error" - 
assert transaction["spans"][0]["status"] == "internal_error" - assert transaction["spans"][0]["tags"]["status"] == "internal_error" + (event,) = (item.payload for item in items if item.type == "event") + assert event["level"] == "error" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["contexts"]["trace"]["status"] == "internal_error" @pytest.mark.asyncio -async def test_bad_chat_completion_async(sentry_init, capture_events): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - events = capture_events() +async def test_bad_chat_completion_async(sentry_init, capture_items): + sentry_init( + integrations=[OpenAIIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) + items = capture_items("event") client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock( @@ -1479,7 +1490,7 @@ async def test_bad_chat_completion_async(sentry_init, capture_events): model="some-model", messages=[{"role": "system", "content": "hello"}] ) - (event,) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" @@ -1492,14 +1503,15 @@ async def test_bad_chat_completion_async(sentry_init, capture_events): ], ) def test_embeddings_create_no_pii( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") @@ -1521,17 +1533,15 @@ def test_embeddings_create_no_pii( assert len(response.data[0].embedding) == 3 - tx = events[0] - assert tx["type"] == "transaction" - span = 
tx["spans"][0] - assert span["op"] == "gen_ai.embeddings" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.parametrize( @@ -1577,13 +1587,14 @@ def test_embeddings_create_no_pii( ), ], ) -def test_embeddings_create(sentry_init, capture_events, input, request): +def test_embeddings_create(sentry_init, capture_items, input, request): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") @@ -1603,24 +1614,24 @@ def test_embeddings_create(sentry_init, capture_events, input, request): assert len(response.data[0].embedding) == 3 - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.embeddings" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] 
== "text-embedding-3-large" param_id = request.node.callspec.id if param_id == "string": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == ["hello"] + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "hello" + ] elif param_id == "string_sequence" or param_id == "string_iterable": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ "First text", "Second text", "Third text", ] elif param_id == "tokens" or param_id == "token_iterable": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ 5, 8, 13, @@ -1628,13 +1639,13 @@ def test_embeddings_create(sentry_init, capture_events, input, request): 34, ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ [5, 8, 13, 21, 34], [8, 13, 21, 34, 55], ] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.asyncio @@ -1647,14 +1658,15 @@ def test_embeddings_create(sentry_init, capture_events, input, request): ], ) async def test_embeddings_create_async_no_pii( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") @@ -1676,17 +1688,15 @@ async def test_embeddings_create_async_no_pii( assert len(response.data[0].embedding) == 3 - tx = 
events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.embeddings" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.asyncio @@ -1733,13 +1743,14 @@ async def test_embeddings_create_async_no_pii( ), ], ) -async def test_embeddings_create_async(sentry_init, capture_events, input, request): +async def test_embeddings_create_async(sentry_init, capture_items, input, request): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") @@ -1761,24 +1772,24 @@ async def test_embeddings_create_async(sentry_init, capture_events, input, reque assert len(response.data[0].embedding) == 3 - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.embeddings" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert 
span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" param_id = request.node.callspec.id if param_id == "string": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == ["hello"] + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "hello" + ] elif param_id == "string_sequence" or param_id == "string_iterable": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ "First text", "Second text", "Third text", ] elif param_id == "tokens" or param_id == "token_iterable": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ 5, 8, 13, @@ -1786,13 +1797,13 @@ async def test_embeddings_create_async(sentry_init, capture_events, input, reque 34, ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ [5, 8, 13, 21, 34], [8, 13, 21, 34, 55], ] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.parametrize( @@ -1800,14 +1811,15 @@ async def test_embeddings_create_async(sentry_init, capture_events, input, reque [(True, True), (True, False), (False, True), (False, False)], ) def test_embeddings_create_raises_error( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + 
items = capture_items("event") client = OpenAI(api_key="z") @@ -1818,7 +1830,7 @@ def test_embeddings_create_raises_error( with pytest.raises(OpenAIError): client.embeddings.create(input="hello", model="text-embedding-3-large") - (event,) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" @@ -1828,14 +1840,15 @@ def test_embeddings_create_raises_error( [(True, True), (True, False), (False, True), (False, False)], ) async def test_embeddings_create_raises_error_async( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("event") client = AsyncOpenAI(api_key="z") @@ -1846,16 +1859,17 @@ async def test_embeddings_create_raises_error_async( with pytest.raises(OpenAIError): await client.embeddings.create(input="hello", model="text-embedding-3-large") - (event,) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" -def test_span_origin_nonstreaming_chat(sentry_init, capture_events): +def test_span_origin_nonstreaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -1865,19 +1879,21 @@ def test_span_origin_nonstreaming_chat(sentry_init, capture_events): model="some-model", messages=[{"role": "system", "content": "hello"}] ) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert 
event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" @pytest.mark.asyncio -async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_events): +async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -1887,18 +1903,20 @@ async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_events): model="some-model", messages=[{"role": "system", "content": "hello"}] ) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" -def test_span_origin_streaming_chat(sentry_init, capture_events): + +def test_span_origin_streaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="z") returned_stream = Stream(cast_to=None, response=None, client=client) @@ -1946,21 +1964,23 @@ def test_span_origin_streaming_chat(sentry_init, capture_events): "".join(map(lambda x: x.choices[0].delta.content, response_stream)) - (event,) = events + (transaction,) = (item.payload for item in items if item.type == "transaction") + assert transaction["contexts"]["trace"]["origin"] 
== "manual" - assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" @pytest.mark.asyncio async def test_span_origin_streaming_chat_async( - sentry_init, capture_events, async_iterator + sentry_init, capture_items, async_iterator ): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="z") returned_stream = AsyncStream(cast_to=None, response=None, client=client) @@ -2014,18 +2034,20 @@ async def test_span_origin_streaming_chat_async( # "".join(map(lambda x: x.choices[0].delta.content, response_stream)) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" -def test_span_origin_embeddings(sentry_init, capture_events): +def test_span_origin_embeddings(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="z") @@ -2043,19 +2065,21 @@ def test_span_origin_embeddings(sentry_init, capture_events): with start_transaction(name="openai tx"): client.embeddings.create(input="hello", model="text-embedding-3-large") - (event,) = events - + (event,) = [item.payload for item in items if item.type == "transaction"] assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + + spans = 
[item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" @pytest.mark.asyncio -async def test_span_origin_embeddings_async(sentry_init, capture_events): +async def test_span_origin_embeddings_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="z") @@ -2073,10 +2097,11 @@ async def test_span_origin_embeddings_async(sentry_init, capture_events): with start_transaction(name="openai tx"): await client.embeddings.create(input="hello", model="text-embedding-3-large") - (event,) = events - + (event,) = [item.payload for item in items if item.type == "transaction"] assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" def test_completions_token_usage_from_response(): @@ -2442,12 +2467,13 @@ def count_tokens(msg): @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): +def test_ai_client_span_responses_api_no_pii(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) @@ -2462,13 +2488,10 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): top_p=0.9, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 - assert spans[0]["op"] == "gen_ai.responses" - 
assert spans[0]["origin"] == "auto.ai.openai" - assert spans[0]["data"] == { + assert spans[0]["attributes"] == { "gen_ai.operation.name": "responses", "gen_ai.request.max_tokens": 100, "gen_ai.request.temperature": 0.7, @@ -2482,13 +2505,21 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): "gen_ai.usage.output_tokens": 10, "gen_ai.usage.output_tokens.reasoning": 8, "gen_ai.usage.total_tokens": 30, + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", "thread.id": mock.ANY, "thread.name": mock.ANY, } - assert "gen_ai.system_instructions" not in spans[0]["data"] - assert "gen_ai.request.messages" not in spans[0]["data"] - assert "gen_ai.response.text" not in spans[0]["data"] + assert "gen_ai.system_instructions" not in spans[0]["attributes"] + assert "gen_ai.request.messages" not in spans[0]["attributes"] + assert "gen_ai.response.text" not in spans[0]["attributes"] @pytest.mark.parametrize( @@ -2557,14 +2588,15 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): ) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_ai_client_span_responses_api( - sentry_init, capture_events, instructions, input, request + sentry_init, capture_items, instructions, input, request ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) @@ -2579,12 +2611,9 @@ def test_ai_client_span_responses_api( top_p=0.9, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if 
item.type == "span"] assert len(spans) == 1 - assert spans[0]["op"] == "gen_ai.responses" - assert spans[0]["origin"] == "auto.ai.openai" expected_data = { "gen_ai.operation.name": "responses", @@ -2601,6 +2630,14 @@ def test_ai_client_span_responses_api( "gen_ai.usage.total_tokens": 30, "gen_ai.request.model": "gpt-4o", "gen_ai.response.text": "the model response", + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -2759,17 +2796,18 @@ def test_ai_client_span_responses_api( } ) - assert spans[0]["data"] == expected_data + assert spans[0]["attributes"] == expected_data @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -def test_error_in_responses_api(sentry_init, capture_events): +def test_error_in_responses_api(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("event", "transaction", "span") client = OpenAI(api_key="z") client.responses._post = mock.Mock( @@ -2784,15 +2822,17 @@ def test_error_in_responses_api(sentry_init, capture_events): input="How do I check if a Python object is an instance of a class?", ) - (error_event, transaction_event) = events - - assert transaction_event["type"] == "transaction" # make sure the span where the error occurred is captured - assert transaction_event["spans"][0]["op"] == "gen_ai.responses" + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.op"] == "gen_ai.responses" + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == 
"error" assert error_event["exception"]["values"][0]["type"] == "OpenAIError" + (transaction_event,) = ( + item.payload for item in items if item.type == "transaction" + ) assert ( error_event["contexts"]["trace"]["trace_id"] == transaction_event["contexts"]["trace"]["trace_id"] @@ -2866,14 +2906,15 @@ def test_error_in_responses_api(sentry_init, capture_events): ], ) async def test_ai_client_span_responses_async_api( - sentry_init, capture_events, instructions, input, request + sentry_init, capture_items, instructions, input, request ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") client.responses._post = AsyncMock(return_value=EXAMPLE_RESPONSE) @@ -2888,12 +2929,9 @@ async def test_ai_client_span_responses_async_api( top_p=0.9, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 - assert spans[0]["op"] == "gen_ai.responses" - assert spans[0]["origin"] == "auto.ai.openai" expected_data = { "gen_ai.operation.name": "responses", @@ -2911,6 +2949,14 @@ async def test_ai_client_span_responses_async_api( "gen_ai.usage.output_tokens.reasoning": 8, "gen_ai.usage.total_tokens": 30, "gen_ai.response.text": "the model response", + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -3069,7 +3115,7 @@ async def test_ai_client_span_responses_async_api( } ) - assert spans[0]["data"] == expected_data + assert spans[0]["attributes"] == expected_data @pytest.mark.asyncio @@ -3140,7 +3186,7 @@ async 
def test_ai_client_span_responses_async_api( @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_ai_client_span_streaming_responses_async_api( sentry_init, - capture_events, + capture_items, instructions, input, request, @@ -3152,8 +3198,9 @@ async def test_ai_client_span_streaming_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -3178,11 +3225,12 @@ async def test_ai_client_span_streaming_responses_async_api( async for _ in result: pass - (transaction,) = events - spans = [span for span in transaction["spans"] if span["op"] == OP.GEN_AI_RESPONSES] + spans = [item.payload for item in items if item.type == "span"] + spans = [ + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_RESPONSES + ] assert len(spans) == 1 - assert spans[0]["origin"] == "auto.ai.openai" expected_data = { "gen_ai.operation.name": "responses", @@ -3200,6 +3248,14 @@ async def test_ai_client_span_streaming_responses_async_api( "gen_ai.usage.total_tokens": 30, "gen_ai.request.model": "gpt-4o", "gen_ai.response.text": "hello world", + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -3358,18 +3414,19 @@ async def test_ai_client_span_streaming_responses_async_api( } ) - assert spans[0]["data"] == expected_data + assert spans[0]["attributes"] == expected_data @pytest.mark.asyncio @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -async def 
test_error_in_responses_async_api(sentry_init, capture_events): +async def test_error_in_responses_async_api(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("event", "transaction", "span") client = AsyncOpenAI(api_key="z") client.responses._post = AsyncMock( @@ -3384,15 +3441,17 @@ async def test_error_in_responses_async_api(sentry_init, capture_events): input="How do I check if a Python object is an instance of a class?", ) - (error_event, transaction_event) = events - - assert transaction_event["type"] == "transaction" # make sure the span where the error occurred is captured - assert transaction_event["spans"][0]["op"] == "gen_ai.responses" + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.op"] == "gen_ai.responses" + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "OpenAIError" + (transaction_event,) = ( + item.payload for item in items if item.type == "transaction" + ) assert ( error_event["contexts"]["trace"]["trace_id"] == transaction_event["contexts"]["trace"]["trace_id"] @@ -3479,7 +3538,7 @@ async def test_error_in_responses_async_api(sentry_init, capture_events): @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_streaming_responses_api( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -3493,8 +3552,9 @@ def test_streaming_responses_api( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -3525,26 +3585,25 @@ 
def test_streaming_responses_api( assert response_string == "hello world" - (transaction,) = events - (span,) = transaction["spans"] - assert span["op"] == "gen_ai.responses" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + (span,) = (item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" if send_default_pii and include_prompts: - assert span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.asyncio @@ -3555,7 +3614,7 @@ def 
test_streaming_responses_api( @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_streaming_responses_api_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -3570,8 +3629,9 @@ async def test_streaming_responses_api_async( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -3600,26 +3660,25 @@ async def test_streaming_responses_api_async( assert response_string == "hello world" - (transaction,) = events - (span,) = transaction["spans"] - assert span["op"] == "gen_ai.responses" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + (span,) = (item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" if send_default_pii and include_prompts: - assert span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - 
assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.skipif( @@ -3630,12 +3689,13 @@ async def test_streaming_responses_api_async( "tools", [[], None, NOT_GIVEN, omit], ) -def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): +def test_empty_tools_in_chat_completion(sentry_init, capture_items, tools): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -3647,10 +3707,9 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): tools=tools, ) - (event,) = events - span = event["spans"][0] + span = next(item.payload for item in items if item.type == "span") - assert "gen_ai.request.available_tools" not in span["data"] + assert "gen_ai.request.available_tools" not in span["attributes"] # Test messages with mixed roles including "ai" that should be mapped to "assistant" @@ -3669,7 +3728,7 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): ], ) def test_openai_message_role_mapping( - sentry_init, capture_events, test_message, expected_role + sentry_init, capture_items, test_message, expected_role ): """Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'""" @@ -3677,8 +3736,9 @@ def test_openai_message_role_mapping( 
integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -3688,28 +3748,28 @@ def test_openai_message_role_mapping( with start_transaction(name="openai tx"): client.chat.completions.create(model="test-model", messages=test_messages) # Verify that the span was created correctly - (event,) = events - span = event["spans"][0] - assert span["op"] == "gen_ai.chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] # Parse the stored messages import json - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == expected_role -def test_openai_message_truncation(sentry_init, capture_events): +def test_openai_message_truncation(sentry_init, capture_items): """Test that large messages are truncated properly in OpenAI integration.""" sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -3730,17 +3790,17 @@ def test_openai_message_truncation(sentry_init, capture_events): messages=large_messages, ) - (event,) = events - span = event["spans"][0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + span = next(item.payload for item in items if item.type 
== "span") + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) assert len(parsed_messages) <= len(large_messages) + (event,) = (item.payload for item in items if item.type == "transaction") meta_path = event["_meta"] span_meta = meta_path["spans"]["0"]["data"] messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] @@ -3749,7 +3809,7 @@ def test_openai_message_truncation(sentry_init, capture_events): # noinspection PyTypeChecker def test_streaming_chat_completion_ttft( - sentry_init, capture_events, get_model_response, server_side_event_chunks + sentry_init, capture_items, get_model_response, server_side_event_chunks ): """ Test that streaming chat completions capture time-to-first-token (TTFT). @@ -3757,8 +3817,9 @@ def test_streaming_chat_completion_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -3810,13 +3871,12 @@ def test_streaming_chat_completion_ttft( for _ in response_stream: pass - (tx,) = events - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] - ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 @@ -3825,7 +3885,7 @@ def 
test_streaming_chat_completion_ttft( @pytest.mark.asyncio async def test_streaming_chat_completion_ttft_async( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -3836,8 +3896,9 @@ async def test_streaming_chat_completion_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -3891,13 +3952,12 @@ async def test_streaming_chat_completion_ttft_async( async for _ in response_stream: pass - (tx,) = events - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] - ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 @@ -3905,7 +3965,7 @@ async def test_streaming_chat_completion_ttft_async( # noinspection PyTypeChecker @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_streaming_responses_api_ttft( - sentry_init, capture_events, get_model_response, server_side_event_chunks + sentry_init, capture_items, get_model_response, server_side_event_chunks ): """ Test that streaming responses API captures time-to-first-token (TTFT). 
@@ -3913,8 +3973,9 @@ def test_streaming_responses_api_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -3936,13 +3997,12 @@ def test_streaming_responses_api_ttft( for _ in response_stream: pass - (tx,) = events - span = tx["spans"][0] - assert span["op"] == "gen_ai.responses" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] - ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 @@ -3952,7 +4012,7 @@ def test_streaming_responses_api_ttft( @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_streaming_responses_api_ttft_async( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -3963,8 +4023,9 @@ async def test_streaming_responses_api_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -3986,12 +4047,11 @@ async def test_streaming_responses_api_ttft_async( async for _ in response_stream: pass - (tx,) = events - span = tx["spans"][0] - assert span["op"] == "gen_ai.responses" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" # Verify TTFT is captured - assert 
SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] - ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 7310e86df5..9e74848a04 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -160,7 +160,7 @@ def test_agent_custom_model(): @pytest.mark.asyncio async def test_agent_invocation_span_no_pii( sentry_init, - capture_events, + capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, @@ -182,9 +182,10 @@ async def test_agent_invocation_span_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, "Test input", run_config=test_run_config @@ -193,38 +194,44 @@ async def test_agent_invocation_span_no_pii( assert result is not None assert result.final_output == "Hello, how can I help you?" 
- (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT ) - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["data"] - assert "gen_ai.request.messages" not in invoke_agent_span["data"] - assert "gen_ai.response.text" not in invoke_agent_span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["attributes"] + assert "gen_ai.request.messages" not in invoke_agent_span["attributes"] + assert "gen_ai.response.text" not in invoke_agent_span["attributes"] - assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["data"]["gen_ai.system"] == "openai" - assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + 
assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["data"]["gen_ai.system"] == "openai" - assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 @pytest.mark.asyncio @@ -305,7 +312,7 @@ async def test_agent_invocation_span_no_pii( ) async def test_agent_invocation_span( sentry_init, - capture_events, + capture_items, test_agent_with_instructions, nonstreaming_responses_model_response, instructions, @@ -333,9 +340,10 @@ async def test_agent_invocation_span( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span", "transaction") result = await 
agents.Runner.run( agent, @@ -346,28 +354,34 @@ async def test_agent_invocation_span( assert result is not None assert result.final_output == "Hello, how can I help you?" - (transaction,) = events - spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans - + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["description"] == "invoke_agent test_agent" + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span, ai_client_span = spans + + assert invoke_agent_span["name"] == "invoke_agent test_agent" # Only first case checks "gen_ai.request.messages" until further input handling work. param_id = request.node.callspec.id if "string" in param_id and instructions is None: # type: ignore - assert "gen_ai.system_instructions" not in ai_client_span["data"] + assert "gen_ai.system_instructions" not in ai_client_span["attributes"] - assert invoke_agent_span["data"]["gen_ai.request.messages"] == safe_serialize( + assert invoke_agent_span["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( [ {"content": [{"text": "Test input", "type": "text"}], "role": "user"}, ] ) elif "string" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -376,13 +390,17 @@ async def test_agent_invocation_span( ] ) elif "blocks_no_type" in param_id and instructions is None: # type: ignore - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, ] ) elif 
"blocks_no_type" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -392,13 +410,17 @@ async def test_agent_invocation_span( ] ) elif "blocks" in param_id and instructions is None: # type: ignore - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, ] ) elif "blocks" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -408,14 +430,18 @@ async def test_agent_invocation_span( ] ) elif "parts_no_type" in param_id and instructions is None: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "Be concise and clear."}, ] ) elif "parts_no_type" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -426,14 +452,18 @@ async def test_agent_invocation_span( ] ) elif instructions is None: # type: ignore - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "Be concise and clear."}, ] ) else: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + 
"gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -445,32 +475,32 @@ async def test_agent_invocation_span( ) assert ( - invoke_agent_span["data"]["gen_ai.response.text"] + invoke_agent_span["attributes"]["gen_ai.response.text"] == "Hello, how can I help you?" ) - assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["data"]["gen_ai.system"] == "openai" - assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["data"]["gen_ai.system"] == "openai" - assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert 
ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 @pytest.mark.asyncio async def test_client_span_custom_model( sentry_init, - capture_events, + capture_items, test_agent_custom_model, nonstreaming_responses_model_response, get_model_response, @@ -495,9 +525,10 @@ async def test_client_span_custom_model( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span") result = await agents.Runner.run( agent, "Test input", run_config=test_run_config @@ -506,17 +537,18 @@ async def test_client_span_custom_model( assert result is not None assert result.final_output == "Hello, how can I help you?" 
- (transaction,) = events - spans = transaction["spans"] - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert ai_client_span["description"] == "chat my-custom-model" - assert ai_client_span["data"]["gen_ai.request.model"] == "my-custom-model" + assert ai_client_span["name"] == "chat my-custom-model" + assert ai_client_span["attributes"]["gen_ai.request.model"] == "my-custom-model" def test_agent_invocation_span_sync_no_pii( sentry_init, - capture_events, + capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, @@ -541,44 +573,51 @@ def test_agent_invocation_span_sync_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span", "transaction") result = agents.Runner.run_sync(agent, "Test input", run_config=test_run_config) assert result is not None assert result.final_output == "Hello, how can I help you?" 
- (transaction,) = events - spans = transaction["spans"] - invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) - + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["data"]["gen_ai.system"] == "openai" - assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert 
ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["data"]["gen_ai.system"] == "openai" - assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["attributes"] @pytest.mark.parametrize( @@ -658,7 +697,7 @@ def test_agent_invocation_span_sync_no_pii( ) def test_agent_invocation_span_sync( sentry_init, - capture_events, + capture_items, test_agent_with_instructions, nonstreaming_responses_model_response, instructions, @@ -686,9 +725,10 @@ def test_agent_invocation_span_sync( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span", "transaction") result = agents.Runner.run_sync( agent, @@ -699,36 +739,40 @@ def test_agent_invocation_span_sync( assert result is not None assert result.final_output == "Hello, how can I help you?" 
- (transaction,) = events - spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans - + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["data"]["gen_ai.system"] == "openai" - assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 - - assert ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["data"]["gen_ai.system"] == "openai" - assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span, ai_client_span = spans + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert 
invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 param_id = request.node.callspec.id if "string" in param_id and instructions is None: # type: ignore - assert "gen_ai.system_instructions" not in ai_client_span["data"] + assert "gen_ai.system_instructions" not in ai_client_span["attributes"] elif "string" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -737,13 +781,17 @@ def test_agent_invocation_span_sync( ] ) elif "blocks_no_type" in param_id and instructions is None: # type: ignore - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, ] ) elif "blocks_no_type" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -753,13 +801,17 @@ def test_agent_invocation_span_sync( ] ) elif "blocks" in param_id and instructions is None: # type: ignore - assert 
ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, ] ) elif "blocks" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -769,14 +821,18 @@ def test_agent_invocation_span_sync( ] ) elif "parts_no_type" in param_id and instructions is None: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "Be concise and clear."}, ] ) elif "parts_no_type" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -787,14 +843,18 @@ def test_agent_invocation_span_sync( ] ) elif instructions is None: # type: ignore - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "Be concise and clear."}, ] ) else: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -807,7 +867,7 @@ def test_agent_invocation_span_sync( @pytest.mark.asyncio -async def test_handoff_span(sentry_init, capture_events, get_model_response): +async def test_handoff_span(sentry_init, capture_items, get_model_response): """ Test that handoff spans are created when agents hand off to 
other agents. """ @@ -908,9 +968,10 @@ async def test_handoff_span(sentry_init, capture_events, get_model_response): sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") result = await agents.Runner.run( primary_agent, @@ -920,21 +981,22 @@ async def test_handoff_span(sentry_init, capture_events, get_model_response): assert result is not None - (transaction,) = events - spans = transaction["spans"] - handoff_span = next(span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF) + spans = [item.payload for item in items if item.type == "span"] + handoff_span = next( + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF + ) # Verify handoff span was created assert handoff_span is not None - assert ( - handoff_span["description"] == "handoff from primary_agent to secondary_agent" - ) - assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" + assert handoff_span["name"] == "handoff from primary_agent to secondary_agent" + assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" @pytest.mark.asyncio async def test_max_turns_before_handoff_span( - sentry_init, capture_events, get_model_response + sentry_init, capture_items, get_model_response ): """ Example raising agents.exceptions.AgentsException after the agent invocation span is complete. 
@@ -1036,9 +1098,10 @@ async def test_max_turns_before_handoff_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") with pytest.raises(MaxTurnsExceeded): await agents.Runner.run( @@ -1048,22 +1111,23 @@ async def test_max_turns_before_handoff_span( max_turns=1, ) - (error, transaction) = events - spans = transaction["spans"] - handoff_span = next(span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF) + spans = [item.payload for item in items if item.type == "span"] + handoff_span = next( + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF + ) # Verify handoff span was created assert handoff_span is not None - assert ( - handoff_span["description"] == "handoff from primary_agent to secondary_agent" - ) - assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" + assert handoff_span["name"] == "handoff from primary_agent to secondary_agent" + assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" @pytest.mark.asyncio async def test_tool_execution_span( sentry_init, - capture_events, + capture_items, test_agent, get_model_response, responses_tool_call_model_responses, @@ -1133,9 +1197,10 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") await agents.Runner.run( agent_with_tool, @@ -1143,13 +1208,26 @@ def simple_test_tool(message: str) -> str: run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] - agent_span = next(span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT) + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = 
transactions[0] + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + spans = [item.payload for item in items if item.type == "span"] + agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) ai_client_span1, ai_client_span2 = ( - span for span in spans if span["op"] == OP.GEN_AI_CHAT + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + tool_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL ) - tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) available_tool = { "name": "simple_test_tool", @@ -1189,39 +1267,36 @@ def simple_test_tool(message: str) -> str: } ) - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - assert agent_span["description"] == "invoke_agent test_agent" - assert agent_span["origin"] == "auto.ai.openai_agents" - assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + assert agent_span["name"] == "invoke_agent test_agent" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" agent_span_available_tool = json.loads( - agent_span["data"]["gen_ai.request.available_tools"] + agent_span["attributes"]["gen_ai.request.available_tools"] )[0] assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) - assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert agent_span["data"]["gen_ai.request.top_p"] == 
1.0 - assert agent_span["data"]["gen_ai.system"] == "openai" + assert agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert agent_span["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span1["description"] == "chat gpt-4" - assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span1["data"]["gen_ai.system"] == "openai" - assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span1["name"] == "chat gpt-4" + assert ai_client_span1["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span1["attributes"]["gen_ai.agent.name"] == "test_agent" ai_client_span1_available_tool = json.loads( - ai_client_span1["data"]["gen_ai.request.available_tools"] + ai_client_span1["attributes"]["gen_ai.request.available_tools"] )[0] assert all( ai_client_span1_available_tool[k] == v for k, v in available_tool.items() ) - assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( + assert ai_client_span1["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span1["attributes"]["gen_ai.request.messages"] == safe_serialize( [ { "role": "user", @@ -1231,14 +1306,14 @@ def simple_test_tool(message: str) -> str: }, ] ) - assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 
- assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 + assert ai_client_span1["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span1["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens"] == 5 + assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span1["attributes"]["gen_ai.usage.total_tokens"] == 15 tool_call = { "arguments": '{"message": "hello"}', @@ -1252,41 +1327,41 @@ def simple_test_tool(message: str) -> str: if OPENAI_VERSION >= (2, 25, 0): tool_call["namespace"] = None - assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ + assert json.loads(ai_client_span1["attributes"]["gen_ai.response.tool_calls"]) == [ tool_call ] - assert tool_span["description"] == "execute_tool simple_test_tool" - assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" - assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + assert tool_span["name"] == "execute_tool simple_test_tool" + assert tool_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" tool_span_available_tool = json.loads( - tool_span["data"]["gen_ai.request.available_tools"] + tool_span["attributes"]["gen_ai.request.available_tools"] )[0] assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) - assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 - assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" - assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 - assert tool_span["data"]["gen_ai.request.top_p"] 
== 1.0 - assert tool_span["data"]["gen_ai.system"] == "openai" - assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool" - assert tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}' - assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool" - assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello" - assert ai_client_span2["description"] == "chat gpt-4" - assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat" + assert tool_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert tool_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert tool_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert tool_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert tool_span["attributes"]["gen_ai.system"] == "openai" + assert tool_span["attributes"]["gen_ai.tool.description"] == "A simple tool" + assert tool_span["attributes"]["gen_ai.tool.input"] == '{"message": "hello"}' + assert tool_span["attributes"]["gen_ai.tool.name"] == "simple_test_tool" + assert tool_span["attributes"]["gen_ai.tool.output"] == "Tool executed with: hello" + assert ai_client_span2["name"] == "chat gpt-4" + assert ai_client_span2["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span2["attributes"]["gen_ai.operation.name"] == "chat" ai_client_span2_available_tool = json.loads( - ai_client_span2["data"]["gen_ai.request.available_tools"] + ai_client_span2["attributes"]["gen_ai.request.available_tools"] )[0] assert all( ai_client_span2_available_tool[k] == v for k, v in available_tool.items() ) - assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( + assert ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span2["attributes"]["gen_ai.request.messages"] == safe_serialize( [ { "role": "tool", @@ 
-1300,19 +1375,19 @@ def simple_test_tool(message: str) -> str: }, ] ) - assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0 assert ( - ai_client_span2["data"]["gen_ai.response.text"] + ai_client_span2["attributes"]["gen_ai.response.text"] == "Task completed using the tool" ) - assert ai_client_span2["data"]["gen_ai.system"] == "openai" - assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15 - assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10 - assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 + assert ai_client_span2["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15 + assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25 @pytest.mark.asyncio @@ -1351,6 +1426,7 @@ async def test_hosted_mcp_tool_propagation_header_streamed( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, release="d08ebdb9309e1b004c6f52202de58a09c2268e42", + _experiments={"gen_ai_as_v2_spans": True}, ) request_headers = {} @@ -1513,6 +1589,7 @@ async def test_hosted_mcp_tool_propagation_headers( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, release="d08ebdb9309e1b004c6f52202de58a09c2268e42", 
+ _experiments={"gen_ai_as_v2_spans": True}, ) response = get_model_response(EXAMPLE_RESPONSE, serialize_pydantic=True) @@ -1570,7 +1647,7 @@ async def test_hosted_mcp_tool_propagation_headers( @pytest.mark.asyncio -async def test_model_behavior_error(sentry_init, capture_events, test_agent): +async def test_model_behavior_error(sentry_init, capture_items, test_agent): """ Example raising agents.exceptions.AgentsException before the agent invocation span is complete. The mocked API response indicates that "wrong_tool" was called. @@ -1611,9 +1688,10 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span", "transaction") with pytest.raises(ModelBehaviorError): await agents.Runner.run( @@ -1622,26 +1700,27 @@ def simple_test_tool(message: str) -> str: run_config=test_run_config, ) - (error, transaction) = events - spans = transaction["spans"] + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + spans = [item.payload for item in items if item.type == "span"] ( agent_span, ai_client_span1, ) = spans - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - assert agent_span["description"] == "invoke_agent test_agent" - assert agent_span["origin"] == "auto.ai.openai_agents" + assert agent_span["name"] == "invoke_agent test_agent" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" # Error due to unrecognized tool in model response. 
- assert agent_span["status"] == "internal_error" - assert agent_span["tags"]["status"] == "internal_error" + assert agent_span["status"] == "error" @pytest.mark.asyncio -async def test_error_handling(sentry_init, capture_events, test_agent): +async def test_error_handling(sentry_init, capture_items, test_agent): """ Test error handling in agent execution. """ @@ -1658,41 +1737,42 @@ async def test_error_handling(sentry_init, capture_events, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("event", "span", "transaction") with pytest.raises(Exception, match="Model Error"): await agents.Runner.run( test_agent, "Test input", run_config=test_run_config ) - ( - error_event, - transaction, - ) = events - + error_events = [item.payload for item in items if item.type == "event"] + assert len(error_events) == 1 + error_event = error_events[0] assert error_event["exception"]["values"][0]["type"] == "Exception" assert error_event["exception"]["values"][0]["value"] == "Model Error" assert error_event["exception"]["values"][0]["mechanism"]["type"] == "openai_agents" - spans = transaction["spans"] - (invoke_agent_span, ai_client_span) = spans - + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert invoke_agent_span["origin"] == "auto.ai.openai_agents" + spans = [item.payload for item in items if item.type == "span"] + (invoke_agent_span, ai_client_span) = spans + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" - assert 
ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["origin"] == "auto.ai.openai_agents" - assert ai_client_span["status"] == "internal_error" - assert ai_client_span["tags"]["status"] == "internal_error" + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert ai_client_span["status"] == "error" @pytest.mark.asyncio -async def test_error_captures_input_data(sentry_init, capture_events, test_agent): +async def test_error_captures_input_data(sentry_init, capture_items, test_agent): """ Test that input data is captured even when the API call raises an exception. This verifies that _set_input_data is called before the API call. @@ -1723,39 +1803,39 @@ async def test_error_captures_input_data(sentry_init, capture_events, test_agent ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("event", "span") with pytest.raises(InternalServerError, match="Error code: 500"): await agents.Runner.run(agent, "Test input", run_config=test_run_config) - ( - error_event, - transaction, - ) = events - + error_events = [item.payload for item in items if item.type == "event"] + assert len(error_events) == 1 + error_event = error_events[0] assert error_event["exception"]["values"][0]["type"] == "InternalServerError" assert error_event["exception"]["values"][0]["value"] == "Error code: 500" - spans = transaction["spans"] - ai_client_span = [s for s in spans if s["op"] == "gen_ai.chat"][0] + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ][0] - assert ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["status"] == "internal_error" - assert ai_client_span["tags"]["status"] == "internal_error" + assert ai_client_span["name"] == "chat gpt-4" + assert 
ai_client_span["status"] == "error" - assert "gen_ai.request.messages" in ai_client_span["data"] + assert "gen_ai.request.messages" in ai_client_span["attributes"] request_messages = safe_serialize( [ {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, ] ) - assert ai_client_span["data"]["gen_ai.request.messages"] == request_messages + assert ai_client_span["attributes"]["gen_ai.request.messages"] == request_messages @pytest.mark.asyncio -async def test_span_status_error(sentry_init, capture_events, test_agent): +async def test_span_status_error(sentry_init, capture_items, test_agent): with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): with patch( "agents.models.openai_responses.OpenAIResponsesModel.get_response" @@ -1768,25 +1848,29 @@ async def test_span_status_error(sentry_init, capture_events, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("event", "transaction", "span") with pytest.raises(ValueError, match="Model Error"): await agents.Runner.run( test_agent, "Test input", run_config=test_run_config ) - (error, transaction) = events + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - assert transaction["spans"][0]["status"] == "internal_error" - assert transaction["spans"][0]["tags"]["status"] == "internal_error" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["contexts"]["trace"]["status"] == "internal_error" @pytest.mark.asyncio async def test_mcp_tool_execution_spans( - sentry_init, capture_events, test_agent, get_model_response + sentry_init, capture_items, test_agent, get_model_response ): """ Test that MCP (Model Context Protocol) tool calls create execute_tool spans. 
@@ -1878,9 +1962,10 @@ async def test_mcp_tool_execution_spans( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span", "transaction") await agents.Runner.run( agent, @@ -1888,33 +1973,35 @@ async def test_mcp_tool_execution_spans( run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find the MCP execute_tool span mcp_tool_span = None for span in spans: - if span.get("description") == "execute_tool test_mcp_tool": + if span.get("name") == "execute_tool test_mcp_tool": mcp_tool_span = span break # Verify the MCP tool span was created assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "search term"}' + assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" assert ( - mcp_tool_span["data"]["gen_ai.tool.output"] == "MCP tool executed successfully" + mcp_tool_span["attributes"]["gen_ai.tool.input"] == '{"query": "search term"}' + ) + assert ( + mcp_tool_span["attributes"]["gen_ai.tool.output"] + == "MCP tool executed successfully" ) # Verify no error status since error was None - assert mcp_tool_span.get("status") != "internal_error" - assert mcp_tool_span.get("tags", {}).get("status") != "internal_error" + assert mcp_tool_span.get("status") != "error" + assert mcp_tool_span.get("tags", {}).get("status") != "error" @pytest.mark.asyncio async def test_mcp_tool_execution_with_error( - sentry_init, capture_events, test_agent, get_model_response + sentry_init, capture_items, test_agent, get_model_response ): """ Test that MCP tool 
calls with errors are tracked with error status. @@ -2006,9 +2093,10 @@ async def test_mcp_tool_execution_with_error( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span", "transaction") await agents.Runner.run( agent, @@ -2016,31 +2104,29 @@ async def test_mcp_tool_execution_with_error( run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find the MCP execute_tool span with error mcp_tool_span = None for span in spans: - if span.get("description") == "execute_tool failing_mcp_tool": + if span.get("name") == "execute_tool failing_mcp_tool": mcp_tool_span = span break # Verify the MCP tool span was created with error status assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["description"] == "execute_tool failing_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.name"] == "failing_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "test"}' - assert mcp_tool_span["data"]["gen_ai.tool.output"] is None + assert mcp_tool_span["name"] == "execute_tool failing_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "failing_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.input"] == '{"query": "test"}' + assert mcp_tool_span["attributes"]["gen_ai.tool.output"] == "None" # Verify error status was set - assert mcp_tool_span["status"] == "internal_error" - assert mcp_tool_span["tags"]["status"] == "internal_error" + assert mcp_tool_span["status"] == "error" @pytest.mark.asyncio async def test_mcp_tool_execution_without_pii( - sentry_init, capture_events, test_agent, get_model_response + sentry_init, capture_items, test_agent, get_model_response ): """ Test that MCP tool input/output are not included when send_default_pii is False. 
@@ -2132,9 +2218,10 @@ async def test_mcp_tool_execution_without_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span", "transaction") await agents.Runner.run( agent, @@ -2142,30 +2229,29 @@ async def test_mcp_tool_execution_without_pii( run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find the MCP execute_tool span mcp_tool_span = None for span in spans: - if span.get("description") == "execute_tool test_mcp_tool": + if span.get("name") == "execute_tool test_mcp_tool": mcp_tool_span = span break # Verify the MCP tool span was created but without input/output assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" + assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" # Verify input and output are not included when send_default_pii is False - assert "gen_ai.tool.input" not in mcp_tool_span["data"] - assert "gen_ai.tool.output" not in mcp_tool_span["data"] + assert "gen_ai.tool.input" not in mcp_tool_span["attributes"] + assert "gen_ai.tool.output" not in mcp_tool_span["attributes"] @pytest.mark.asyncio async def test_multiple_agents_asyncio( sentry_init, - capture_events, + capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, @@ -2190,9 +2276,10 @@ async def test_multiple_agents_asyncio( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span", "transaction") async def run(): await agents.Runner.run( @@ 
-2203,14 +2290,10 @@ async def run(): await asyncio.gather(*[run() for _ in range(3)]) - assert len(events) == 3 - txn1, txn2, txn3 = events + txn1, txn2, txn3 = (item.payload for item in items if item.type == "transaction") - assert txn1["type"] == "transaction" assert txn1["transaction"] == "test_agent workflow" - assert txn2["type"] == "transaction" assert txn2["transaction"] == "test_agent workflow" - assert txn3["type"] == "transaction" assert txn3["transaction"] == "test_agent workflow" @@ -2230,13 +2313,14 @@ async def run(): ], ) def test_openai_agents_message_role_mapping( - sentry_init, capture_events, test_message, expected_role + sentry_init, capture_items, test_message, expected_role ): """Test that OpenAI Agents integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) get_response_kwargs = {"input": [test_message]} @@ -2259,7 +2343,7 @@ def test_openai_agents_message_role_mapping( @pytest.mark.asyncio async def test_tool_execution_error_tracing( sentry_init, - capture_events, + capture_items, test_agent, get_model_response, responses_tool_call_model_responses, @@ -2336,9 +2420,10 @@ def failing_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span", "transaction") # Note: The agents library catches tool exceptions internally, # so we don't expect this to raise @@ -2348,13 +2433,12 @@ def failing_tool(message: str) -> str: run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find the execute_tool span execute_tool_span = None for span in spans: - description = span.get("description", "") + description = span.get("name", "") if 
description is not None and description.startswith( "execute_tool failing_tool" ): @@ -2363,19 +2447,18 @@ def failing_tool(message: str) -> str: # Verify the execute_tool span was created assert execute_tool_span is not None, "execute_tool span was not created" - assert execute_tool_span["description"] == "execute_tool failing_tool" - assert execute_tool_span["data"]["gen_ai.tool.name"] == "failing_tool" + assert execute_tool_span["name"] == "execute_tool failing_tool" + assert execute_tool_span["attributes"]["gen_ai.tool.name"] == "failing_tool" # Verify error status was set (this is the key test for our patch) # The span should be marked as error because the tool execution failed - assert execute_tool_span["status"] == "internal_error" - assert execute_tool_span["tags"]["status"] == "internal_error" + assert execute_tool_span["status"] == "error" @pytest.mark.asyncio async def test_invoke_agent_span_includes_usage_data( sentry_init, - capture_events, + capture_items, test_agent, get_model_response, ): @@ -2435,9 +2518,10 @@ async def test_invoke_agent_span_includes_usage_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, "Test input", run_config=test_run_config @@ -2445,29 +2529,30 @@ async def test_invoke_agent_span_includes_usage_data( assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ) # Verify invoke_agent span has usage data from context_wrapper - assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert "gen_ai.usage.input_tokens" in invoke_agent_span["data"] 
- assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens.reasoning"] == 5 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 5 @pytest.mark.asyncio async def test_ai_client_span_includes_response_model( sentry_init, - capture_events, + capture_items, test_agent, get_model_response, ): @@ -2527,9 +2612,10 @@ async def test_ai_client_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, "Test input", run_config=test_run_config @@ -2537,20 +2623,21 @@ async def test_ai_client_span_includes_response_model( assert result is not None - (transaction,) = events - spans = transaction["spans"] - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if 
item.type == "span"] + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) # Verify ai_client span has response model from API response - assert ai_client_span["description"] == "chat gpt-4" - assert "gen_ai.response.model" in ai_client_span["data"] - assert ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert ai_client_span["name"] == "chat gpt-4" + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ai_client_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" @pytest.mark.asyncio async def test_ai_client_span_response_model_with_chat_completions( sentry_init, - capture_events, + capture_items, get_model_response, ): """ @@ -2614,9 +2701,10 @@ async def test_ai_client_span_response_model_with_chat_completions( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, "Test input", run_config=test_run_config @@ -2624,18 +2712,22 @@ async def test_ai_client_span_response_model_with_chat_completions( assert result is not None - (transaction,) = events - spans = transaction["spans"] - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) # Verify response model from API response is captured - assert "gen_ai.response.model" in ai_client_span["data"] - assert ai_client_span["data"]["gen_ai.response.model"] == "gpt-4o-mini-2024-07-18" + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ( + ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4o-mini-2024-07-18" + ) @pytest.mark.asyncio async def test_multiple_llm_calls_aggregate_usage( - 
sentry_init, capture_events, test_agent, get_model_response + sentry_init, capture_items, test_agent, get_model_response ): """ Test that invoke_agent spans show aggregated usage across multiple LLM calls @@ -2732,9 +2824,10 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent_with_tool, @@ -2744,25 +2837,24 @@ def calculator(a: int, b: int) -> int: assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = spans[0] # Verify invoke_agent span has aggregated usage from both API calls # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 30 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 50 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 # Cached tokens should be aggregated: 0 + 5 = 5 - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens.cached"] == 5 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens.cached"] == 5 # Reasoning tokens should be aggregated: 0 + 3 = 3 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens.reasoning"] == 3 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 3 @pytest.mark.asyncio async def test_invoke_agent_span_includes_response_model( sentry_init, - capture_events, + capture_items, test_agent, get_model_response, ): @@ -2821,9 +2913,10 @@ async def 
test_invoke_agent_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, "Test input", run_config=test_run_config @@ -2831,27 +2924,32 @@ async def test_invoke_agent_span_includes_response_model( assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT ) - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) # Verify invoke_agent span has response model from API - assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) # Also verify ai_client span has it - assert "gen_ai.response.model" in ai_client_span["data"] - assert ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ai_client_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" @pytest.mark.asyncio async def test_invoke_agent_span_uses_last_response_model( sentry_init, - capture_events, + capture_items, test_agent, get_model_response, ): @@ -2950,9 +3048,10 @@ def calculator(a: int, b: int) -> int: 
integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent_with_tool, @@ -2962,24 +3061,26 @@ def calculator(a: int, b: int) -> int: assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = spans[0] first_ai_client_span = spans[1] second_ai_client_span = spans[3] # After tool span # Invoke_agent span uses the LAST response model - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) # Each ai_client span has its own response model from the API - assert first_ai_client_span["data"]["gen_ai.response.model"] == "gpt-4-0613" + assert first_ai_client_span["attributes"]["gen_ai.response.model"] == "gpt-4-0613" assert ( - second_ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + second_ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" ) -def test_openai_agents_message_truncation(sentry_init, capture_events): +def test_openai_agents_message_truncation(sentry_init, capture_items): """Test that large messages are truncated properly in OpenAI Agents integration.""" large_content = ( @@ -2990,6 +3091,7 @@ def test_openai_agents_message_truncation(sentry_init, capture_events): integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_messages = [ @@ -3036,6 +3138,7 @@ async def test_streaming_span_update_captures_response_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, 
send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create a mock streaming response object (similar to what we'd get from ResponseCompletedEvent) @@ -3101,6 +3204,7 @@ async def test_streaming_ttft_on_chat_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) request_headers = {} @@ -3230,7 +3334,7 @@ async def test_streaming_ttft_on_chat_span( @pytest.mark.asyncio async def test_conversation_id_on_all_spans( sentry_init, - capture_events, + capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, @@ -3255,9 +3359,10 @@ async def test_conversation_id_on_all_spans( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, @@ -3268,24 +3373,28 @@ async def test_conversation_id_on_all_spans( assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT ) - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) # Verify workflow span (transaction) has conversation_id + (transaction,) = (item.payload for item in items if item.type == "transaction") assert ( transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] == "conv_test_123" ) # Verify invoke_agent span has conversation_id - assert invoke_agent_span["data"]["gen_ai.conversation.id"] == "conv_test_123" + assert invoke_agent_span["attributes"]["gen_ai.conversation.id"] == "conv_test_123" # Verify ai_client 
span has conversation_id - assert ai_client_span["data"]["gen_ai.conversation.id"] == "conv_test_123" + assert ai_client_span["attributes"]["gen_ai.conversation.id"] == "conv_test_123" @pytest.mark.skipif( @@ -3294,7 +3403,7 @@ async def test_conversation_id_on_all_spans( ) @pytest.mark.asyncio async def test_conversation_id_on_tool_span( - sentry_init, capture_events, test_agent, get_model_response + sentry_init, capture_items, test_agent, get_model_response ): """ Test that gen_ai.conversation.id is set on tool execution spans when passed to Runner.run(). @@ -3389,9 +3498,10 @@ def simple_tool(message: str) -> str: sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span", "transaction") await agents.Runner.run( agent_with_tool, @@ -3400,21 +3510,20 @@ def simple_tool(message: str) -> str: conversation_id="conv_tool_test_456", ) - (transaction,) = events - spans = transaction["spans"] - + spans = [item.payload for item in items if item.type == "span"] # Find the tool span tool_span = None for span in spans: - if span.get("description", "").startswith("execute_tool"): + if span.get("name", "").startswith("execute_tool"): tool_span = span break assert tool_span is not None # Tool span should have the conversation_id passed to Runner.run() - assert tool_span["data"]["gen_ai.conversation.id"] == "conv_tool_test_456" + assert tool_span["attributes"]["gen_ai.conversation.id"] == "conv_tool_test_456" # Workflow span (transaction) should have the same conversation_id + (transaction,) = (item.payload for item in items if item.type == "transaction") assert ( transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] == "conv_tool_test_456" @@ -3428,7 +3537,7 @@ def simple_tool(message: str) -> str: @pytest.mark.asyncio async def test_no_conversation_id_when_not_provided( sentry_init, - capture_events, + capture_items, test_agent, 
nonstreaming_responses_model_response, get_model_response, @@ -3453,9 +3562,10 @@ async def test_no_conversation_id_when_not_provided( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("span", "transaction") # Don't pass conversation_id result = await agents.Runner.run( @@ -3464,16 +3574,23 @@ async def test_no_conversation_id_when_not_provided( assert result is not None - (transaction,) = events - spans = transaction["spans"] + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] + + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT ) - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) # Verify conversation_id is NOT set on any spans assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get( - "data", {} + "attributes", {} ) - assert "gen_ai.conversation.id" not in invoke_agent_span.get("data", {}) - assert "gen_ai.conversation.id" not in ai_client_span.get("data", {}) + assert "gen_ai.conversation.id" not in invoke_agent_span.get("attributes", {}) + assert "gen_ai.conversation.id" not in ai_client_span.get("attributes", {}) diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index 50ce155f5b..9faccb0a84 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -53,7 +53,7 @@ def inner(): @pytest.mark.asyncio -async def test_agent_run_async(sentry_init, capture_events, 
get_test_agent): +async def test_agent_run_async(sentry_init, capture_items, get_test_agent): """ Test that the integration creates spans for async agent runs. """ @@ -61,9 +61,10 @@ async def test_agent_run_async(sentry_init, capture_events, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() result = await test_agent.run("Test input") @@ -71,8 +72,7 @@ async def test_agent_run_async(sentry_init, capture_events, get_test_agent): assert result is not None assert result.output is not None - (transaction,) = events - spans = transaction["spans"] + (transaction,) = (item.payload for item in items if item.type == "transaction") # Verify transaction (the transaction IS the invoke_agent span) assert transaction["transaction"] == "invoke_agent test_agent" @@ -81,28 +81,32 @@ async def test_agent_run_async(sentry_init, capture_events, get_test_agent): # The transaction itself should have invoke_agent data assert transaction["contexts"]["trace"]["op"] == "gen_ai.invoke_agent" + spans = [item.payload for item in items if item.type == "span"] # Find child span types (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 # Check chat span chat_span = chat_spans[0] - assert "chat" in chat_span["description"] - assert chat_span["data"]["gen_ai.operation.name"] == "chat" - assert chat_span["data"]["gen_ai.response.streaming"] is False - assert "gen_ai.request.messages" in chat_span["data"] - assert "gen_ai.usage.input_tokens" in chat_span["data"] - assert "gen_ai.usage.output_tokens" in chat_span["data"] + assert "chat" in chat_span["name"] + assert chat_span["attributes"]["gen_ai.operation.name"] 
== "chat" + assert chat_span["attributes"]["gen_ai.response.streaming"] is False + assert "gen_ai.request.messages" in chat_span["attributes"] + assert "gen_ai.usage.input_tokens" in chat_span["attributes"] + assert "gen_ai.usage.output_tokens" in chat_span["attributes"] @pytest.mark.asyncio -async def test_agent_run_async_model_error(sentry_init, capture_events): +async def test_agent_run_async_model_error(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("event", "transaction", "span") def failing_model(messages, info): raise RuntimeError("model exploded") @@ -115,17 +119,17 @@ def failing_model(messages, info): with pytest.raises(RuntimeError, match="model exploded"): await agent.run("Test input") - (error, transaction) = events + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 - assert spans[0]["status"] == "internal_error" + assert spans[0]["status"] == "error" @pytest.mark.asyncio -async def test_agent_run_async_usage_data(sentry_init, capture_events, get_test_agent): +async def test_agent_run_async_usage_data(sentry_init, capture_items, get_test_agent): """ Test that the invoke_agent span includes token usage and model data. 
""" @@ -133,9 +137,10 @@ async def test_agent_run_async_usage_data(sentry_init, capture_events, get_test_ integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() result = await test_agent.run("Test input") @@ -143,8 +148,7 @@ async def test_agent_run_async_usage_data(sentry_init, capture_events, get_test_ assert result is not None assert result.output is not None - (transaction,) = events - + (transaction,) = (item.payload for item in items if item.type == "transaction") # Verify transaction (the transaction IS the invoke_agent span) assert transaction["transaction"] == "invoke_agent test_agent" @@ -170,7 +174,7 @@ async def test_agent_run_async_usage_data(sentry_init, capture_events, get_test_ assert trace_data["gen_ai.response.model"] == "test" # Test model name -def test_agent_run_sync(sentry_init, capture_events, get_test_agent): +def test_agent_run_sync(sentry_init, capture_items, get_test_agent): """ Test that the integration creates spans for sync agent runs. 
""" @@ -178,9 +182,10 @@ def test_agent_run_sync(sentry_init, capture_events, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() result = test_agent.run_sync("Test input") @@ -188,29 +193,32 @@ def test_agent_run_sync(sentry_init, capture_events, get_test_agent): assert result is not None assert result.output is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Verify transaction + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "invoke_agent test_agent" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" # Find span types - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 # Verify streaming flag is False for sync for chat_span in chat_spans: - assert chat_span["data"]["gen_ai.response.streaming"] is False + assert chat_span["attributes"]["gen_ai.response.streaming"] is False -def test_agent_run_sync_model_error(sentry_init, capture_events): +def test_agent_run_sync_model_error(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("event", "transaction", "span") def failing_model(messages, info): raise RuntimeError("model exploded") @@ -223,17 +231,17 @@ def failing_model(messages, info): with pytest.raises(RuntimeError, match="model exploded"): agent.run_sync("Test input") - (error, transaction) = events + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - 
spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 - assert spans[0]["status"] == "internal_error" + assert spans[0]["status"] == "error" @pytest.mark.asyncio -async def test_agent_run_stream(sentry_init, capture_events, get_test_agent): +async def test_agent_run_stream(sentry_init, capture_items, get_test_agent): """ Test that the integration creates spans for streaming agent runs. """ @@ -241,9 +249,10 @@ async def test_agent_run_stream(sentry_init, capture_events, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() async with test_agent.run_stream("Test input") as result: @@ -251,31 +260,33 @@ async def test_agent_run_stream(sentry_init, capture_events, get_test_agent): async for _ in result.stream_output(): pass - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Verify transaction + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "invoke_agent test_agent" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" # Find chat spans - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 # Verify streaming flag is True for streaming for chat_span in chat_spans: - assert chat_span["data"]["gen_ai.response.streaming"] is True - assert "gen_ai.request.messages" in chat_span["data"] - assert "gen_ai.usage.input_tokens" in chat_span["data"] + assert chat_span["attributes"]["gen_ai.response.streaming"] is True + assert "gen_ai.request.messages" in chat_span["attributes"] + assert "gen_ai.usage.input_tokens" in 
chat_span["attributes"] # Streaming responses should still have output data assert ( - "gen_ai.response.text" in chat_span["data"] - or "gen_ai.response.model" in chat_span["data"] + "gen_ai.response.text" in chat_span["attributes"] + or "gen_ai.response.model" in chat_span["attributes"] ) @pytest.mark.asyncio -async def test_agent_run_stream_events(sentry_init, capture_events, get_test_agent): +async def test_agent_run_stream_events(sentry_init, capture_items, get_test_agent): """ Test that run_stream_events creates spans (it uses run internally, so non-streaming). """ @@ -283,32 +294,34 @@ async def test_agent_run_stream_events(sentry_init, capture_events, get_test_age integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") # Consume all events test_agent = get_test_agent() async for _ in test_agent.run_stream_events("Test input"): pass - (transaction,) = events - # Verify transaction + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "invoke_agent test_agent" # Find chat spans - spans = transaction["spans"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 # run_stream_events uses run() internally, so streaming should be False for chat_span in chat_spans: - assert chat_span["data"]["gen_ai.response.streaming"] is False + assert chat_span["attributes"]["gen_ai.response.streaming"] is False @pytest.mark.asyncio -async def test_agent_with_tools(sentry_init, capture_events, get_test_agent): +async def test_agent_with_tools(sentry_init, capture_items, get_test_agent): """ Test that tool execution creates execute_tool spans. 
""" @@ -316,6 +329,7 @@ async def test_agent_with_tools(sentry_init, capture_events, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -325,34 +339,39 @@ def add_numbers(a: int, b: int) -> int: """Add two numbers together.""" return a + b - events = capture_events() + items = capture_items("transaction", "span") result = await test_agent.run("What is 5 + 3?") assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find child span types (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] # Should have tool spans assert len(tool_spans) >= 1 # Check tool span tool_span = tool_spans[0] - assert "execute_tool" in tool_span["description"] - assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - assert tool_span["data"]["gen_ai.tool.name"] == "add_numbers" - assert "gen_ai.tool.input" in tool_span["data"] - assert "gen_ai.tool.output" in tool_span["data"] + assert "execute_tool" in tool_span["name"] + assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + assert tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in tool_span["attributes"] + assert "gen_ai.tool.output" in tool_span["attributes"] # Check chat spans have available_tools for chat_span in chat_spans: - assert "gen_ai.request.available_tools" in chat_span["data"] - available_tools_str = chat_span["data"]["gen_ai.request.available_tools"] + assert "gen_ai.request.available_tools" in 
chat_span["attributes"] + available_tools_str = chat_span["attributes"]["gen_ai.request.available_tools"] # Available tools is serialized as a string assert "add_numbers" in available_tools_str @@ -363,7 +382,7 @@ def add_numbers(a: int, b: int) -> int: ) @pytest.mark.asyncio async def test_agent_with_tool_model_retry( - sentry_init, capture_events, get_test_agent, handled_tool_call_exceptions + sentry_init, capture_items, get_test_agent, handled_tool_call_exceptions ): """ Test that a handled exception is captured when a tool raises ModelRetry. @@ -376,6 +395,7 @@ async def test_agent_with_tool_model_retry( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) retries = 0 @@ -391,47 +411,51 @@ def add_numbers(a: int, b: int) -> float: raise ModelRetry(message="Try again with the same arguments.") return a + b - events = capture_events() + items = capture_items("event", "transaction", "span") result = await test_agent.run("What is 5 + 3?") assert result is not None if handled_tool_call_exceptions: - (error, transaction) = events - else: - (transaction,) = events - spans = transaction["spans"] - - if handled_tool_call_exceptions: + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" assert error["exception"]["values"][0]["mechanism"]["handled"] + spans = [item.payload for item in items if item.type == "span"] # Find child span types (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] # Should have tool spans assert len(tool_spans) >= 1 # Check tool spans model_retry_tool_span = tool_spans[0] - assert "execute_tool" in 
model_retry_tool_span["description"] - assert model_retry_tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - assert model_retry_tool_span["data"]["gen_ai.tool.name"] == "add_numbers" - assert "gen_ai.tool.input" in model_retry_tool_span["data"] + assert "execute_tool" in model_retry_tool_span["name"] + assert ( + model_retry_tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + ) + assert model_retry_tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in model_retry_tool_span["attributes"] tool_span = tool_spans[1] - assert "execute_tool" in tool_span["description"] - assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - assert tool_span["data"]["gen_ai.tool.name"] == "add_numbers" - assert "gen_ai.tool.input" in tool_span["data"] - assert "gen_ai.tool.output" in tool_span["data"] + assert "execute_tool" in tool_span["name"] + assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + assert tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in tool_span["attributes"] + assert "gen_ai.tool.output" in tool_span["attributes"] # Check chat spans have available_tools for chat_span in chat_spans: - assert "gen_ai.request.available_tools" in chat_span["data"] - available_tools_str = chat_span["data"]["gen_ai.request.available_tools"] + assert "gen_ai.request.available_tools" in chat_span["attributes"] + available_tools_str = chat_span["attributes"]["gen_ai.request.available_tools"] # Available tools is serialized as a string assert "add_numbers" in available_tools_str @@ -442,7 +466,7 @@ def add_numbers(a: int, b: int) -> float: ) @pytest.mark.asyncio async def test_agent_with_tool_validation_error( - sentry_init, capture_events, get_test_agent, handled_tool_call_exceptions + sentry_init, capture_items, get_test_agent, handled_tool_call_exceptions ): """ Test that a handled exception is captured when a tool has unsatisfiable 
constraints. @@ -455,6 +479,7 @@ async def test_agent_with_tool_validation_error( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -464,7 +489,7 @@ def add_numbers(a: Annotated[int, Field(gt=0, lt=0)], b: int) -> int: """Add two numbers together.""" return a + b - events = capture_events() + items = capture_items("event", "transaction", "span") result = None with pytest.raises(UnexpectedModelBehavior): @@ -473,42 +498,45 @@ def add_numbers(a: Annotated[int, Field(gt=0, lt=0)], b: int) -> int: assert result is None if handled_tool_call_exceptions: - (error, model_behaviour_error, transaction) = events - else: ( + error, model_behaviour_error, - transaction, - ) = events - spans = transaction["spans"] - - if handled_tool_call_exceptions: + ) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" assert error["exception"]["values"][0]["mechanism"]["handled"] - # Find child span types (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] # Should have tool spans assert len(tool_spans) >= 1 # Check tool spans model_retry_tool_span = tool_spans[0] - assert "execute_tool" in model_retry_tool_span["description"] - assert model_retry_tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - assert model_retry_tool_span["data"]["gen_ai.tool.name"] == "add_numbers" - assert "gen_ai.tool.input" in model_retry_tool_span["data"] + assert "execute_tool" in model_retry_tool_span["name"] + assert ( + model_retry_tool_span["attributes"]["gen_ai.operation.name"] == 
"execute_tool" + ) + assert model_retry_tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in model_retry_tool_span["attributes"] # Check chat spans have available_tools for chat_span in chat_spans: - assert "gen_ai.request.available_tools" in chat_span["data"] - available_tools_str = chat_span["data"]["gen_ai.request.available_tools"] + assert "gen_ai.request.available_tools" in chat_span["attributes"] + available_tools_str = chat_span["attributes"]["gen_ai.request.available_tools"] # Available tools is serialized as a string assert "add_numbers" in available_tools_str @pytest.mark.asyncio -async def test_agent_with_tools_streaming(sentry_init, capture_events, get_test_agent): +async def test_agent_with_tools_streaming(sentry_init, capture_items, get_test_agent): """ Test that tool execution works correctly with streaming. """ @@ -516,6 +544,7 @@ async def test_agent_with_tools_streaming(sentry_init, capture_events, get_test_ integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -525,62 +554,67 @@ def multiply(a: int, b: int) -> int: """Multiply two numbers.""" return a * b - events = capture_events() + items = capture_items("transaction", "span") async with test_agent.run_stream("What is 7 times 8?") as result: async for _ in result.stream_output(): pass - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find span types - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] # Should have tool spans assert len(tool_spans) >= 1 # Verify streaming flag is True for 
chat_span in chat_spans: - assert chat_span["data"]["gen_ai.response.streaming"] is True + assert chat_span["attributes"]["gen_ai.response.streaming"] is True # Check tool span tool_span = tool_spans[0] - assert tool_span["data"]["gen_ai.tool.name"] == "multiply" - assert "gen_ai.tool.input" in tool_span["data"] - assert "gen_ai.tool.output" in tool_span["data"] + assert tool_span["attributes"]["gen_ai.tool.name"] == "multiply" + assert "gen_ai.tool.input" in tool_span["attributes"] + assert "gen_ai.tool.output" in tool_span["attributes"] @pytest.mark.asyncio -async def test_model_settings( - sentry_init, capture_events, get_test_agent_with_settings -): +async def test_model_settings(sentry_init, capture_items, get_test_agent_with_settings): """ Test that model settings are captured in spans. """ sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent_with_settings = get_test_agent_with_settings() await test_agent_with_settings.run("Test input") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find chat span - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 chat_span = chat_spans[0] # Check that model settings are captured - assert chat_span["data"].get("gen_ai.request.temperature") == 0.7 - assert chat_span["data"].get("gen_ai.request.max_tokens") == 100 - assert chat_span["data"].get("gen_ai.request.top_p") == 0.9 + assert chat_span["attributes"].get("gen_ai.request.temperature") == 0.7 + assert chat_span["attributes"].get("gen_ai.request.max_tokens") == 100 + assert chat_span["attributes"].get("gen_ai.request.top_p") == 0.9 @pytest.mark.asyncio @@ -594,7 +628,7 @@ async def test_model_settings( ], ) 
async def test_system_prompt_attribute( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """ Test that system prompts are included as the first message. @@ -609,23 +643,27 @@ async def test_system_prompt_attribute( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") await agent.run("Hello") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # The transaction IS the invoke_agent span, check for messages in chat spans instead - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 chat_span = chat_spans[0] if send_default_pii and include_prompts: - system_instructions = chat_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + system_instructions = chat_span["attributes"][ + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS + ] assert json.loads(system_instructions) == [ { "type": "text", @@ -633,11 +671,11 @@ async def test_system_prompt_attribute( } ] else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["attributes"] @pytest.mark.asyncio -async def test_error_handling(sentry_init, capture_events): +async def test_error_handling(sentry_init, capture_items): """ Test error handling in agent execution. 
""" @@ -651,16 +689,16 @@ async def test_error_handling(sentry_init, capture_events): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") # Simple run that should succeed await agent.run("Hello") # At minimum, we should have a transaction - assert len(events) >= 1 - transaction = [e for e in events if e.get("type") == "transaction"][0] + transaction = next(item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "invoke_agent test_error" # Transaction should complete successfully (status key may not exist if no error) trace_status = transaction["contexts"]["trace"].get("status") @@ -668,7 +706,7 @@ async def test_error_handling(sentry_init, capture_events): @pytest.mark.asyncio -async def test_without_pii(sentry_init, capture_events, get_test_agent): +async def test_without_pii(sentry_init, capture_items, get_test_agent): """ Test that PII is not captured when send_default_pii is False. 
""" @@ -676,27 +714,29 @@ async def test_without_pii(sentry_init, capture_events, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() await test_agent.run("Sensitive input") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find child spans (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # Verify that messages and response text are not captured for span in chat_spans: - assert "gen_ai.request.messages" not in span["data"] - assert "gen_ai.response.text" not in span["data"] + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] @pytest.mark.asyncio -async def test_without_pii_tools(sentry_init, capture_events, get_test_agent): +async def test_without_pii_tools(sentry_init, capture_items, get_test_agent): """ Test that tool input/output are not captured when send_default_pii is False. 
""" @@ -704,6 +744,7 @@ async def test_without_pii_tools(sentry_init, capture_events, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -713,33 +754,37 @@ def sensitive_tool(data: str) -> str: """A tool with sensitive data.""" return f"Processed: {data}" - events = capture_events() + items = capture_items("transaction", "span") await test_agent.run("Use sensitive tool with private data") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find tool spans - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] # If tool was executed, verify input/output are not captured for tool_span in tool_spans: - assert "gen_ai.tool.input" not in tool_span["data"] - assert "gen_ai.tool.output" not in tool_span["data"] + assert "gen_ai.tool.input" not in tool_span["attributes"] + assert "gen_ai.tool.output" not in tool_span["attributes"] @pytest.mark.asyncio -async def test_multiple_agents_concurrent(sentry_init, capture_events, get_test_agent): +async def test_multiple_agents_concurrent(sentry_init, capture_items, get_test_agent): """ Test that multiple agents can run concurrently without interfering. 
""" sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() @@ -750,18 +795,15 @@ async def run_agent(input_text): results = await asyncio.gather(*[run_agent(f"Input {i}") for i in range(3)]) assert len(results) == 3 - assert len(events) == 3 # Verify each transaction is separate + events = [item.payload for item in items if item.type == "transaction"] for i, transaction in enumerate(events): - assert transaction["type"] == "transaction" assert transaction["transaction"] == "invoke_agent test_agent" - # Each should have its own spans - assert len(transaction["spans"]) >= 1 @pytest.mark.asyncio -async def test_message_history(sentry_init, capture_events): +async def test_message_history(sentry_init, capture_items): """ Test that full conversation history is captured in chat spans. """ @@ -774,9 +816,10 @@ async def test_message_history(sentry_init, capture_events): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") # First message await agent.run("Hello, I'm Alice") @@ -797,51 +840,56 @@ async def test_message_history(sentry_init, capture_events): await agent.run("What is my name?", message_history=history) # We should have 2 transactions + events = [item.payload for item in items if item.type == "transaction"] assert len(events) >= 2 # Check the second transaction has the full history second_transaction = events[1] spans = second_transaction["spans"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] if chat_spans: chat_span = chat_spans[0] - if "gen_ai.request.messages" in chat_span["data"]: - messages_data = 
chat_span["data"]["gen_ai.request.messages"] + if "gen_ai.request.messages" in chat_span["attributes"]: + messages_data = chat_span["attributes"]["gen_ai.request.messages"] # Should have multiple messages including history assert len(messages_data) > 1 @pytest.mark.asyncio -async def test_gen_ai_system(sentry_init, capture_events, get_test_agent): +async def test_gen_ai_system(sentry_init, capture_items, get_test_agent): """ Test that gen_ai.system is set from the model. """ sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() await test_agent.run("Test input") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find chat span - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 chat_span = chat_spans[0] # gen_ai.system should be set from the model (TestModel -> 'test') - assert "gen_ai.system" in chat_span["data"] - assert chat_span["data"]["gen_ai.system"] == "test" + assert "gen_ai.system" in chat_span["attributes"] + assert chat_span["attributes"]["gen_ai.system"] == "test" @pytest.mark.asyncio -async def test_include_prompts_false(sentry_init, capture_events, get_test_agent): +async def test_include_prompts_false(sentry_init, capture_items, get_test_agent): """ Test that prompts are not captured when include_prompts=False. 
""" @@ -849,27 +897,29 @@ async def test_include_prompts_false(sentry_init, capture_events, get_test_agent integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, # Even with PII enabled, prompts should not be captured + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() await test_agent.run("Sensitive prompt") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find child spans (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # Verify that messages and response text are not captured for span in chat_spans: - assert "gen_ai.request.messages" not in span["data"] - assert "gen_ai.response.text" not in span["data"] + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] @pytest.mark.asyncio -async def test_include_prompts_true(sentry_init, capture_events, get_test_agent): +async def test_include_prompts_true(sentry_init, capture_items, get_test_agent): """ Test that prompts are captured when include_prompts=True (default). 
""" @@ -877,28 +927,30 @@ async def test_include_prompts_true(sentry_init, capture_events, get_test_agent) integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() await test_agent.run("Test prompt") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find child spans (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # Verify that messages are captured in chat spans assert len(chat_spans) >= 1 for chat_span in chat_spans: - assert "gen_ai.request.messages" in chat_span["data"] + assert "gen_ai.request.messages" in chat_span["attributes"] @pytest.mark.asyncio async def test_include_prompts_false_with_tools( - sentry_init, capture_events, get_test_agent + sentry_init, capture_items, get_test_agent ): """ Test that tool input/output are not captured when include_prompts=False. 
@@ -907,6 +959,7 @@ async def test_include_prompts_false_with_tools( integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -916,26 +969,27 @@ def test_tool(value: int) -> int: """A test tool.""" return value * 2 - events = capture_events() + items = capture_items("transaction", "span") await test_agent.run("Use the test tool with value 5") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find tool spans - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] # If tool was executed, verify input/output are not captured for tool_span in tool_spans: - assert "gen_ai.tool.input" not in tool_span["data"] - assert "gen_ai.tool.output" not in tool_span["data"] + assert "gen_ai.tool.input" not in tool_span["attributes"] + assert "gen_ai.tool.output" not in tool_span["attributes"] @pytest.mark.asyncio -async def test_include_prompts_requires_pii( - sentry_init, capture_events, get_test_agent -): +async def test_include_prompts_requires_pii(sentry_init, capture_items, get_test_agent): """ Test that include_prompts requires send_default_pii=True. 
""" @@ -943,27 +997,29 @@ async def test_include_prompts_requires_pii( integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() await test_agent.run("Test prompt") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find child spans (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # Even with include_prompts=True, if PII is disabled, messages should not be captured for span in chat_spans: - assert "gen_ai.request.messages" not in span["data"] - assert "gen_ai.response.text" not in span["data"] + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] @pytest.mark.asyncio -async def test_mcp_tool_execution_spans(sentry_init, capture_events): +async def test_mcp_tool_execution_spans(sentry_init, capture_items): """ Test that MCP (Model Context Protocol) tool calls create execute_tool spans. 
@@ -1033,14 +1089,13 @@ async def mock_map_tool_result_part(part): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") # Simulate MCP tool execution within a transaction through CombinedToolset - with sentry_sdk.start_transaction( - op="ai.run", name="invoke_agent test_mcp_agent" - ) as transaction: + with sentry_sdk.start_transaction(op="ai.run", name="invoke_agent test_mcp_agent"): # Set up the agent context scope = sentry_sdk.get_current_scope() scope._contexts["pydantic_ai_agent"] = { @@ -1080,13 +1135,10 @@ async def mock_map_tool_result_part(part): # MCP tool might raise if not fully mocked, that's okay pass - events_list = events + events_list = items if len(events_list) == 0: pytest.skip("No events captured, MCP test setup incomplete") - (transaction,) = events_list - transaction["spans"] - # Note: This test manually calls combined.call_tool which doesn't go through # ToolManager._call_tool (which is what the integration patches). # In real-world usage, MCP tools are called through agent.run() which uses ToolManager. 
@@ -1107,6 +1159,7 @@ async def test_context_cleanup_after_run(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1130,6 +1183,7 @@ def test_context_cleanup_after_run_sync(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1154,6 +1208,7 @@ async def test_context_cleanup_after_streaming(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1180,6 +1235,7 @@ async def test_context_cleanup_on_error(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1214,6 +1270,7 @@ async def test_context_isolation_concurrent_agents(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create a second agent @@ -1256,7 +1313,7 @@ async def run_and_check_context(agent, agent_name): @pytest.mark.asyncio -async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_events): +async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_items): """ Test that invoke_agent span handles list user prompts correctly. 
""" @@ -1269,17 +1326,17 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_events): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") # Use a list as user prompt await agent.run(["First part", "Second part"]) - (transaction,) = events - # Check that the invoke_agent transaction has messages data # The invoke_agent is the transaction itself + (transaction,) = [item.payload for item in items if item.type == "transaction"] if "gen_ai.request.messages" in transaction["contexts"]["trace"]["data"]: messages_str = transaction["contexts"]["trace"]["data"][ "gen_ai.request.messages" @@ -1299,7 +1356,7 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_events): ], ) async def test_invoke_agent_with_instructions( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """ Test that invoke_agent span handles instructions correctly. 
@@ -1320,33 +1377,37 @@ async def test_invoke_agent_with_instructions( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") await agent.run("Test input") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # The transaction IS the invoke_agent span, check for messages in chat spans instead - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 chat_span = chat_spans[0] if send_default_pii and include_prompts: - system_instructions = chat_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + system_instructions = chat_span["attributes"][ + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS + ] assert json.loads(system_instructions) == [ {"type": "text", "content": "System prompt"}, {"type": "text", "content": "Instruction 1\nInstruction 2"}, ] else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["attributes"] @pytest.mark.asyncio -async def test_model_name_extraction_with_callable(sentry_init, capture_events): +async def test_model_name_extraction_with_callable(sentry_init, capture_items): """ Test model name extraction when model has a callable name() method. 
""" @@ -1356,6 +1417,7 @@ async def test_model_name_extraction_with_callable(sentry_init, capture_events): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Test the utility function directly @@ -1372,7 +1434,7 @@ async def test_model_name_extraction_with_callable(sentry_init, capture_events): @pytest.mark.asyncio -async def test_model_name_extraction_fallback_to_str(sentry_init, capture_events): +async def test_model_name_extraction_fallback_to_str(sentry_init, capture_items): """ Test model name extraction falls back to str() when no name attribute exists. """ @@ -1382,6 +1444,7 @@ async def test_model_name_extraction_fallback_to_str(sentry_init, capture_events sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Test the utility function directly @@ -1399,7 +1462,7 @@ async def test_model_name_extraction_fallback_to_str(sentry_init, capture_events @pytest.mark.asyncio -async def test_model_settings_object_style(sentry_init, capture_events): +async def test_model_settings_object_style(sentry_init, capture_items): """ Test that object-style model settings (non-dict) are handled correctly. """ @@ -1410,6 +1473,7 @@ async def test_model_settings_object_style(sentry_init, capture_events): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1433,7 +1497,7 @@ async def test_model_settings_object_style(sentry_init, capture_events): @pytest.mark.asyncio -async def test_usage_data_partial(sentry_init, capture_events): +async def test_usage_data_partial(sentry_init, capture_items): """ Test that usage data is correctly handled when only some fields are present. 
""" @@ -1445,16 +1509,18 @@ async def test_usage_data_partial(sentry_init, capture_events): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") await agent.run("Test input") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 # Check that usage data fields exist (they may or may not be set depending on TestModel) @@ -1464,7 +1530,7 @@ async def test_usage_data_partial(sentry_init, capture_events): @pytest.mark.asyncio -async def test_agent_data_from_scope(sentry_init, capture_events): +async def test_agent_data_from_scope(sentry_init, capture_items): """ Test that agent data can be retrieved from Sentry scope when not passed directly. """ @@ -1477,22 +1543,22 @@ async def test_agent_data_from_scope(sentry_init, capture_events): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") # The integration automatically sets agent in scope during execution await agent.run("Test input") - (transaction,) = events - - # Verify agent name is captured + # Verify agent name is capture + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "invoke_agent test_scope_agent" @pytest.mark.asyncio async def test_available_tools_without_description( - sentry_init, capture_events, get_test_agent + sentry_init, capture_items, get_test_agent ): """ Test that available tools are captured even when description is missing. 
@@ -1500,6 +1566,7 @@ async def test_available_tools_without_description( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1509,23 +1576,24 @@ def tool_without_desc(x: int) -> int: # No docstring = no description return x * 2 - events = capture_events() + items = capture_items("transaction", "span") await test_agent.run("Use the tool with 5") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] if chat_spans: chat_span = chat_spans[0] - if "gen_ai.request.available_tools" in chat_span["data"]: - tools_str = chat_span["data"]["gen_ai.request.available_tools"] + if "gen_ai.request.available_tools" in chat_span["attributes"]: + tools_str = chat_span["attributes"]["gen_ai.request.available_tools"] assert "tool_without_desc" in tools_str @pytest.mark.asyncio -async def test_output_with_tool_calls(sentry_init, capture_events, get_test_agent): +async def test_output_with_tool_calls(sentry_init, capture_items, get_test_agent): """ Test that tool calls in model response are captured correctly. 
""" @@ -1533,6 +1601,7 @@ async def test_output_with_tool_calls(sentry_init, capture_events, get_test_agen integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1542,14 +1611,15 @@ def calc_tool(value: int) -> int: """Calculate something.""" return value + 10 - events = capture_events() + items = capture_items("transaction", "span") await test_agent.run("Use calc_tool with 5") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # At least one chat span should exist assert len(chat_spans) >= 1 @@ -1558,11 +1628,11 @@ def calc_tool(value: int) -> int: for chat_span in chat_spans: # Tool calls may or may not be in response depending on TestModel behavior # Just verify the span was created and has basic data - assert "gen_ai.operation.name" in chat_span["data"] + assert "gen_ai.operation.name" in chat_span["attributes"] @pytest.mark.asyncio -async def test_message_formatting_with_different_parts(sentry_init, capture_events): +async def test_message_formatting_with_different_parts(sentry_init, capture_items): """ Test that different message part types are handled correctly in ai_client span. 
""" @@ -1577,9 +1647,10 @@ async def test_message_formatting_with_different_parts(sentry_init, capture_even integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") # Create message history with different part types history = [ @@ -1594,24 +1665,25 @@ async def test_message_formatting_with_different_parts(sentry_init, capture_even await agent.run("What did I say?", message_history=history) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # Should have chat spans assert len(chat_spans) >= 1 # Check that messages are captured chat_span = chat_spans[0] - if "gen_ai.request.messages" in chat_span["data"]: - messages_data = chat_span["data"]["gen_ai.request.messages"] + if "gen_ai.request.messages" in chat_span["attributes"]: + messages_data = chat_span["attributes"]["gen_ai.request.messages"] # Should contain message history assert messages_data is not None @pytest.mark.asyncio -async def test_update_invoke_agent_span_with_none_output(sentry_init, capture_events): +async def test_update_invoke_agent_span_with_none_output(sentry_init, capture_items): """ Test that update_invoke_agent_span handles None output gracefully. 
""" @@ -1624,6 +1696,7 @@ async def test_update_invoke_agent_span_with_none_output(sentry_init, capture_ev integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1639,7 +1712,7 @@ async def test_update_invoke_agent_span_with_none_output(sentry_init, capture_ev @pytest.mark.asyncio -async def test_update_ai_client_span_with_none_response(sentry_init, capture_events): +async def test_update_ai_client_span_with_none_response(sentry_init, capture_items): """ Test that update_ai_client_span handles None response gracefully. """ @@ -1651,6 +1724,7 @@ async def test_update_ai_client_span_with_none_response(sentry_init, capture_eve sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1666,7 +1740,7 @@ async def test_update_ai_client_span_with_none_response(sentry_init, capture_eve @pytest.mark.asyncio -async def test_agent_without_name(sentry_init, capture_events): +async def test_agent_without_name(sentry_init, capture_items): """ Test that agent without a name is handled correctly. 
""" @@ -1676,22 +1750,21 @@ async def test_agent_without_name(sentry_init, capture_events): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") await agent.run("Test input") - (transaction,) = events - # Should still create transaction, just with default name - assert transaction["type"] == "transaction" + (transaction,) = (item.payload for item in items if item.type == "transaction") # Transaction name should be "invoke_agent agent" or similar default assert "invoke_agent" in transaction["transaction"] @pytest.mark.asyncio -async def test_model_response_without_parts(sentry_init, capture_events): +async def test_model_response_without_parts(sentry_init, capture_items): """ Test handling of model response without parts attribute. """ @@ -1703,6 +1776,7 @@ async def test_model_response_without_parts(sentry_init, capture_events): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1723,7 +1797,7 @@ async def test_model_response_without_parts(sentry_init, capture_events): @pytest.mark.asyncio -async def test_input_messages_error_handling(sentry_init, capture_events): +async def test_input_messages_error_handling(sentry_init, capture_items): """ Test that _set_input_messages handles errors gracefully. 
""" @@ -1733,6 +1807,7 @@ async def test_input_messages_error_handling(sentry_init, capture_events): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1751,7 +1826,7 @@ async def test_input_messages_error_handling(sentry_init, capture_events): @pytest.mark.asyncio -async def test_available_tools_error_handling(sentry_init, capture_events): +async def test_available_tools_error_handling(sentry_init, capture_items): """ Test that _set_available_tools handles errors gracefully. """ @@ -1762,6 +1837,7 @@ async def test_available_tools_error_handling(sentry_init, capture_events): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1781,7 +1857,7 @@ async def test_available_tools_error_handling(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_usage_data_with_none_usage(sentry_init, capture_events): +async def test_set_usage_data_with_none_usage(sentry_init, capture_items): """ Test that _set_usage_data handles None usage gracefully. """ @@ -1791,6 +1867,7 @@ async def test_set_usage_data_with_none_usage(sentry_init, capture_events): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1806,7 +1883,7 @@ async def test_set_usage_data_with_none_usage(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_usage_data_with_partial_fields(sentry_init, capture_events): +async def test_set_usage_data_with_partial_fields(sentry_init, capture_items): """ Test that _set_usage_data handles usage with only some fields. 
""" @@ -1817,6 +1894,7 @@ async def test_set_usage_data_with_partial_fields(sentry_init, capture_events): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1838,7 +1916,7 @@ async def test_set_usage_data_with_partial_fields(sentry_init, capture_events): @pytest.mark.asyncio -async def test_message_parts_with_tool_return(sentry_init, capture_events): +async def test_message_parts_with_tool_return(sentry_init, capture_items): """ Test that ToolReturnPart messages are handled correctly. """ @@ -1858,24 +1936,26 @@ def test_tool(x: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") # Run with history containing tool return await agent.run("Use test_tool with 5") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # Should have chat spans assert len(chat_spans) >= 1 @pytest.mark.asyncio -async def test_message_parts_with_list_content(sentry_init, capture_events): +async def test_message_parts_with_list_content(sentry_init, capture_items): """ Test that message parts with list content are handled correctly. 
""" @@ -1886,6 +1966,7 @@ async def test_message_parts_with_list_content(sentry_init, capture_events): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1910,7 +1991,7 @@ async def test_message_parts_with_list_content(sentry_init, capture_events): @pytest.mark.asyncio -async def test_output_data_with_text_and_tool_calls(sentry_init, capture_events): +async def test_output_data_with_text_and_tool_calls(sentry_init, capture_items): """ Test that _set_output_data handles both text and tool calls in response. """ @@ -1922,6 +2003,7 @@ async def test_output_data_with_text_and_tool_calls(sentry_init, capture_events) integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1949,7 +2031,7 @@ async def test_output_data_with_text_and_tool_calls(sentry_init, capture_events) @pytest.mark.asyncio -async def test_output_data_error_handling(sentry_init, capture_events): +async def test_output_data_error_handling(sentry_init, capture_items): """ Test that _set_output_data handles errors in formatting gracefully. """ @@ -1961,6 +2043,7 @@ async def test_output_data_error_handling(sentry_init, capture_events): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1981,7 +2064,7 @@ async def test_output_data_error_handling(sentry_init, capture_events): @pytest.mark.asyncio -async def test_message_with_system_prompt_part(sentry_init, capture_events): +async def test_message_with_system_prompt_part(sentry_init, capture_items): """ Test that SystemPromptPart is handled with correct role. 
""" @@ -1993,6 +2076,7 @@ async def test_message_with_system_prompt_part(sentry_init, capture_events): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2017,7 +2101,7 @@ async def test_message_with_system_prompt_part(sentry_init, capture_events): @pytest.mark.asyncio -async def test_message_with_instructions(sentry_init, capture_events): +async def test_message_with_instructions(sentry_init, capture_items): """ Test that messages with instructions field are handled correctly. """ @@ -2028,6 +2112,7 @@ async def test_message_with_instructions(sentry_init, capture_events): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2052,7 +2137,7 @@ async def test_message_with_instructions(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_input_messages_without_prompts(sentry_init, capture_events): +async def test_set_input_messages_without_prompts(sentry_init, capture_items): """ Test that _set_input_messages respects _should_send_prompts(). """ @@ -2062,6 +2147,7 @@ async def test_set_input_messages_without_prompts(sentry_init, capture_events): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2078,7 +2164,7 @@ async def test_set_input_messages_without_prompts(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_output_data_without_prompts(sentry_init, capture_events): +async def test_set_output_data_without_prompts(sentry_init, capture_items): """ Test that _set_output_data respects _should_send_prompts(). 
""" @@ -2090,6 +2176,7 @@ async def test_set_output_data_without_prompts(sentry_init, capture_events): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2107,7 +2194,7 @@ async def test_set_output_data_without_prompts(sentry_init, capture_events): @pytest.mark.asyncio -async def test_get_model_name_with_exception_in_callable(sentry_init, capture_events): +async def test_get_model_name_with_exception_in_callable(sentry_init, capture_items): """ Test that _get_model_name handles exceptions in name() callable. """ @@ -2117,6 +2204,7 @@ async def test_get_model_name_with_exception_in_callable(sentry_init, capture_ev sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create model with callable name that raises exception @@ -2131,7 +2219,7 @@ async def test_get_model_name_with_exception_in_callable(sentry_init, capture_ev @pytest.mark.asyncio -async def test_get_model_name_with_string_model(sentry_init, capture_events): +async def test_get_model_name_with_string_model(sentry_init, capture_items): """ Test that _get_model_name handles string models. """ @@ -2140,6 +2228,7 @@ async def test_get_model_name_with_string_model(sentry_init, capture_events): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Pass a string as model @@ -2150,7 +2239,7 @@ async def test_get_model_name_with_string_model(sentry_init, capture_events): @pytest.mark.asyncio -async def test_get_model_name_with_none(sentry_init, capture_events): +async def test_get_model_name_with_none(sentry_init, capture_items): """ Test that _get_model_name handles None model. 
""" @@ -2159,6 +2248,7 @@ async def test_get_model_name_with_none(sentry_init, capture_events): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Pass None @@ -2169,7 +2259,7 @@ async def test_get_model_name_with_none(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_model_data_with_system(sentry_init, capture_events): +async def test_set_model_data_with_system(sentry_init, capture_items): """ Test that _set_model_data captures system from model. """ @@ -2180,6 +2270,7 @@ async def test_set_model_data_with_system(sentry_init, capture_events): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2200,7 +2291,7 @@ async def test_set_model_data_with_system(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_model_data_from_agent_scope(sentry_init, capture_events): +async def test_set_model_data_from_agent_scope(sentry_init, capture_items): """ Test that _set_model_data retrieves model from agent in scope when not passed. """ @@ -2211,6 +2302,7 @@ async def test_set_model_data_from_agent_scope(sentry_init, capture_events): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2234,7 +2326,7 @@ async def test_set_model_data_from_agent_scope(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_model_data_with_none_settings_values(sentry_init, capture_events): +async def test_set_model_data_with_none_settings_values(sentry_init, capture_items): """ Test that _set_model_data skips None values in settings. 
""" @@ -2244,6 +2336,7 @@ async def test_set_model_data_with_none_settings_values(sentry_init, capture_eve sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2266,7 +2359,7 @@ async def test_set_model_data_with_none_settings_values(sentry_init, capture_eve @pytest.mark.asyncio -async def test_should_send_prompts_without_pii(sentry_init, capture_events): +async def test_should_send_prompts_without_pii(sentry_init, capture_items): """ Test that _should_send_prompts returns False when PII disabled. """ @@ -2276,6 +2369,7 @@ async def test_should_send_prompts_without_pii(sentry_init, capture_events): integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) # Should return False @@ -2284,7 +2378,7 @@ async def test_should_send_prompts_without_pii(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_agent_data_without_agent(sentry_init, capture_events): +async def test_set_agent_data_without_agent(sentry_init, capture_items): """ Test that _set_agent_data handles None agent gracefully. """ @@ -2294,6 +2388,7 @@ async def test_set_agent_data_without_agent(sentry_init, capture_events): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2309,7 +2404,7 @@ async def test_set_agent_data_without_agent(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_agent_data_from_scope(sentry_init, capture_events): +async def test_set_agent_data_from_scope(sentry_init, capture_items): """ Test that _set_agent_data retrieves agent from scope when not passed. 
""" @@ -2320,6 +2415,7 @@ async def test_set_agent_data_from_scope(sentry_init, capture_events): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2341,7 +2437,7 @@ async def test_set_agent_data_from_scope(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_agent_data_without_name(sentry_init, capture_events): +async def test_set_agent_data_without_name(sentry_init, capture_items): """ Test that _set_agent_data handles agent without name attribute. """ @@ -2352,6 +2448,7 @@ async def test_set_agent_data_without_name(sentry_init, capture_events): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2371,7 +2468,7 @@ async def test_set_agent_data_without_name(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_available_tools_without_toolset(sentry_init, capture_events): +async def test_set_available_tools_without_toolset(sentry_init, capture_items): """ Test that _set_available_tools handles agent without toolset. """ @@ -2382,6 +2479,7 @@ async def test_set_available_tools_without_toolset(sentry_init, capture_events): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2401,7 +2499,7 @@ async def test_set_available_tools_without_toolset(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_available_tools_with_schema(sentry_init, capture_events): +async def test_set_available_tools_with_schema(sentry_init, capture_items): """ Test that _set_available_tools extracts tool schema correctly. 
""" @@ -2412,6 +2510,7 @@ async def test_set_available_tools_with_schema(sentry_init, capture_events): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2437,7 +2536,7 @@ async def test_set_available_tools_with_schema(sentry_init, capture_events): @pytest.mark.asyncio -async def test_execute_tool_span_creation(sentry_init, capture_events): +async def test_execute_tool_span_creation(sentry_init, capture_items): """ Test direct creation of execute_tool span. """ @@ -2451,6 +2550,7 @@ async def test_execute_tool_span_creation(sentry_init, capture_events): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2464,7 +2564,7 @@ async def test_execute_tool_span_creation(sentry_init, capture_events): @pytest.mark.asyncio -async def test_execute_tool_span_with_mcp_type(sentry_init, capture_events): +async def test_execute_tool_span_with_mcp_type(sentry_init, capture_items): """ Test execute_tool span with MCP tool type. """ @@ -2477,6 +2577,7 @@ async def test_execute_tool_span_with_mcp_type(sentry_init, capture_events): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2490,7 +2591,7 @@ async def test_execute_tool_span_with_mcp_type(sentry_init, capture_events): @pytest.mark.asyncio -async def test_execute_tool_span_without_prompts(sentry_init, capture_events): +async def test_execute_tool_span_without_prompts(sentry_init, capture_items): """ Test that execute_tool span respects _should_send_prompts(). 
""" @@ -2504,6 +2605,7 @@ async def test_execute_tool_span_without_prompts(sentry_init, capture_events): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2517,7 +2619,7 @@ async def test_execute_tool_span_without_prompts(sentry_init, capture_events): @pytest.mark.asyncio -async def test_execute_tool_span_with_none_args(sentry_init, capture_events): +async def test_execute_tool_span_with_none_args(sentry_init, capture_items): """ Test execute_tool span with None args. """ @@ -2528,6 +2630,7 @@ async def test_execute_tool_span_with_none_args(sentry_init, capture_events): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2540,7 +2643,7 @@ async def test_execute_tool_span_with_none_args(sentry_init, capture_events): @pytest.mark.asyncio -async def test_update_execute_tool_span_with_none_span(sentry_init, capture_events): +async def test_update_execute_tool_span_with_none_span(sentry_init, capture_items): """ Test that update_execute_tool_span handles None span gracefully. """ @@ -2551,6 +2654,7 @@ async def test_update_execute_tool_span_with_none_span(sentry_init, capture_even sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Update with None span - should not raise @@ -2561,7 +2665,7 @@ async def test_update_execute_tool_span_with_none_span(sentry_init, capture_even @pytest.mark.asyncio -async def test_update_execute_tool_span_with_none_result(sentry_init, capture_events): +async def test_update_execute_tool_span_with_none_result(sentry_init, capture_items): """ Test that update_execute_tool_span handles None result gracefully. 
""" @@ -2575,6 +2679,7 @@ async def test_update_execute_tool_span_with_none_result(sentry_init, capture_ev integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2588,7 +2693,7 @@ async def test_update_execute_tool_span_with_none_result(sentry_init, capture_ev @pytest.mark.asyncio -async def test_tool_execution_without_span_context(sentry_init, capture_events): +async def test_tool_execution_without_span_context(sentry_init, capture_items): """ Test that tool execution patch handles case when no span context exists. This tests the code path where current_span is None in _patch_tool_execution. @@ -2598,6 +2703,7 @@ async def test_tool_execution_without_span_context(sentry_init, capture_events): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create a simple agent with no tools (won't have function_toolset) @@ -2617,7 +2723,7 @@ async def test_tool_execution_without_span_context(sentry_init, capture_events): @pytest.mark.asyncio -async def test_invoke_agent_span_with_callable_instruction(sentry_init, capture_events): +async def test_invoke_agent_span_with_callable_instruction(sentry_init, capture_items): """ Test that invoke_agent_span skips callable instructions correctly. 
""" @@ -2629,6 +2735,7 @@ async def test_invoke_agent_span_with_callable_instruction(sentry_init, capture_ integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2650,7 +2757,7 @@ async def test_invoke_agent_span_with_callable_instruction(sentry_init, capture_ @pytest.mark.asyncio -async def test_invoke_agent_span_with_string_instructions(sentry_init, capture_events): +async def test_invoke_agent_span_with_string_instructions(sentry_init, capture_items): """ Test that invoke_agent_span handles string instructions (not list). """ @@ -2662,6 +2769,7 @@ async def test_invoke_agent_span_with_string_instructions(sentry_init, capture_e integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2680,7 +2788,7 @@ async def test_invoke_agent_span_with_string_instructions(sentry_init, capture_e @pytest.mark.asyncio -async def test_ai_client_span_with_streaming_flag(sentry_init, capture_events): +async def test_ai_client_span_with_streaming_flag(sentry_init, capture_items): """ Test that ai_client_span reads streaming flag from scope. 
""" @@ -2690,6 +2798,7 @@ async def test_ai_client_span_with_streaming_flag(sentry_init, capture_events): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2706,7 +2815,7 @@ async def test_ai_client_span_with_streaming_flag(sentry_init, capture_events): @pytest.mark.asyncio -async def test_ai_client_span_gets_agent_from_scope(sentry_init, capture_events): +async def test_ai_client_span_gets_agent_from_scope(sentry_init, capture_items): """ Test that ai_client_span gets agent from scope when not passed. """ @@ -2717,6 +2826,7 @@ async def test_ai_client_span_gets_agent_from_scope(sentry_init, capture_events) sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2759,15 +2869,16 @@ def _find_binary_content(messages_data, expected_modality, expected_mime_type): @pytest.mark.asyncio -async def test_binary_content_encoding_image(sentry_init, capture_events): +async def test_binary_content_encoding_image(sentry_init, capture_items): """Test that BinaryContent with image data is properly encoded in messages.""" sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") with sentry_sdk.start_transaction(op="test", name="test"): span = sentry_sdk.start_span(op="test_span") @@ -2782,22 +2893,23 @@ async def test_binary_content_encoding_image(sentry_init, capture_events): _set_input_messages(span, [mock_msg]) span.finish() - (event,) = events + (event,) = (item.payload for item in items if item.type == "transaction") span_data = event["spans"][0]["data"] messages_data = _get_messages_from_span(span_data) assert 
_find_binary_content(messages_data, "image", "image/png") @pytest.mark.asyncio -async def test_binary_content_encoding_mixed_content(sentry_init, capture_events): +async def test_binary_content_encoding_mixed_content(sentry_init, capture_items): """Test that BinaryContent mixed with text content is properly handled.""" sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") with sentry_sdk.start_transaction(op="test", name="test"): span = sentry_sdk.start_span(op="test_span") @@ -2814,7 +2926,7 @@ async def test_binary_content_encoding_mixed_content(sentry_init, capture_events _set_input_messages(span, [mock_msg]) span.finish() - (event,) = events + (event,) = (item.payload for item in items if item.type == "transaction") span_data = event["spans"][0]["data"] messages_data = _get_messages_from_span(span_data) @@ -2830,7 +2942,7 @@ async def test_binary_content_encoding_mixed_content(sentry_init, capture_events @pytest.mark.asyncio -async def test_binary_content_in_agent_run(sentry_init, capture_events): +async def test_binary_content_in_agent_run(sentry_init, capture_items): """Test that BinaryContent in actual agent run is properly captured in spans.""" agent = Agent("test", name="test_binary_agent") @@ -2838,30 +2950,37 @@ async def test_binary_content_in_agent_run(sentry_init, capture_events): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") binary_content = BinaryContent( data=b"fake_image_data_for_testing", media_type="image/png" ) await agent.run(["Analyze this image:", binary_content]) - (transaction,) = events - chat_spans = [s for s in transaction["spans"] if s["op"] == "gen_ai.chat"] + spans = [item.payload for item in items if 
item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 chat_span = chat_spans[0] - if "gen_ai.request.messages" in chat_span["data"]: - messages_str = str(chat_span["data"]["gen_ai.request.messages"]) + if "gen_ai.request.messages" in chat_span["attributes"]: + messages_str = str(chat_span["attributes"]["gen_ai.request.messages"]) assert any(keyword in messages_str for keyword in ["blob", "image", "base64"]) @pytest.mark.asyncio -async def test_set_usage_data_with_cache_tokens(sentry_init, capture_events): +async def test_set_usage_data_with_cache_tokens(sentry_init, capture_items): """Test that cache_read_tokens and cache_write_tokens are tracked.""" - sentry_init(integrations=[PydanticAIIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[PydanticAIIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) - events = capture_events() + items = capture_items("transaction", "span") with sentry_sdk.start_transaction(op="test", name="test"): span = sentry_sdk.start_span(op="test_span") @@ -2874,7 +2993,7 @@ async def test_set_usage_data_with_cache_tokens(sentry_init, capture_events): _set_usage_data(span, usage) span.finish() - (event,) = events + (event,) = (item.payload for item in items if item.type == "transaction") (span_data,) = event["spans"] assert span_data["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 assert span_data["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 @@ -2922,7 +3041,7 @@ async def test_set_usage_data_with_cache_tokens(sentry_init, capture_events): ], ) def test_image_url_base64_content_in_span( - sentry_init, capture_events, url, image_url_kwargs, expected_content + sentry_init, capture_items, url, image_url_kwargs, expected_content ): from sentry_sdk.integrations.pydantic_ai.spans.ai_client import ai_client_span @@ -2930,9 +3049,10 @@ def test_image_url_base64_content_in_span( 
integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") with sentry_sdk.start_transaction(op="test", name="test"): image_url = ImageUrl(url=url, **image_url_kwargs) @@ -2944,10 +3064,12 @@ def test_image_url_base64_content_in_span( span = ai_client_span([mock_msg], None, None, None) span.finish() - (event,) = events - chat_spans = [s for s in event["spans"] if s["op"] == "gen_ai.chat"] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 - messages_data = _get_messages_from_span(chat_spans[0]["data"]) + messages_data = _get_messages_from_span(chat_spans[0]["attributes"]) found_image = False for msg in messages_data: @@ -2992,27 +3114,29 @@ def test_image_url_base64_content_in_span( ], ) async def test_invoke_agent_image_url( - sentry_init, capture_events, url, image_url_kwargs, expected_content + sentry_init, capture_items, url, image_url_kwargs, expected_content ): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) agent = Agent("test", name="test_image_url_agent") - events = capture_events() + items = capture_items("transaction", "span") image_url = ImageUrl(url=url, **image_url_kwargs) await agent.run([image_url, "Describe this image"]) - (transaction,) = events - found_image = False - chat_spans = [s for s in transaction["spans"] if s["op"] == "gen_ai.chat"] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] for chat_span in chat_spans: - messages_data = _get_messages_from_span(chat_span["data"]) + messages_data = _get_messages_from_span(chat_span["attributes"]) for msg 
in messages_data: if "content" not in msg: continue @@ -3025,7 +3149,7 @@ async def test_invoke_agent_image_url( @pytest.mark.asyncio -async def test_tool_description_in_execute_tool_span(sentry_init, capture_events): +async def test_tool_description_in_execute_tool_span(sentry_init, capture_items): """ Test that tool description from the tool's docstring is included in execute_tool spans. """ @@ -3044,20 +3168,27 @@ def multiply_numbers(a: int, b: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) - events = capture_events() + items = capture_items("transaction", "span") result = await agent.run("What is 5 times 3?") assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] assert len(tool_spans) >= 1 tool_span = tool_spans[0] - assert tool_span["data"]["gen_ai.tool.name"] == "multiply_numbers" - assert SPANDATA.GEN_AI_TOOL_DESCRIPTION in tool_span["data"] - assert "Multiply two numbers" in tool_span["data"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] + assert tool_span["attributes"]["gen_ai.tool.name"] == "multiply_numbers" + assert SPANDATA.GEN_AI_TOOL_DESCRIPTION in tool_span["attributes"] + assert ( + "Multiply two numbers" + in tool_span["attributes"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] + ) diff --git a/tests/tracing/test_decorator.py b/tests/tracing/test_decorator.py index 15432f5862..d370b4bbc9 100644 --- a/tests/tracing/test_decorator.py +++ b/tests/tracing/test_decorator.py @@ -121,9 +121,12 @@ async def _some_function_traced(a, b, c): ) -def test_span_templates_ai_dicts(sentry_init, capture_events): - sentry_init(traces_sample_rate=1.0) - events = capture_events() +def 
test_span_templates_ai_dicts(sentry_init, capture_items): + sentry_init( + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) + items = capture_items("span") @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) def my_tool(arg1, arg2): @@ -166,40 +169,54 @@ def my_agent(): with sentry_sdk.start_transaction(name="test-transaction"): my_agent() - (event,) = events - (agent_span, tool_span, chat_span) = event["spans"] + (agent_span, tool_span, chat_span) = ( + item.payload for item in items if item.type == "span" + ) - assert agent_span["op"] == "gen_ai.invoke_agent" assert ( - agent_span["description"] + agent_span["name"] == "invoke_agent test_decorator.test_span_templates_ai_dicts..my_agent" ) - assert agent_span["data"] == { + assert agent_span["attributes"] == { "gen_ai.agent.name": "test_decorator.test_span_templates_ai_dicts..my_agent", "gen_ai.operation.name": "invoke_agent", + "sentry.environment": "production", + "sentry.op": "gen_ai.invoke_agent", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", "thread.id": mock.ANY, "thread.name": mock.ANY, } - assert tool_span["op"] == "gen_ai.execute_tool" assert ( - tool_span["description"] + tool_span["name"] == "execute_tool test_decorator.test_span_templates_ai_dicts..my_tool" ) - assert tool_span["data"] == { + assert tool_span["attributes"] == { "gen_ai.tool.name": "test_decorator.test_span_templates_ai_dicts..my_tool", "gen_ai.operation.name": "execute_tool", "gen_ai.usage.input_tokens": 10, "gen_ai.usage.output_tokens": 20, "gen_ai.usage.total_tokens": 30, + "sentry.environment": "production", + "sentry.op": "gen_ai.execute_tool", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", 
"thread.id": mock.ANY, "thread.name": mock.ANY, } - assert "gen_ai.tool.description" not in tool_span["data"] + assert "gen_ai.tool.description" not in tool_span["attributes"] - assert chat_span["op"] == "gen_ai.chat" - assert chat_span["description"] == "chat my-gpt-4o-mini" - assert chat_span["data"] == { + assert chat_span["name"] == "chat my-gpt-4o-mini" + assert chat_span["attributes"] == { "gen_ai.operation.name": "chat", "gen_ai.request.frequency_penalty": 1.0, "gen_ai.request.max_tokens": 100, @@ -213,14 +230,25 @@ def my_agent(): "gen_ai.usage.input_tokens": 11, "gen_ai.usage.output_tokens": 22, "gen_ai.usage.total_tokens": 33, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", "thread.id": mock.ANY, "thread.name": mock.ANY, } -def test_span_templates_ai_objects(sentry_init, capture_events): - sentry_init(traces_sample_rate=1.0) - events = capture_events() +def test_span_templates_ai_objects(sentry_init, capture_items): + sentry_init( + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) + items = capture_items("span") @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) def my_tool(arg1, arg2): @@ -267,40 +295,54 @@ def my_agent(): with sentry_sdk.start_transaction(name="test-transaction"): my_agent() - (event,) = events - (agent_span, tool_span, chat_span) = event["spans"] + (agent_span, tool_span, chat_span) = ( + item.payload for item in items if item.type == "span" + ) - assert agent_span["op"] == "gen_ai.invoke_agent" assert ( - agent_span["description"] + agent_span["name"] == "invoke_agent test_decorator.test_span_templates_ai_objects..my_agent" ) - assert agent_span["data"] == { + assert agent_span["attributes"] == { "gen_ai.agent.name": "test_decorator.test_span_templates_ai_objects..my_agent", 
"gen_ai.operation.name": "invoke_agent", + "sentry.environment": "production", + "sentry.op": "gen_ai.invoke_agent", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", "thread.id": mock.ANY, "thread.name": mock.ANY, } - assert tool_span["op"] == "gen_ai.execute_tool" assert ( - tool_span["description"] + tool_span["name"] == "execute_tool test_decorator.test_span_templates_ai_objects..my_tool" ) - assert tool_span["data"] == { + assert tool_span["attributes"] == { "gen_ai.tool.name": "test_decorator.test_span_templates_ai_objects..my_tool", "gen_ai.tool.description": "This is a tool function.", "gen_ai.operation.name": "execute_tool", "gen_ai.usage.input_tokens": 10, "gen_ai.usage.output_tokens": 20, "gen_ai.usage.total_tokens": 30, + "sentry.environment": "production", + "sentry.op": "gen_ai.execute_tool", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", "thread.id": mock.ANY, "thread.name": mock.ANY, } - assert chat_span["op"] == "gen_ai.chat" - assert chat_span["description"] == "chat my-gpt-4o-mini" - assert chat_span["data"] == { + assert chat_span["name"] == "chat my-gpt-4o-mini" + assert chat_span["attributes"] == { "gen_ai.operation.name": "chat", "gen_ai.request.frequency_penalty": 1.0, "gen_ai.request.max_tokens": 100, @@ -314,15 +356,27 @@ def my_agent(): "gen_ai.usage.input_tokens": 11, "gen_ai.usage.output_tokens": 22, "gen_ai.usage.total_tokens": 33, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", "thread.id": 
mock.ANY, "thread.name": mock.ANY, } @pytest.mark.parametrize("send_default_pii", [True, False]) -def test_span_templates_ai_pii(sentry_init, capture_events, send_default_pii): - sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) - events = capture_events() +def test_span_templates_ai_pii(sentry_init, capture_items, send_default_pii): + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, + ) + items = capture_items("span") @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) def my_tool(arg1, arg2, **kwargs): @@ -352,15 +406,14 @@ def my_agent(*args, **kwargs): with sentry_sdk.start_transaction(name="test-transaction"): my_agent(22, 33, arg1=44, arg2=55) - (event,) = events - (_, tool_span, _) = event["spans"] + (_, tool_span, _) = (item.payload for item in items if item.type == "span") if send_default_pii: assert ( - tool_span["data"]["gen_ai.tool.input"] + tool_span["attributes"]["gen_ai.tool.input"] == "{'args': (1, 2), 'kwargs': {'tool_arg1': '3', 'tool_arg2': '4'}}" ) - assert tool_span["data"]["gen_ai.tool.output"] == "'tool_output'" + assert tool_span["attributes"]["gen_ai.tool.output"] == "'tool_output'" else: - assert "gen_ai.tool.input" not in tool_span["data"] - assert "gen_ai.tool.output" not in tool_span["data"] + assert "gen_ai.tool.input" not in tool_span["attributes"] + assert "gen_ai.tool.output" not in tool_span["attributes"] diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index 8895c98dbc..4209a02b4b 100644 --- a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -647,11 +647,14 @@ def test_conversation_id_propagates_to_span_with_ai_op( assert span_data.get("gen_ai.conversation.id") == "conv-ai-op-test" def test_conversation_id_propagates_to_span_with_gen_ai_op( - self, sentry_init, capture_events + self, sentry_init, capture_items ): """Span with gen_ai.* op should get conversation_id.""" - sentry_init(traces_sample_rate=1.0) - 
events = capture_events() + sentry_init( + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) + items = capture_items("span") scope = sentry_sdk.get_current_scope() scope.set_conversation_id("conv-gen-ai-op-test") @@ -660,8 +663,8 @@ def test_conversation_id_propagates_to_span_with_gen_ai_op( with start_span(op="gen_ai.invoke_agent"): pass - (event,) = events - span_data = event["spans"][0]["data"] + spans = [item.payload for item in items if item.type == "span"] + span_data = spans[0]["attributes"] assert span_data.get("gen_ai.conversation.id") == "conv-gen-ai-op-test" def test_conversation_id_not_propagated_to_non_ai_span(