// moly_kit/widgets/realtime.rs

use crate::widgets::{
    avatar::AvatarWidgetRefExt, slot::SlotWidgetRefExt,
    standard_message_content::StandardMessageContentWidgetRefExt,
};
use crate::{protocol::*, utils::makepad::events::EventExt};
use makepad_widgets::permission::Permission;
use makepad_widgets::permission::PermissionStatus;
use makepad_widgets::{makepad_platform::AudioDeviceType, *};
use std::sync::{Arc, Mutex};

live_design! {
    use link::theme::*;
    use link::shaders::*;
    use link::widgets::*;

    use crate::widgets::chat_lines::*;
    use crate::widgets::standard_message_content::*;

    AIAnimation = <RoundedView> {
        width: 200, height: 200
        show_bg: true
        // Shader based on "Branded AI assistant" by Vickone (https://www.shadertoy.com/view/tfcGD8)
        // Licensed under CC BY-NC-SA 3.0
        draw_bg: {
            // Simple hash function
            fn hash21(self, p: vec2) -> float {
                let mut p = fract(p * vec2(234.34, 435.345));
                p += dot(p, p + 34.23);
                return fract(p.x * p.y);
            }

            // Simple noise function
            fn noise(self, p: vec2) -> float {
                let i = floor(p);
                let f = fract(p);
                let f_smooth = f * f * (3.0 - 2.0 * f);
                let a = self.hash21(i);
                let b = self.hash21(i + vec2(1.0, 0.0));
                let c = self.hash21(i + vec2(0.0, 1.0));
                let d = self.hash21(i + vec2(1.0, 1.0));
                return mix(mix(a, b, f_smooth.x), mix(c, d, f_smooth.x), f_smooth.y);
            }

            // Simplified FBM (fractal brownian motion)
            fn fbm(self, p: vec2) -> float {
                let mut sum = 0.0;
                let mut amp = 0.5;
                let mut freq = 1.0;

                // Unroll the loop for compatibility
                sum += self.noise(p * freq) * amp;
                amp *= 0.5;
                freq *= 2.0;

                sum += self.noise(p * freq) * amp;
                amp *= 0.5;
                freq *= 2.0;

                sum += self.noise(p * freq) * amp;
                amp *= 0.5;
                freq *= 2.0;

                sum += self.noise(p * freq) * amp;
                amp *= 0.5;
                freq *= 2.0;

                return sum;
            }

            fn pixel(self) -> vec4 {
                // Center and aspect-correct UV coordinates
                let uv = (self.pos - 0.5) * 2.0;

                let mut col = vec3(0.1, 0.1, 0.1);
                // let mut col = vec3(0.0, 0.0, 0.0);

                let radius = 0.3 + sin(self.time * 0.5) * 0.02;
                let d = length(uv);

                let angle = atan(uv.y, uv.x);
                let wave = sin(angle * 3.0 + self.time) * 0.1;
                let wave2 = cos(angle * 5.0 - self.time * 1.3) * 0.08;

                let noise1 = self.fbm(uv * 3.0 + self.time * 0.1);
                let noise2 = self.fbm(uv * 5.0 - self.time * 0.2);

                let orb_color = vec3(0.2, 0.6, 1.0);
                let orb = smoothstep(radius + wave + wave2, radius - 0.1 + wave + wave2, d);

                let gradient1 = vec3(0.8, 0.2, 0.5) * sin(angle + self.time);
                let gradient2 = vec3(0.2, 0.5, 1.0) * cos(angle - self.time * 0.7);

                // Simplified particles (unrolled loop)
                let mut particles = 0.0;

                // Particle 1
                let particle_pos1 = vec2(
                    sin(self.time * 0.5) * 0.5,
                    cos(self.time * 0.3) * 0.5
                );
                particles += smoothstep(0.05, 0.0, length(uv - particle_pos1));

                // Particle 2
                let particle_pos2 = vec2(
                    sin(self.time * 0.7) * 0.5,
                    cos(self.time * 0.5) * 0.5
                );
                particles += smoothstep(0.05, 0.0, length(uv - particle_pos2));

                // Particle 3
                let particle_pos3 = vec2(
                    sin(self.time * 0.9) * 0.5,
                    cos(self.time * 0.7) * 0.5
                );
                particles += smoothstep(0.05, 0.0, length(uv - particle_pos3));

                // Combine all effects
                col += orb * mix(orb_color, gradient1, noise1);
                col += orb * mix(gradient2, orb_color, noise2) * 0.5;
                col += particles * vec3(0.5, 0.8, 1.0);
                col += exp(-d * 4.0) * vec3(0.2, 0.4, 0.8) * 0.5;

                // return vec4(col, 1.0);

                // Clip the final output to a circle
                let sdf = Sdf2d::viewport(self.pos * self.rect_size);
                // Use a distinct name so we don't shadow the orb `radius` above
                let clip_radius = min(self.rect_size.x, self.rect_size.y) * 0.5;
                sdf.circle(
                    self.rect_size.x * 0.5,
                    self.rect_size.y * 0.5,
                    clip_radius
                );

                sdf.fill_keep(vec4(col, 1.0));

                return sdf.result;
            }
        }
    }

    SimpleDropDown = <DropDown> {
        draw_text: {
            text_style: {font_size: 12}
            fn get_color(self) -> vec4 {
                return mix(
                    #2,
                    #x0,
                    self.down
                )
            }
        }

        popup_menu: {
            width: 300, height: Fit,
            flow: Down,
            padding: <THEME_MSPACE_1> {}

            menu_item: <PopupMenuItem> {
                width: Fill, height: Fit,
                align: { y: 0.5 }
                padding: {left: 15, right: 15, top: 10, bottom: 10}

                draw_text: {
                    fn get_color(self) -> vec4 {
                        return mix(
                            mix(
                                #3,
                                #x0,
                                self.active
                            ),
                            #x0,
                            self.hover
                        )
                    }
                }

                draw_bg: {
                    instance color: #f //(THEME_COLOR_FLOATING_BG)
                    instance color_active: #e9 //(THEME_COLOR_CTRL_HOVER)
                }
            }

            draw_bg: {
                instance color: #f9 //(THEME_COLOR_FLOATING_BG)
                border_size: 1.0
            }
        }
    }

    TranscriptionModelSelector = <View> {
        height: Fit
        align: {x: 0.0, y: 0.5}
        spacing: 10

        <Label> {
            text: "Transcription model:"
            draw_text: {
                color: #222
                text_style: {font_size: 11}
            }
        }

        transcription_model_selector = <SimpleDropDown> {
            margin: 5
            labels: ["whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe"]
            values: [whisper_1, gpt_4o_transcribe, gpt_4o_mini_transcribe]

            draw_text: {
                color: #222
                text_style: {font_size: 11}
            }

            popup_menu = {
                draw_text: {
                    color: #222
                    text_style: {font_size: 11}
                }
            }
        }
    }

    VoiceSelector = <View> {
        height: Fit
        align: {x: 0.0, y: 0.5}
        spacing: 10

        <Label> {
            text: "Voice:"
            draw_text: {
                color: #222
                text_style: {font_size: 11}
            }
        }

        voice_selector = <SimpleDropDown> {
            margin: 5
            labels: ["marin", "cedar", "alloy", "shimmer", "ash", "ballad", "coral", "echo", "sage", "verse"]
            values: [marin, cedar, alloy, shimmer, ash, ballad, coral, echo, sage, verse]

            draw_text: {
                color: #222
                text_style: {font_size: 11}
            }

            popup_menu = {
                draw_text: {
                    color: #222
                    text_style: {font_size: 11}
                }
            }
        }
    }

    IconButton = <Button> {
        width: Fit, height: Fit
        draw_text: {
            text_style: <THEME_FONT_ICONS> {
                font_size: 14.
            }
            color: #5,
            color_hover: #2,
            color_focus: #2
            color_down: #5
        }
        draw_bg: {
            color_down: #0000
            border_radius: 7.
            border_size: 0.
        }
    }

    DeviceSelector = <View> {
        height: Fit
        align: {x: 0.0, y: 0.5}
        spacing: 5

        label = <Label> {
            draw_text: {
                color: #222
                text_style: {font_size: 11}
            }
        }

        device_selector = <SimpleDropDown> {
            margin: 5
            labels: ["default"]
            values: [default]

            draw_text: {
                color: #222
                text_style: {font_size: 11}
            }

            popup_menu = {
                draw_text: {
                    color: #222
                    text_style: {font_size: 11}
                }
            }
        }
    }

    MuteControl = <View> {
        width: Fit, height: Fit
        align: {x: 0.5, y: 0.5}
        cursor: Hand
        mute_button = <IconButton> {
            text: ""
        }
        mute_status = <Label> {
            padding: 0
            text: "Mute"
            draw_text: {
                color: #222
                text_style: {font_size: 11}
            }
        }
    }

    DevicesSelector = <View> {
        height: Fit, width: Fill
        flow: Down, spacing: 5
        <View> {
            height: Fit
            mic_selector = <DeviceSelector> {
                width: Fit
                label = { text: "Mic:"}
            }
            mute_control = <MuteControl> {}
        }
        speaker_selector = <DeviceSelector> {
            label = { text: "Speaker:"}
        }
    }

    Controls = <View> {
        width: Fill, height: Fit
        flow: Down
        spacing: 10
        align: {x: 0.0, y: 0.5}
        padding: 20

        devices_selector = <DevicesSelector> {}
        selected_devices_view = <View> {
            visible: false
            height: Fit
            align: {x: 0.0, y: 0.5}
            selected_devices = <Label> {
                draw_text: {
                    text_style: {font_size: 11}
                    color: #222
                }
            }
        }

        voice_selector_wrapper = <VoiceSelector> {}
        selected_voice_view = <View> {
            visible: false
            height: Fit
            align: {x: 0.0, y: 0.5}
            selected_voice = <Label> {
                draw_text: {
                    text_style: {font_size: 11}
                    color: #222
                }
            }
        }

        <TranscriptionModelSelector> {}

        toggle_interruptions = <Toggle> {
            text: "Allow interruptions\n(requires headphones, no AEC yet)"
            width: Fit
            height: Fit
            draw_text: {
                fn get_color(self) -> vec4 {
                    return #222;
                }
                text_style: {font_size: 10}
            }

            label_walk: {
                margin: {left: 50}
            }
            draw_bg: {
                size: 25.
            }

            padding: {left: 5, right: 5, top: 5, bottom: 5}
        }

        status_label = <Label> {
            text: "Ready to start"
            width: Fill
            draw_text: {
                color: #222
                wrap: Word
                text_style: {font_size: 11}
            }
        }

        request_permission_button = <RoundedShadowView> {
            visible: false
            cursor: Hand
            margin: {left: 10, right: 10, bottom: 0, top: 10}
            width: Fill, height: Fit
            align: {x: 0.5, y: 0.5}
            padding: {left: 20, right: 20, bottom: 10, top: 10}
            draw_bg: {
                color: #f9f9f9
                border_radius: 4.5,
                uniform shadow_color: #0002
                shadow_radius: 8.0,
                shadow_offset: vec2(0.0,-1.5)
            }
            <Label> {
                text: "Request microphone permission"
                draw_text: {
                    text_style: {font_size: 11}
                    color: #000
                }
            }
        }

        tool_permission_line = <ToolRequestLine> {
            visible: false
            margin: {left: 10, right: 10, top: 10}
        }

        start_stop_button = <RoundedShadowView> {
            cursor: Hand
            margin: {left: 10, right: 10, bottom: 0, top: 10}
            width: Fill, height: Fit
            align: {x: 0.5, y: 0.5}
            padding: {left: 20, right: 20, bottom: 10, top: 10}
            draw_bg: {
                color: #f9f9f9
                border_radius: 4.5,
                uniform shadow_color: #0002
                shadow_radius: 8.0,
                shadow_offset: vec2(0.0,-1.5)
            }
            stop_start_label = <Label> {
                text: "Start"
                draw_text: {
                    text_style: {font_size: 11}
                    color: #000
                }
            }
        }
    }

    pub Realtime = {{Realtime}} <RoundedView> {
        show_bg: true
        draw_bg: {
            color: #f9f9f9
            border_radius: 10.0
        }
        flow: Down
        spacing: 20
        width: Fill, height: Fit
        align: {x: 0.5, y: 0.0}
        padding: 10

        header = <View> {
            height: Fit
            flow: Overlay

            align: {x: 1.0, y: 0.5}
            close_button = <IconButton> {
                text: "" // fa-xmark
            }
        }

        <AIAnimation> {}
        <Controls> {}
    }

    pub RealtimeContent = <RoundedView> {
        align: {x: 0.5, y: 0.5}

        <AdaptiveView> {
            Desktop = {
                width: 450, height: Fit
                align: {x: 0.5, y: 0.5}

                <CachedWidget> {
                    realtime = <Realtime>{}
                }
            }

            Mobile = {
                width: Fill, height: Fill
                align: {x: 0.5, y: 0.5}

                <CachedWidget> {
                    realtime = <Realtime>{}
                }
            }
        }
    }
}
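
// Usage note (assumed, not shown in this file): `RealtimeContent` above is the
// embeddable entry point, referenced from a parent `live_design!` block as
// `<RealtimeContent> {}`. Both its Desktop and Mobile variants wrap `Realtime`
// in a `CachedWidget`, so the two adaptive layouts share a single instance.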

#[derive(Clone, Debug, DefaultNone)]
pub enum RealtimeModalAction {
    None,
    DismissModal,
}

#[derive(Clone, Debug, Default, PartialEq)]
enum MicPermissionStatus {
    #[default]
    NotDetermined,
    Requesting,
    Granted,
    Denied,
}
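
// Tracks the microphone permission flow driven by makepad's permission API:
// `Granted` maps directly from `PermissionStatus::Granted`, `Denied` covers
// both `DeniedCanRetry` and permanent denials, and `Requesting` marks an
// in-flight request so we don't ask twice.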

#[derive(Live, LiveHook, Widget)]
pub struct Realtime {
    #[deref]
    view: View,

    #[rust]
    realtime_channel: Option<RealtimeChannel>,

    #[rust]
    is_connected: bool,

    #[rust]
    conversation_active: bool,

    #[rust]
    transcript: String,

    #[rust]
    conversation_messages: Vec<(String, Message)>, // (item_id, message) for ordering

    #[rust]
    recorded_audio: Arc<Mutex<Vec<f32>>>,

    #[rust]
    playback_audio: Arc<Mutex<Vec<f32>>>,

    /// Whether we should record and send audio
    #[rust]
    should_record: Arc<Mutex<bool>>,

    /// Whether the user has muted the microphone
    #[rust]
    is_muted: Arc<Mutex<bool>>,

    #[rust]
    is_playing: Arc<Mutex<bool>>,

    #[rust]
    playback_position: Arc<Mutex<usize>>,

    #[rust]
    audio_setup_done: bool,

    #[rust]
    audio_streaming_timer: Option<Timer>,

    #[rust]
    ai_is_responding: bool,

    #[rust]
    user_is_interrupting: bool,

    #[rust]
    current_assistant_item_id: Option<String>,

    #[rust]
    selected_voice: String,

    #[rust]
    has_sent_audio: bool,

    #[rust]
    should_request_connection: bool,

    #[rust]
    connection_request_sent: bool,

    #[rust]
    bot_entity_id: Option<EntityId>,

    #[rust]
    bot_context: Option<crate::protocol::BotContext>,

    #[rust]
    pending_tool_call: Option<(String, String, String)>, // (name, call_id, arguments)

    #[rust]
    audio_devices: Vec<AudioDeviceDesc>,

    #[rust]
    mic_permission_status: MicPermissionStatus,
}
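
// Threading note: the `Arc<Mutex<...>>` fields above are shared with the audio
// input/output callbacks registered in `setup_audio`. Those callbacks run on
// the real-time audio thread, so they use `try_lock` instead of `lock` and
// simply skip a cycle on contention rather than blocking audio processing.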

impl Widget for Realtime {
    fn handle_event(&mut self, cx: &mut Cx, event: &Event, scope: &mut Scope) {
        self.view.handle_event(cx, event, scope);
        self.widget_match_event(cx, event, scope);

        if let Some(_value) = self
            .drop_down(id!(transcription_model_selector))
            .changed(event.actions())
        {
            if self.is_connected {
                self.update_session_config(cx);
            }
        }

        if let Some(enabled) = self
            .check_box(id!(toggle_interruptions))
            .changed(event.actions())
        {
            // // Send interruption configuration to the realtime client
            // if let Some(channel) = &self.realtime_channel {
            //     let _ = channel.command_sender.send(RealtimeCommand::SetInterruptionEnabled(enabled));
            // }

            if enabled && self.conversation_active {
                *self.should_record.lock().unwrap() = true;
            }
        }

        // Handle realtime events
        self.handle_realtime_events(cx);

        if !self.audio_setup_done
            && self.mic_permission_status == MicPermissionStatus::NotDetermined
        {
            cx.request_permission(Permission::AudioInput);
            self.mic_permission_status = MicPermissionStatus::Requesting;
        }

        if !self.audio_setup_done
            && let Event::PermissionResult(pr) = event
        {
            if pr.permission == Permission::AudioInput {
                match pr.status {
                    PermissionStatus::Granted => {
                        self.mic_permission_status = MicPermissionStatus::Granted;
                        self.setup_audio(cx);
                        self.audio_setup_done = true;
                        self.view(id!(start_stop_button)).set_visible(cx, true);
                    }
                    PermissionStatus::DeniedCanRetry => {
                        self.label(id!(status_label)).set_text(cx, "⚠️ Moly needs microphone access to have realtime conversations.\nClick on the button below to trigger another request");
                        self.view(id!(request_permission_button))
                            .set_visible(cx, true);
                        self.view(id!(start_stop_button)).set_visible(cx, false);
                        self.mic_permission_status = MicPermissionStatus::Denied;
                    }
                    _ => {
                        self.label(id!(status_label)).set_text(cx, "⚠️ Moly does not have access to your microphone.\nTo continue, allow Moly to access your microphone\nin your system settings\nand then restart the app.");
                        self.view(id!(request_permission_button))
                            .set_visible(cx, false);
                        self.view(id!(start_stop_button)).set_visible(cx, false);
                        self.mic_permission_status = MicPermissionStatus::Denied;
                    }
                }
            }
        }

        if self.audio_setup_done {
            // Try to start pending conversation if we got connected
            self.try_start_pending_conversation(cx);
        }

        // Handle audio streaming timer
        if let Some(timer) = &self.audio_streaming_timer {
            if timer.is_event(event).is_some() && self.conversation_active {
                self.send_audio_chunk_to_realtime(cx);

                // Check if we should resume recording when playback buffer is empty
                // This is the backup mechanism for when toggle is OFF (no interruptions)
                if self.playback_audio.lock().unwrap().is_empty() {
                    let interruptions_enabled =
                        self.check_box(id!(toggle_interruptions)).active(cx);

                    if !interruptions_enabled {
                        // Only auto-resume recording if interruptions are disabled
                        // (when interruptions are enabled, recording control is handled elsewhere)
                        if let Ok(mut should_record) = self.should_record.try_lock() {
                            if !*should_record && self.conversation_active && !self.ai_is_responding
                            {
                                ::log::debug!(
                                    "Auto-resuming recording - playback empty and interruptions disabled"
                                );
                                *should_record = true;
                                self.label(id!(status_label))
                                    .set_text(cx, "🎤 Listening...");
                            }
                        }
                    }
                }
            }
        }
    }

    fn draw_walk(&mut self, cx: &mut Cx2d, scope: &mut Scope, walk: Walk) -> DrawStep {
        self.view.draw_walk(cx, scope, walk)
    }
}

impl WidgetMatchEvent for Realtime {
    /// Triggered at startup and whenever system audio devices change.
    ///
    /// We use it to update the list of available audio devices and select the default ones.
    fn handle_audio_devices(
        &mut self,
        cx: &mut Cx,
        devices: &AudioDevicesEvent,
        _scope: &mut Scope,
    ) {
        let mut input_names = Vec::new();
        let mut output_names = Vec::new();
        let mut default_input_name = String::new();
        let mut default_output_name = String::new();

        devices
            .descs
            .iter()
            .for_each(|desc| match desc.device_type {
                AudioDeviceType::Input => {
                    input_names.push(desc.name.clone());
                    if desc.is_default {
                        default_input_name = desc.name.clone();
                    }
                }
                AudioDeviceType::Output => {
                    output_names.push(desc.name.clone());
                    if desc.is_default {
                        default_output_name = desc.name.clone();
                    }
                }
            });

        let mic_dropdown = self.drop_down(id!(mic_selector.device_selector));
        mic_dropdown.set_labels(cx, input_names.clone());
        mic_dropdown.set_selected_by_label(&default_input_name, cx);

        let speaker_dropdown = self.drop_down(id!(speaker_selector.device_selector));
        speaker_dropdown.set_labels(cx, output_names.clone());
        speaker_dropdown.set_selected_by_label(&default_output_name, cx);

        // Automatically switch to default devices,
        // e.g. when a user connects headphones we assume they want to use them right away.
        // Note: we do not want to switch automatically if the user has already selected a
        // non-default device, unless the default device is new (wasn't present in the previous list).
        let default_input = devices.default_input();
        let default_output = devices.default_output();

        // The default input device is new, assume we want to use it.
        // (`first()` guards against an empty default list, which would
        // otherwise panic on indexing.)
        if let Some(new_default) = default_input.first() {
            if !self
                .audio_devices
                .iter()
                .any(|d| d.device_type == AudioDeviceType::Input && d.device_id == *new_default)
            {
                cx.use_audio_inputs(&default_input);
            }
        }

        // The default output device is new, assume we want to use it.
        if let Some(new_default) = default_output.first() {
            if !self
                .audio_devices
                .iter()
                .any(|d| d.device_type == AudioDeviceType::Output && d.device_id == *new_default)
            {
                cx.use_audio_outputs(&default_output);
            }
        }

        self.audio_devices = devices.descs.clone();
    }

    fn handle_actions(&mut self, cx: &mut Cx, actions: &Actions, _scope: &mut Scope) {
        if self
            .view(id!(start_stop_button))
            .finger_down(actions)
            .is_some()
        {
            if self.conversation_active {
                self.reset_all(cx);
            } else {
                self.start_conversation(cx);
            }
            self.update_ui(cx);
        }

        // Handle tool permission buttons from ToolRequestLine
        if self
            .view(id!(tool_permission_line))
            .button(id!(message_section.content_section.tool_actions.approve))
            .clicked(actions)
        {
            self.approve_tool_call(cx);
        }

        if self
            .view(id!(tool_permission_line))
            .button(id!(message_section.content_section.tool_actions.deny))
            .clicked(actions)
        {
            self.deny_tool_call(cx);
        }

        let speaker_dropdown = self.drop_down(id!(speaker_selector.device_selector));
        if let Some(_id) = speaker_dropdown.changed(actions) {
            let selected_device = self
                .audio_devices
                .iter()
                .find(|device| device.name == speaker_dropdown.selected_label());
            if let Some(device) = selected_device {
                cx.use_audio_outputs(&[device.device_id]);
            }
        }

        let microphone_dropdown = self.drop_down(id!(mic_selector.device_selector));
        if let Some(_id) = microphone_dropdown.changed(actions) {
            let selected_device = self
                .audio_devices
                .iter()
                .find(|device| device.name == microphone_dropdown.selected_label());
            if let Some(device) = selected_device {
                cx.use_audio_inputs(&[device.device_id]);
            }
        }

        // Mute
        let mute_button = self.button(id!(mute_button));
        let mute_label = self.label(id!(mute_status));
        if self.view(id!(mute_control)).finger_down(actions).is_some()
            || mute_button.clicked(actions)
        {
            let mut is_muted = self.is_muted.lock().unwrap();
            if *is_muted {
                // Mic was muted, unmute and update button to "Mute"
                *is_muted = false;
                mute_button.set_text(cx, ""); // fa-microphone
                mute_label.set_text(cx, "Mute");
            } else {
                *is_muted = true;
                mute_button.set_text(cx, ""); // fa-microphone-slash
                mute_label.set_text(cx, "Unmute");
            }
        }

        // Mic permissions
        if self
            .view(id!(request_permission_button))
            .finger_up(actions)
            .is_some()
        {
            cx.request_permission(Permission::AudioInput);
        }

        // Modal close
        if self.button(id!(close_button)).clicked(actions) {
            self.reset_state(cx);
            cx.action(RealtimeModalAction::DismissModal);
        }
    }
}

impl Realtime {
    pub fn set_realtime_channel(&mut self, channel: RealtimeChannel) {
        self.realtime_channel = Some(channel);
        self.is_connected = true;
    }

    pub fn set_bot_entity_id(&mut self, cx: &mut Cx, bot_entity_id: EntityId) {
        self.bot_entity_id = Some(bot_entity_id);

        // TODO: set the available transcription models through the realtime channel.
        // (determine the list of models in openai_realtime client)
        // If the provider is not OpenAI, replace `whisper-1` with `whisper`
        if let Some(EntityId::Bot(bot_id)) = &self.bot_entity_id {
            if !bot_id.provider().contains("api.openai.com") {
                let labels = vec![
                    "whisper".to_string(),
                    "gpt-4o-transcribe".to_string(),
                    "gpt-4o-mini-transcribe".to_string(),
                ];
                self.drop_down(id!(transcription_model_selector))
                    .set_labels(cx, labels);
            }
        }
    }

    pub fn set_bot_context(&mut self, bot_context: Option<crate::protocol::BotContext>) {
        self.bot_context = bot_context;
    }

    fn try_start_pending_conversation(&mut self, cx: &mut Cx) {
        if self.is_connected && !self.conversation_active && self.should_request_connection {
            // We can now start the conversation that was requested
            self.should_request_connection = false;
            self.connection_request_sent = false;
            self.conversation_active = true;
            self.ai_is_responding = true;
            self.user_is_interrupting = false;
            self.current_assistant_item_id = None;
            *self.should_record.lock().unwrap() = false;
            self.has_sent_audio = false;

            // Clear previous audio
            self.recorded_audio.lock().unwrap().clear();
            self.playback_audio.lock().unwrap().clear();
            *self.is_playing.lock().unwrap() = false;
            *self.playback_position.lock().unwrap() = 0;
            self.transcript.clear();

            self.update_ui(cx);
            self.start_audio_streaming(cx);
            self.create_greeting_response(cx);
        }
    }

    fn start_conversation(&mut self, cx: &mut Cx) {
        if !self.is_connected {
            // Set flag to request reconnection, Chat widget will handle this
            self.should_request_connection = true;
            self.connection_request_sent = false;
            self.label(id!(status_label))
                .set_text(cx, "Reconnecting...");
            return;
        }

        self.conversation_active = true;
        self.ai_is_responding = true;
        self.user_is_interrupting = false;
        self.current_assistant_item_id = None;
        *self.should_record.lock().unwrap() = false;
        self.has_sent_audio = false;

        // Clear previous audio
        self.recorded_audio.lock().unwrap().clear();
        self.playback_audio.lock().unwrap().clear();
        *self.is_playing.lock().unwrap() = false;
        *self.playback_position.lock().unwrap() = 0;
        self.transcript.clear();

        self.update_ui(cx);
        self.label(id!(status_label)).set_text(cx, "Loading..."); // Replaced once the greeting response arrives
        self.start_audio_streaming(cx);
        self.create_greeting_response(cx);
    }

    fn start_audio_streaming(&mut self, cx: &mut Cx) {
        // Start a timer to send audio chunks periodically
        if self.audio_streaming_timer.is_none() {
            let timer = cx.start_interval(0.020); // 20ms intervals
            self.audio_streaming_timer = Some(timer);
        }
    }
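
    // Note: at the 24kHz capture rate established in `setup_audio`, each 20ms
    // tick corresponds to roughly 24000 * 0.020 = 480 samples per chunk, give
    // or take whatever the input callback accumulated between ticks.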

    fn send_audio_chunk_to_realtime(&mut self, _cx: &mut Cx) {
        // Collect audio data and send to realtime client
        if let Ok(mut recorded) = self.recorded_audio.try_lock() {
            if !recorded.is_empty() {
                let audio_data = recorded.clone();
                recorded.clear();

                // Convert to PCM16 and send
                let pcm16_data = Self::convert_f32_to_pcm16(&audio_data);
                if let Some(channel) = &self.realtime_channel {
                    let _ = channel
                        .command_sender
                        .unbounded_send(RealtimeCommand::SendAudio(pcm16_data));
                }
            }
        }
    }
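
    // A minimal sketch of what the f32 -> PCM16 conversion above typically
    // looks like. `convert_f32_to_pcm16` itself is defined elsewhere in this
    // file; this hypothetical helper only illustrates the usual recipe:
    // clamp to [-1.0, 1.0], scale to the i16 range, emit little-endian bytes.
    #[allow(dead_code)]
    fn convert_f32_to_pcm16_sketch(samples: &[f32]) -> Vec<u8> {
        let mut out = Vec::with_capacity(samples.len() * 2);
        for &sample in samples {
            let clamped = sample.clamp(-1.0, 1.0);
            let value = (clamped * i16::MAX as f32) as i16;
            out.extend_from_slice(&value.to_le_bytes());
        }
        out
    }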

    /// Common reset logic for both user-initiated reset and connection loss
    fn reset_conversation_state(
        &mut self,
        cx: &mut Cx,
        status_message: &str,
        allow_reconnect: bool,
    ) {
        self.stop_conversation(cx);

        self.is_connected = false;
        self.has_sent_audio = false;

        if !allow_reconnect {
            // Full reset - user clicked stop
            self.should_request_connection = false;
            self.connection_request_sent = false;
        }
        self.transcript.clear();
        self.label(id!(status_label)).set_text(cx, status_message);

        // Hide tool permission UI and clear pending tool call
        self.view(id!(tool_permission_line)).set_visible(cx, false);
        self.pending_tool_call = None;

        // Show voice selector again
        self.view(id!(voice_selector_wrapper)).set_visible(cx, true);
        self.view(id!(selected_voice_view)).set_visible(cx, false);

        self.update_ui(cx);
    }

    fn reset_all(&mut self, cx: &mut Cx) {
        self.reset_conversation_state(cx, "Ready to start", false);

        // Stop the session
        if let Some(channel) = &self.realtime_channel {
            let _ = channel
                .command_sender
                .unbounded_send(RealtimeCommand::StopSession);
        }
    }

    fn stop_conversation(&mut self, cx: &mut Cx) {
        self.conversation_active = false;
        self.ai_is_responding = false;
        self.user_is_interrupting = false;
        self.current_assistant_item_id = None;
        *self.should_record.lock().unwrap() = false;
        *self.is_playing.lock().unwrap() = false;

        // Stop audio streaming timer
        if let Some(timer) = &self.audio_streaming_timer {
            cx.stop_timer(*timer);
            self.audio_streaming_timer = None;
        }

        // Clear audio buffers
        if let Ok(mut playback) = self.playback_audio.try_lock() {
            playback.clear();
        }
        if let Ok(mut recorded) = self.recorded_audio.try_lock() {
            recorded.clear();
        }
    }

    fn handle_realtime_events(&mut self, cx: &mut Cx) {
        let events = if let Some(channel) = &self.realtime_channel {
            if let Ok(mut receiver_opt) = channel.event_receiver.lock() {
                if let Some(receiver) = receiver_opt.as_mut() {
                    let mut events = Vec::new();
                    while let Ok(Some(event)) = receiver.try_next() {
                        events.push(event);
                    }
                    events
                } else {
                    Vec::new()
                }
            } else {
                Vec::new()
            }
        } else {
            Vec::new()
        };

        // Now process events without holding the lock
        for event in events {
            match event {
                RealtimeEvent::SessionReady => {
                    self.label(id!(status_label))
                        .set_text(cx, "✅ Connected to OpenAI");
                    // self.update_session_config(cx);
                }
                RealtimeEvent::AudioData(audio_data) => {
                    // When we start receiving AI audio, the user is no longer interrupting
                    if self.user_is_interrupting {
                        self.user_is_interrupting = false;
                    }

                    self.ai_is_responding = true;

                    // Process audio immediately to start playback
                    self.add_audio_to_playback(audio_data);

                    // Update recording state based on interruption settings
                    if self.conversation_active {
                        let interruptions_enabled =
                            self.check_box(id!(toggle_interruptions)).active(cx);

                        if !interruptions_enabled {
                            // Interruptions disabled - mute microphone during AI speech
                            *self.should_record.lock().unwrap() = false;
                        } else {
                            // Interruptions enabled - ensure recording is active for real-time interruption
                            *self.should_record.lock().unwrap() = true;
                        }
                    }

                    self.label(id!(status_label))
                        .set_text(cx, "🔊 Playing audio...");
                }
                RealtimeEvent::AudioTranscript(text) => {
                    self.transcript.push_str(&text);
                }
                RealtimeEvent::AudioTranscriptCompleted(transcript, item_id) => {
                    // Store completed AI transcript as a bot message
                    if !transcript.trim().is_empty() {
                        let message = Message {
                            from: self.bot_entity_id.clone().unwrap_or_default(),
                            content: MessageContent {
                                text: transcript,
                                ..Default::default()
                            },
                            ..Default::default()
                        };
                        self.conversation_messages.push((item_id, message));
                    }
                }
                RealtimeEvent::UserTranscriptCompleted(transcript, item_id) => {
                    // Store completed user transcript as a user message
                    if !transcript.trim().is_empty() {
                        let message = Message {
                            from: EntityId::User,
                            content: MessageContent {
                                text: transcript,
                                ..Default::default()
                            },
                            ..Default::default()
                        };
                        self.conversation_messages.push((item_id, message));
                    }
                }
                RealtimeEvent::SpeechStarted => {
                    self.label(id!(status_label))
                        .set_text(cx, "🎤 User speech detected");

                    self.user_is_interrupting = true;

                    // CRITICAL: Clear the playback audio buffer to stop ongoing AI audio
                    // This prevents audio accumulation and feedback loops
                    if let Ok(mut playback) = self.playback_audio.try_lock() {
                        let cleared_samples = playback.len();
                        playback.clear();
                        ::log::debug!(
                            "Cleared {} audio samples from playback buffer to prevent feedback",
                            cleared_samples
                        );
                    }

                    // Stop current playback and reset position
                    if let Ok(mut is_playing) = self.is_playing.try_lock() {
                        *is_playing = false;
                    }
                    if let Ok(mut position) = self.playback_position.try_lock() {
                        *position = 0;
                    }

                    // Resume recording immediately when user starts speaking
                    if self.conversation_active {
                        *self.should_record.lock().unwrap() = true;
                    }
                }
                RealtimeEvent::SpeechStopped => {
                    self.label(id!(status_label)).set_text(cx, "Processing...");

                    // Temporarily stop recording while waiting for response
                    if self.conversation_active {
                        *self.should_record.lock().unwrap() = false;
                    }
                }
                RealtimeEvent::ResponseCompleted => {
                    let status_label = self.label(id!(status_label));
                    self.user_is_interrupting = false;
                    self.ai_is_responding = false;
                    self.current_assistant_item_id = None;

                    // Resume recording after AI response is complete
                    if self.conversation_active {
                        // Check if interruptions are enabled via the toggle
                        let interruptions_enabled =
                            self.check_box(id!(toggle_interruptions)).active(cx);

                        if interruptions_enabled {
                            // Allow immediate interruption
                            *self.should_record.lock().unwrap() = true;
                            status_label.set_text(cx, "✅ Response generated - 🎤 listening again");
                        } else {
                            // Without interruptions, only resume when playback buffer is truly empty
                            if self.playback_audio.lock().unwrap().is_empty() {
                                ::log::debug!(
                                    "Setting should_record to true - response completed and playback empty"
                                );
                                *self.should_record.lock().unwrap() = true;
                                status_label
                                    .set_text(cx, "✅ Response generated - 🎤 listening again");
                            } else {
                                status_label
                                    .set_text(cx, "✅ Response generated - 🔊 playing audio");
                                ::log::debug!("Playback still active, keeping recording disabled");
                            }
                        }
                    }
                }
                RealtimeEvent::FunctionCallRequest {
                    name,
                    call_id,
                    arguments,
                } => {
                    // Check if dangerous mode is enabled to auto-approve function calls
                    let dangerous_mode_enabled = self
                        .bot_context
                        .as_ref()
                        .map(|ctx| {
                            ctx.tool_manager()
                                .map(|tm| tm.get_dangerous_mode_enabled())
                                .unwrap_or(false)
                        })
                        .unwrap_or(false);

                    if dangerous_mode_enabled {
                        // Auto-approve function calls in dangerous mode
                        use crate::mcp::mcp_manager::display_name_from_namespaced;
                        let display_name = display_name_from_namespaced(&name);
                        self.label(id!(status_label))
                            .set_text(cx, &format!("🔧 Auto-executing tool: {}", display_name));

                        // Execute the function call directly
                        self.handle_function_call(cx, name, call_id, arguments);
                    } else {
                        // Show permission request as usual
                        self.label(id!(status_label))
                            .set_text(cx, &format!("🔧 Tool permission requested: {}", name));

                        self.show_tool_permission_request(cx, name, call_id, arguments);
                    }
                }
                RealtimeEvent::Error(error) => {
                    ::log::error!("Realtime API error: {}", error);

                    if !self.is_connected || !self.conversation_active {
                        ::log::debug!(
                            "Ignoring error - already disconnected or conversation not active"
                        );
                        return;
                    }

                    // Check if this is a connection error
                    if error.contains("Connection lost")
                        || error.contains("Connection closed")
                        || error.contains("Failed to send")
                    {
                        // Connection was dropped - use common reset but allow reconnection
                        self.reset_conversation_state(
                            cx,
                            "❌ Connection lost. Please restart the conversation.",
                            true, // allow_reconnect
                        );
                    } else {
                        // Other types of errors - just display them
                        self.label(id!(status_label))
                            .set_text(cx, &format!("❌ Error: {}", error));

                        // Resume recording on non-connection errors
                        if self.conversation_active {
                            *self.should_record.lock().unwrap() = true;
                        }
                    }
                }
            }
        }
    }

    fn show_tool_permission_request(
        &mut self,
        cx: &mut Cx,
        name: String,
        call_id: String,
        arguments: String,
    ) {
        use crate::mcp::mcp_manager::display_name_from_namespaced;

        self.pending_tool_call = Some((name.clone(), call_id, arguments));

        let tool_line = self.view(id!(tool_permission_line));
        tool_line.set_visible(cx, true);

        // Configure the tool line
        let display_name = display_name_from_namespaced(&name);

        tool_line
            .avatar(id!(message_section.sender.avatar))
            .borrow_mut()
            .unwrap()
            .avatar = Some(crate::protocol::Picture::Grapheme("T".into()));
        tool_line
            .label(id!(message_section.sender.name))
            .set_text(cx, "Permission Request");

        let content = crate::protocol::MessageContent {
            text: format!("Tool '{}' is requesting permission to run", display_name),
            ..Default::default()
        };
        tool_line
            .slot(id!(message_section.content_section.content))
            .current()
            .as_standard_message_content()
            .set_content(cx, &content);

        tool_line
            .view(id!(message_section.content_section.tool_actions))
            .set_visible(cx, true);

        // Pause recording while waiting for permission
        *self.should_record.lock().unwrap() = false;

        self.view.redraw(cx);
    }

    fn handle_function_call(
        &mut self,
        _cx: &mut Cx,
        name: String,
        call_id: String,
        arguments: String,
    ) {
        let Some(context) = self.bot_context.as_ref().cloned() else {
            ::log::error!("No bot context available for function call");
            if let Some(channel) = &self.realtime_channel {
                let error_result = serde_json::json!({
                    "error": "Tool manager not available"
                })
                .to_string();
                let _ = channel.command_sender.unbounded_send(
                    crate::protocol::RealtimeCommand::SendFunctionCallResult {
                        call_id,
                        output: error_result,
                    },
                );
            }
            return;
        };

        let Some(tool_manager) = context.tool_manager() else {
            ::log::error!("No tool manager available for function call");
            if let Some(channel) = &self.realtime_channel {
                let error_result = serde_json::json!({
                    "error": "Tool manager not available"
                })
                .to_string();
                let _ = channel.command_sender.unbounded_send(
                    crate::protocol::RealtimeCommand::SendFunctionCallResult {
                        call_id,
                        output: error_result,
                    },
                );
            }
            return;
        };

        let channel = self.realtime_channel.clone();

        let future = async move {
            // Parse the arguments JSON
            let arguments_map = match crate::mcp::mcp_manager::parse_tool_arguments(&arguments) {
                Ok(args) => args,
                Err(e) => {
                    ::log::error!("Failed to parse function call arguments: {}", e);
                    if let Some(channel) = &channel {
                        let error_result = serde_json::json!({
                            "error": e
                        })
                        .to_string();
                        let _ = channel.command_sender.unbounded_send(
                            crate::protocol::RealtimeCommand::SendFunctionCallResult {
                                call_id,
                                output: error_result,
                            },
                        );
                    }
                    return;
                }
            };

            let result = tool_manager
                .execute_tool_call(&name, &call_id, arguments_map)
                .await;

            if let Some(channel) = &channel {
                let output = if result.is_error {
                    serde_json::json!({
                        "error": result.content
                    })
                    .to_string()
                } else {
                    result.content
                };

                let _ = channel.command_sender.unbounded_send(
                    crate::protocol::RealtimeCommand::SendFunctionCallResult { call_id, output },
                );
            }
        };

        crate::utils::asynchronous::spawn(future);
    }

    fn approve_tool_call(&mut self, cx: &mut Cx) {
        if let Some((name, call_id, arguments)) = self.pending_tool_call.take() {
            // Hide permission UI
            self.view(id!(tool_permission_line)).set_visible(cx, false);

            // Update status
            use crate::mcp::mcp_manager::display_name_from_namespaced;
            let display_name = display_name_from_namespaced(&name);
            self.label(id!(status_label))
                .set_text(cx, &format!("🔧 Executing tool: {}", display_name));

            // Execute the tool
            self.handle_function_call(cx, name, call_id, arguments);

            // Resume recording if conversation is active
            if self.conversation_active {
                *self.should_record.lock().unwrap() = true;
            }

            self.view.redraw(cx);
        }
    }

    fn deny_tool_call(&mut self, cx: &mut Cx) {
        if let Some((name, call_id, _arguments)) = self.pending_tool_call.take() {
            // Hide permission UI
            self.view(id!(tool_permission_line)).set_visible(cx, false);

            // Send denial response
            if let Some(channel) = &self.realtime_channel {
                let denial_result = serde_json::json!({
                    "error": "Tool execution denied by user"
                })
                .to_string();
                let _ = channel.command_sender.unbounded_send(
                    crate::protocol::RealtimeCommand::SendFunctionCallResult {
                        call_id,
                        output: denial_result,
                    },
                );
            }

            // Update status
            use crate::mcp::mcp_manager::display_name_from_namespaced;
            let display_name = display_name_from_namespaced(&name);
            self.label(id!(status_label))
                .set_text(cx, &format!("🚫 Tool '{}' denied", display_name));

            // Resume recording if conversation is active
            if self.conversation_active {
                *self.should_record.lock().unwrap() = true;
            }

            self.view.redraw(cx);
        }
    }

    fn setup_audio(&mut self, cx: &mut Cx) {
        let recorded_audio = self.recorded_audio.clone();
        let should_record = self.should_record.clone();
        let is_muted = self.is_muted.clone();

        // Audio input callback - capture for realtime streaming
        cx.audio_input(0, move |info, input_buffer| {
            if let Ok(should_record_guard) = should_record.try_lock() {
                if let Ok(is_muted_guard) = is_muted.try_lock() {
                    if *should_record_guard && !*is_muted_guard {
                        if let Ok(mut recorded) = recorded_audio.try_lock() {
                            let channel = input_buffer.channel(0);

                            // Calculate the downsampling ratio from the input sample rate to 24kHz,
                            // clamped to at least 1 so `step_by` never receives 0
                            // (which would panic) if the input rate is below 24kHz.
                            let input_sample_rate = info.sample_rate;
                            let target_sample_rate = 24000.0;
                            let downsample_ratio =
                                ((input_sample_rate / target_sample_rate) as usize).max(1);

                            // Downsample by taking every nth sample based on the ratio
                            // TODO: this is a simple decimation - for better quality, we should use proper filtering
                            for i in (0..channel.len()).step_by(downsample_ratio) {
                                recorded.push(channel[i]);
                            }
1484                        }
1485                    }
1486                }
1487            }
1488        });

        let playback_audio = self.playback_audio.clone();
        let playback_position = self.playback_position.clone();
        let is_playing = self.is_playing.clone();

        // Audio output callback - plays AI response audio
        cx.audio_output(0, move |info, output_buffer| {
            // Always start with silence
            output_buffer.zero();

            if let Ok(mut playback) = playback_audio.try_lock() {
                if let Ok(mut pos) = playback_position.try_lock() {
                    if let Ok(mut playing) = is_playing.try_lock() {
                        // Upsampling ratio from the 24kHz response audio to the
                        // device's output rate. Clamp to at least 1: a ratio of 0
                        // (output below 24kHz) would otherwise silently skip
                        // playback. Integer truncation makes this approximate for
                        // non-integer ratios (e.g. 44.1kHz output).
                        let input_sample_rate = 24000.0; // Response audio sample rate
                        let output_sample_rate = info.sample_rate;
                        let upsample_ratio =
                            ((output_sample_rate / input_sample_rate) as usize).max(1);

                        if *playing
                            && !playback.is_empty()
                            && *pos < playback.len() * upsample_ratio
                        {
                            // Write to all output channels (mono -> stereo if needed)
                            let frame_count = output_buffer.frame_count();
                            let channel_count = output_buffer.channel_count();

                            let mut samples_to_drain = 0;

                            for frame_idx in 0..frame_count {
                                // Map the output position back to an input sample
                                // (zero-order hold: each input sample is repeated
                                // `upsample_ratio` times).
                                let sample_idx = *pos / upsample_ratio;

                                if sample_idx < playback.len() {
                                    let audio_sample = playback[sample_idx];

                                    // Write the same sample to all output channels
                                    for channel_idx in 0..channel_count {
                                        let channel = output_buffer.channel_mut(channel_idx);
                                        channel[frame_idx] = audio_sample;
                                    }

                                    *pos += 1;

                                    // Every `upsample_ratio` position increments
                                    // consume one input sample that can be drained
                                    if *pos % upsample_ratio == 0 {
                                        samples_to_drain += 1;
                                    }
                                } else {
                                    // Reached end of audio data
                                    *playing = false;
                                    *pos = 0;
                                    // Drain remaining samples since we're done
                                    samples_to_drain = playback.len();
                                    break;
                                }
                            }

                            // Remove consumed samples from the front of the buffer
                            if samples_to_drain > 0 && samples_to_drain <= playback.len() {
                                playback.drain(..samples_to_drain);
                                // Adjust position since we removed samples from the front
                                *pos = (*pos).saturating_sub(samples_to_drain * upsample_ratio);
                                // ::log::debug!("Drained {} samples, buffer size now: {}, pos: {}",
                                //         samples_to_drain, playback.len(), *pos);
                            }
                        } else {
                            // Not playing or out of data - keep outputting silence
                            if *playing && playback.is_empty() {
                                *playing = false;
                                *pos = 0;
                            }
                        }
                    }
                }
            }
        });

        self.audio_setup_done = true;
    }
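
    // NOTE: the callbacks above resample by plain decimation (capture) and
    // zero-order hold (playback). The helpers below are a minimal sketch of
    // slightly better-quality resampling, assuming integer rate ratios. They
    // are illustrative only - not wired into the callbacks - and their names
    // are hypothetical.

    /// Sketch: downsample by averaging each block of `ratio` samples, which
    /// acts as a crude low-pass filter compared to dropping samples outright.
    #[allow(dead_code)]
    fn downsample_by_averaging(input: &[f32], ratio: usize) -> Vec<f32> {
        let ratio = ratio.max(1);
        input
            .chunks(ratio)
            .map(|block| block.iter().sum::<f32>() / block.len() as f32)
            .collect()
    }

    /// Sketch: upsample by linearly interpolating between neighboring samples
    /// instead of repeating each one `ratio` times.
    #[allow(dead_code)]
    fn upsample_linear(input: &[f32], ratio: usize) -> Vec<f32> {
        let ratio = ratio.max(1);
        let mut output = Vec::with_capacity(input.len() * ratio);
        for (i, &current) in input.iter().enumerate() {
            // Hold the last sample at the end of the buffer
            let next = input.get(i + 1).copied().unwrap_or(current);
            for step in 0..ratio {
                let t = step as f32 / ratio as f32;
                output.push(current + (next - current) * t);
            }
        }
        output
    }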

    fn add_audio_to_playback(&mut self, audio_bytes: Vec<u8>) {
        // Convert PCM16 bytes back to f32 samples
        let samples = Self::convert_pcm16_to_f32(&audio_bytes);

        if let Ok(mut playback) = self.playback_audio.try_lock() {
            // If we're not currently playing, clear stale data and start fresh
            if let Ok(mut is_playing) = self.is_playing.try_lock() {
                if !*is_playing {
                    playback.clear();
                    *self.playback_position.lock().unwrap() = 0;
                    *is_playing = true;
                    ::log::debug!(
                        "Started fresh playback of AI response audio ({} samples)",
                        samples.len()
                    );
                }
            }

            playback.extend_from_slice(&samples);
        }
    }

    fn convert_f32_to_pcm16(samples: &[f32]) -> Vec<u8> {
        let mut pcm16_bytes = Vec::with_capacity(samples.len() * 2);

        for &sample in samples {
            // Clamp to [-1.0, 1.0] and scale to the i16 range
            let clamped = sample.clamp(-1.0, 1.0);
            let pcm16_sample = (clamped * 32767.0) as i16;
            pcm16_bytes.extend_from_slice(&pcm16_sample.to_le_bytes());
        }

        pcm16_bytes
    }

    fn convert_pcm16_to_f32(bytes: &[u8]) -> Vec<f32> {
        let mut samples = Vec::with_capacity(bytes.len() / 2);

        for chunk in bytes.chunks_exact(2) {
            // Little-endian i16 -> normalized f32
            let pcm16_sample = i16::from_le_bytes([chunk[0], chunk[1]]);
            let f32_sample = pcm16_sample as f32 / 32767.0;
            samples.push(f32_sample);
        }

        samples
    }

    fn update_session_config(&mut self, cx: &mut Cx) {
        self.selected_voice = self.drop_down(id!(voice_selector)).selected_label();
        self.view(id!(voice_selector_wrapper))
            .set_visible(cx, false);
        self.view(id!(selected_voice_view)).set_visible(cx, true);
        self.label(id!(selected_voice)).set_text(
            cx,
            format!("Selected voice: {}", self.selected_voice).as_str(),
        );

        // Send the updated session config over the realtime channel
        if let Some(channel) = &self.realtime_channel {
            let _ = channel
                .command_sender
                .unbounded_send(RealtimeCommand::UpdateSessionConfig {
                    voice: self.selected_voice.clone(),
                    transcription_model: self
                        .drop_down(id!(transcription_model_selector))
                        .selected_label(),
                });
        }
    }

    fn create_greeting_response(&mut self, cx: &mut Cx) {
        self.update_session_config(cx);
        if let Some(channel) = &self.realtime_channel {
            let _ = channel
                .command_sender
                .unbounded_send(RealtimeCommand::CreateGreetingResponse);
        }
    }

    fn update_ui(&self, cx: &mut Cx) {
        if self.conversation_active {
            self.label(id!(stop_start_label))
                .set_text(cx, "Stop conversation");
        } else {
            self.label(id!(stop_start_label))
                .set_text(cx, "Start conversation");
        }
    }

    /// Check if the realtime widget is requesting a new connection.
    /// Returns true at most once per request to avoid duplicate connection attempts.
    pub fn connection_requested(&mut self) -> bool {
        if self.should_request_connection && !self.is_connected && !self.connection_request_sent {
            self.connection_request_sent = true;
            true
        } else {
            false
        }
    }

    /// Take the collected conversation messages, clearing the internal collection.
    pub fn take_conversation_messages(&mut self) -> Vec<Message> {
        let mut messages_with_ids = std::mem::take(&mut self.conversation_messages);

        // Sort by item_id to restore chronological order
        messages_with_ids.sort_by(|a, b| a.0.cmp(&b.0));

        // Extract just the messages, keeping the sorted order
        messages_with_ids
            .into_iter()
            .map(|(_, message)| message)
            .collect()
    }

    /// Reset widget state; used for cleanup when the modal closes.
    pub fn reset_state(&mut self, cx: &mut Cx) {
        self.reset_all(cx);
    }
}
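
// A minimal sketch of a round-trip check for the PCM16 converters above,
// assuming the widget struct is named `Realtime` (as `RealtimeRef` suggests).
// PCM16 quantization loses at most about one step (1/32767) of amplitude per
// sample, so the tolerance below is an assumption based on that bound.
#[cfg(test)]
mod pcm16_roundtrip_tests {
    use super::Realtime;

    #[test]
    fn pcm16_round_trip_is_nearly_lossless() {
        let original: Vec<f32> = vec![0.0, 0.5, -0.5, 1.0, -1.0, 0.123];

        // f32 -> PCM16: two little-endian bytes per sample
        let bytes = Realtime::convert_f32_to_pcm16(&original);
        assert_eq!(bytes.len(), original.len() * 2);

        // PCM16 -> f32: values should match within one quantization step
        let decoded = Realtime::convert_pcm16_to_f32(&bytes);
        for (a, b) in original.iter().zip(decoded.iter()) {
            assert!((a - b).abs() <= 1.0 / 32767.0);
        }
    }
}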

impl RealtimeRef {
    pub fn set_realtime_channel(&mut self, channel: RealtimeChannel) {
        if let Some(mut inner) = self.borrow_mut() {
            inner.set_realtime_channel(channel);
        }
    }

    pub fn set_bot_entity_id(&mut self, cx: &mut Cx, bot_entity_id: EntityId) {
        if let Some(mut inner) = self.borrow_mut() {
            inner.set_bot_entity_id(cx, bot_entity_id);
        }
    }

    pub fn connection_requested(&mut self) -> bool {
        if let Some(mut inner) = self.borrow_mut() {
            inner.connection_requested()
        } else {
            false
        }
    }

    pub fn take_conversation_messages(&mut self) -> Vec<Message> {
        if let Some(mut inner) = self.borrow_mut() {
            inner.take_conversation_messages()
        } else {
            Vec::new()
        }
    }

    pub fn reset_state(&mut self, cx: &mut Cx) {
        if let Some(mut inner) = self.borrow_mut() {
            inner.reset_state(cx);
        }
    }

    pub fn set_bot_context(&mut self, bot_context: Option<crate::protocol::BotContext>) {
        if let Some(mut inner) = self.borrow_mut() {
            inner.set_bot_context(bot_context);
        }
    }
}