use crate::widgets::{
    avatar::AvatarWidgetRefExt, slot::SlotWidgetRefExt,
    standard_message_content::StandardMessageContentWidgetRefExt,
};
use crate::{protocol::*, utils::makepad::events::EventExt};
use makepad_widgets::permission::{Permission, PermissionStatus};
use makepad_widgets::{makepad_platform::AudioDeviceType, *};
use std::sync::{Arc, Mutex};

live_design! {
    use link::theme::*;
    use link::shaders::*;
    use link::widgets::*;

    use crate::widgets::chat_lines::*;
    use crate::widgets::standard_message_content::*;

    AIAnimation = <RoundedView> {
        width: 200, height: 200
        show_bg: true
        draw_bg: {
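            // Cheap 2D hash: maps a point to a pseudo-random value in [0, 1).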
            fn hash21(self, p: vec2) -> float {
                let mut p = fract(p * vec2(234.34, 435.345));
                p += dot(p, p + 34.23);
                return fract(p.x * p.y);
            }

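            // Value noise: bilinear blend of hashed lattice corners with a smoothstep fade.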
            fn noise(self, p: vec2) -> float {
                let i = floor(p);
                let f = fract(p);
                let f_smooth = f * f * (3.0 - 2.0 * f);
                let a = self.hash21(i);
                let b = self.hash21(i + vec2(1.0, 0.0));
                let c = self.hash21(i + vec2(0.0, 1.0));
                let d = self.hash21(i + vec2(1.0, 1.0));
                return mix(mix(a, b, f_smooth.x), mix(c, d, f_smooth.x), f_smooth.y);
            }

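            // Fractal Brownian motion: four octaves of value noise, manually unrolled.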
            fn fbm(self, p: vec2) -> float {
                let mut sum = 0.0;
                let mut amp = 0.5;
                let mut freq = 1.0;

                sum += self.noise(p * freq) * amp;
                amp *= 0.5;
                freq *= 2.0;

                sum += self.noise(p * freq) * amp;
                amp *= 0.5;
                freq *= 2.0;

                sum += self.noise(p * freq) * amp;
                amp *= 0.5;
                freq *= 2.0;

                sum += self.noise(p * freq) * amp;
                amp *= 0.5;
                freq *= 2.0;

                return sum;
            }

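            // Composite an animated orb, three orbiting particles, and a soft
            // center glow, then mask the result to a circle with an SDF.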
            fn pixel(self) -> vec4 {
                let uv = (self.pos - 0.5) * 2.0;

                let mut col = vec3(0.1, 0.1, 0.1);
                let radius = 0.3 + sin(self.time * 0.5) * 0.02;
                let d = length(uv);

                let angle = atan(uv.y, uv.x);
                let wave = sin(angle * 3.0 + self.time) * 0.1;
                let wave2 = cos(angle * 5.0 - self.time * 1.3) * 0.08;

                let noise1 = self.fbm(uv * 3.0 + self.time * 0.1);
                let noise2 = self.fbm(uv * 5.0 - self.time * 0.2);

                let orb_color = vec3(0.2, 0.6, 1.0);
                let orb = smoothstep(radius + wave + wave2, radius - 0.1 + wave + wave2, d);

                let gradient1 = vec3(0.8, 0.2, 0.5) * sin(angle + self.time);
                let gradient2 = vec3(0.2, 0.5, 1.0) * cos(angle - self.time * 0.7);

                let mut particles = 0.0;

                let particle_pos1 = vec2(
                    sin(self.time * 0.5) * 0.5,
                    cos(self.time * 0.3) * 0.5
                );
                particles += smoothstep(0.05, 0.0, length(uv - particle_pos1));

                let particle_pos2 = vec2(
                    sin(self.time * 0.7) * 0.5,
                    cos(self.time * 0.5) * 0.5
                );
                particles += smoothstep(0.05, 0.0, length(uv - particle_pos2));

                let particle_pos3 = vec2(
                    sin(self.time * 0.9) * 0.5,
                    cos(self.time * 0.7) * 0.5
                );
                particles += smoothstep(0.05, 0.0, length(uv - particle_pos3));

                col += orb * mix(orb_color, gradient1, noise1);
                col += orb * mix(gradient2, orb_color, noise2) * 0.5;
                col += particles * vec3(0.5, 0.8, 1.0);
                col += exp(-d * 4.0) * vec3(0.2, 0.4, 0.8) * 0.5;

                // Mask to a circle; `circle_radius` is in pixels, unlike the
                // normalized orb `radius` above.
                let sdf = Sdf2d::viewport(self.pos * self.rect_size);
                let circle_radius = min(self.rect_size.x, self.rect_size.y) * 0.5;
                sdf.circle(
                    self.rect_size.x * 0.5,
                    self.rect_size.y * 0.5,
                    circle_radius
                );

                sdf.fill_keep(vec4(col, 1.0));

                return sdf.result;
            }
        }
    }

    SimpleDropDown = <DropDown> {
        draw_text: {
            text_style: {font_size: 12}
            fn get_color(self) -> vec4 {
                return mix(
                    #2,
                    #x0,
                    self.down
                )
            }
        }

        popup_menu: {
            width: 300, height: Fit,
            flow: Down,
            padding: <THEME_MSPACE_1> {}

            menu_item: <PopupMenuItem> {
                width: Fill, height: Fit,
                align: { y: 0.5 }
                padding: {left: 15, right: 15, top: 10, bottom: 10}

                draw_text: {
                    fn get_color(self) -> vec4 {
                        return mix(
                            mix(
                                #3,
                                #x0,
                                self.active
                            ),
                            #x0,
                            self.hover
                        )
                    }
                }

                draw_bg: {
                    instance color: #f
                    instance color_active: #e9
                }
            }

            draw_bg: {
                instance color: #f9
                border_size: 1.0
            }
        }
    }

    TranscriptionModelSelector = <View> {
        height: Fit
        align: {x: 0.0, y: 0.5}
        spacing: 10

        <Label> {
            text: "Transcription model:"
            draw_text: {
                color: #222
                text_style: {font_size: 11}
            }
        }

        transcription_model_selector = <SimpleDropDown> {
            margin: 5
            labels: ["whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe"]
            values: [whisper_1, gpt_4o_transcribe, gpt_4o_mini_transcribe]

            draw_text: {
                color: #222
                text_style: {font_size: 11}
            }

            popup_menu = {
                draw_text: {
                    color: #222
                    text_style: {font_size: 11}
                }
            }
        }
    }

    VoiceSelector = <View> {
        height: Fit
        align: {x: 0.0, y: 0.5}
        spacing: 10

        <Label> {
            text: "Voice:"
            draw_text: {
                color: #222
                text_style: {font_size: 11}
            }
        }

        voice_selector = <SimpleDropDown> {
            margin: 5
            labels: ["marin", "cedar", "alloy", "shimmer", "ash", "ballad", "coral", "echo", "sage", "verse"]
            values: [marin, cedar, alloy, shimmer, ash, ballad, coral, echo, sage, verse]

            draw_text: {
                color: #222
                text_style: {font_size: 11}
            }

            popup_menu = {
                draw_text: {
                    color: #222
                    text_style: {font_size: 11}
                }
            }
        }
    }

    IconButton = <Button> {
        width: Fit, height: Fit
        draw_text: {
            text_style: <THEME_FONT_ICONS> {
                font_size: 14.
            }
            color: #5,
            color_hover: #2,
            color_focus: #2
            color_down: #5
        }
        draw_bg: {
            color_down: #0000
            border_radius: 7.
            border_size: 0.
        }
    }

    DeviceSelector = <View> {
        height: Fit
        align: {x: 0.0, y: 0.5}
        spacing: 5

        label = <Label> {
            draw_text: {
                color: #222
                text_style: {font_size: 11}
            }
        }

        device_selector = <SimpleDropDown> {
            margin: 5
            labels: ["default"]
            values: [default]

            draw_text: {
                color: #222
                text_style: {font_size: 11}
            }

            popup_menu = {
                draw_text: {
                    color: #222
                    text_style: {font_size: 11}
                }
            }
        }
    }

    MuteControl = <View> {
        width: Fit, height: Fit
        align: {x: 0.5, y: 0.5}
        cursor: Hand
        mute_button = <IconButton> {
            text: ""
        }
        mute_status = <Label> {
            padding: 0
            text: "Mute"
            draw_text: {
                color: #222
                text_style: {font_size: 11}
            }
        }
    }

    DevicesSelector = <View> {
        height: Fit, width: Fill
        flow: Down, spacing: 5
        <View> {
            height: Fit
            mic_selector = <DeviceSelector> {
                width: Fit
                label = { text: "Mic:" }
            }
            mute_control = <MuteControl> {}
        }
        speaker_selector = <DeviceSelector> {
            label = { text: "Speaker:" }
        }
    }

    Controls = <View> {
        width: Fill, height: Fit
        flow: Down
        spacing: 10
        align: {x: 0.0, y: 0.5}
        padding: 20

        devices_selector = <DevicesSelector> {}
        selected_devices_view = <View> {
            visible: false
            height: Fit
            align: {x: 0.0, y: 0.5}
            selected_devices = <Label> {
                draw_text: {
                    text_style: {font_size: 11}
                    color: #222
                }
            }
        }

        voice_selector_wrapper = <VoiceSelector> {}
        selected_voice_view = <View> {
            visible: false
            height: Fit
            align: {x: 0.0, y: 0.5}
            selected_voice = <Label> {
                draw_text: {
                    text_style: {font_size: 11}
                    color: #222
                }
            }
        }

        <TranscriptionModelSelector> {}

        toggle_interruptions = <Toggle> {
            text: "Allow interruptions\n(requires headphones, no AEC yet)"
            width: Fit
            height: Fit
            draw_text: {
                fn get_color(self) -> vec4 {
                    return #222;
                }
                text_style: {font_size: 10}
            }

            label_walk: {
                margin: {left: 50}
            }
            draw_bg: {
                size: 25.
            }

            padding: {left: 5, right: 5, top: 5, bottom: 5}
        }

        status_label = <Label> {
            text: "Ready to start"
            width: Fill
            draw_text: {
                color: #222
                wrap: Word
                text_style: {font_size: 11}
            }
        }

        request_permission_button = <RoundedShadowView> {
            visible: false
            cursor: Hand
            margin: {left: 10, right: 10, bottom: 0, top: 10}
            width: Fill, height: Fit
            align: {x: 0.5, y: 0.5}
            padding: {left: 20, right: 20, bottom: 10, top: 10}
            draw_bg: {
                color: #f9f9f9
                border_radius: 4.5,
                uniform shadow_color: #0002
                shadow_radius: 8.0,
                shadow_offset: vec2(0.0, -1.5)
            }
            <Label> {
                text: "Request microphone permission"
                draw_text: {
                    text_style: {font_size: 11}
                    color: #000
                }
            }
        }

        tool_permission_line = <ToolRequestLine> {
            visible: false
            margin: {left: 10, right: 10, top: 10}
        }

        start_stop_button = <RoundedShadowView> {
            cursor: Hand
            margin: {left: 10, right: 10, bottom: 0, top: 10}
            width: Fill, height: Fit
            align: {x: 0.5, y: 0.5}
            padding: {left: 20, right: 20, bottom: 10, top: 10}
            draw_bg: {
                color: #f9f9f9
                border_radius: 4.5,
                uniform shadow_color: #0002
                shadow_radius: 8.0,
                shadow_offset: vec2(0.0, -1.5)
            }
            stop_start_label = <Label> {
                text: "Start"
                draw_text: {
                    text_style: {font_size: 11}
                    color: #000
                }
            }
        }
    }

    pub Realtime = {{Realtime}} <RoundedView> {
        show_bg: true
        draw_bg: {
            color: #f9f9f9
            border_radius: 10.0
        }
        flow: Down
        spacing: 20
        width: Fill, height: Fit
        align: {x: 0.5, y: 0.0}
        padding: 10

        header = <View> {
            height: Fit
            flow: Overlay

            align: {x: 1.0, y: 0.5}
            close_button = <IconButton> {
                text: ""
            }
        }

        <AIAnimation> {}
        <Controls> {}
    }

    pub RealtimeContent = <RoundedView> {
        align: {x: 0.5, y: 0.5}

        <AdaptiveView> {
            Desktop = {
                width: 450, height: Fit
                align: {x: 0.5, y: 0.5}

                <CachedWidget> {
                    realtime = <Realtime> {}
                }
            }

            Mobile = {
                width: Fill, height: Fill
                align: {x: 0.5, y: 0.5}

                <CachedWidget> {
                    realtime = <Realtime> {}
                }
            }
        }
    }
}

#[derive(Clone, Debug, DefaultNone)]
pub enum RealtimeModalAction {
    None,
    DismissModal,
}

#[derive(Clone, Debug, Default, PartialEq)]
enum MicPermissionStatus {
    #[default]
    NotDetermined,
    Requesting,
    Granted,
    Denied,
}

#[derive(Live, LiveHook, Widget)]
pub struct Realtime {
    #[deref]
    view: View,

    #[rust]
    realtime_channel: Option<RealtimeChannel>,

    #[rust]
    is_connected: bool,

    #[rust]
    conversation_active: bool,

    #[rust]
    transcript: String,

    #[rust]
    conversation_messages: Vec<(String, Message)>,

    #[rust]
    recorded_audio: Arc<Mutex<Vec<f32>>>,

    #[rust]
    playback_audio: Arc<Mutex<Vec<f32>>>,

    #[rust]
    should_record: Arc<Mutex<bool>>,

    #[rust]
    is_muted: Arc<Mutex<bool>>,

    #[rust]
    is_playing: Arc<Mutex<bool>>,

    #[rust]
    playback_position: Arc<Mutex<usize>>,

    #[rust]
    audio_setup_done: bool,

    #[rust]
    audio_streaming_timer: Option<Timer>,

    #[rust]
    ai_is_responding: bool,

    #[rust]
    user_is_interrupting: bool,

    #[rust]
    current_assistant_item_id: Option<String>,

    #[rust]
    selected_voice: String,

    #[rust]
    has_sent_audio: bool,

    #[rust]
    should_request_connection: bool,

    #[rust]
    connection_request_sent: bool,

    #[rust]
    bot_entity_id: Option<EntityId>,

    #[rust]
    bot_context: Option<crate::protocol::BotContext>,

    /// Pending tool call as (name, call_id, arguments).
    #[rust]
    pending_tool_call: Option<(String, String, String)>,

    #[rust]
    audio_devices: Vec<AudioDeviceDesc>,

    #[rust]
    mic_permission_status: MicPermissionStatus,
}

impl Widget for Realtime {
    fn handle_event(&mut self, cx: &mut Cx, event: &Event, scope: &mut Scope) {
        self.view.handle_event(cx, event, scope);
        self.widget_match_event(cx, event, scope);

        if let Some(_value) = self
            .drop_down(id!(transcription_model_selector))
            .changed(event.actions())
        {
            if self.is_connected {
                self.update_session_config(cx);
            }
        }

        if let Some(enabled) = self
            .check_box(id!(toggle_interruptions))
            .changed(event.actions())
        {
            if enabled && self.conversation_active {
                *self.should_record.lock().unwrap() = true;
            }
        }

        self.handle_realtime_events(cx);

        // Ask for microphone access once, the first time we get here.
        if !self.audio_setup_done
            && self.mic_permission_status == MicPermissionStatus::NotDetermined
        {
            cx.request_permission(Permission::AudioInput);
            self.mic_permission_status = MicPermissionStatus::Requesting;
        }

        if !self.audio_setup_done
            && let Event::PermissionResult(pr) = event
        {
            if pr.permission == Permission::AudioInput {
                match pr.status {
                    PermissionStatus::Granted => {
                        self.mic_permission_status = MicPermissionStatus::Granted;
                        self.setup_audio(cx);
                        self.audio_setup_done = true;
                        self.view(id!(start_stop_button)).set_visible(cx, true);
                    }
                    PermissionStatus::DeniedCanRetry => {
                        self.label(id!(status_label)).set_text(cx, "⚠️ Moly needs microphone access to have realtime conversations.\nClick the button below to request it again.");
                        self.view(id!(request_permission_button))
                            .set_visible(cx, true);
                        self.view(id!(start_stop_button)).set_visible(cx, false);
                        self.mic_permission_status = MicPermissionStatus::Denied;
                    }
                    _ => {
                        self.label(id!(status_label)).set_text(cx, "⚠️ Moly does not have access to your microphone.\nTo continue, allow Moly to access your microphone\nin your system settings\nand then restart the app.");
                        self.view(id!(request_permission_button))
                            .set_visible(cx, false);
                        self.view(id!(start_stop_button)).set_visible(cx, false);
                        self.mic_permission_status = MicPermissionStatus::Denied;
                    }
                }
            }
        }

        if self.audio_setup_done {
            self.try_start_pending_conversation(cx);
        }

        if let Some(timer) = &self.audio_streaming_timer {
            if timer.is_event(event).is_some() && self.conversation_active {
                self.send_audio_chunk_to_realtime(cx);

                // With interruptions disabled, resume listening once playback has drained.
                if self.playback_audio.lock().unwrap().is_empty() {
                    let interruptions_enabled =
                        self.check_box(id!(toggle_interruptions)).active(cx);

                    if !interruptions_enabled {
                        if let Ok(mut should_record) = self.should_record.try_lock() {
                            if !*should_record && self.conversation_active && !self.ai_is_responding
                            {
                                ::log::debug!(
                                    "Auto-resuming recording - playback empty and interruptions disabled"
                                );
                                *should_record = true;
                                self.label(id!(status_label))
                                    .set_text(cx, "🎤 Listening...");
                            }
                        }
                    }
                }
            }
        }
    }

    fn draw_walk(&mut self, cx: &mut Cx2d, scope: &mut Scope, walk: Walk) -> DrawStep {
        self.view.draw_walk(cx, scope, walk)
    }
}

impl WidgetMatchEvent for Realtime {
    fn handle_audio_devices(
        &mut self,
        cx: &mut Cx,
        devices: &AudioDevicesEvent,
        _scope: &mut Scope,
    ) {
        let mut input_names = Vec::new();
        let mut output_names = Vec::new();
        let mut default_input_name = String::new();
        let mut default_output_name = String::new();

        devices
            .descs
            .iter()
            .for_each(|desc| match desc.device_type {
                AudioDeviceType::Input => {
                    input_names.push(desc.name.clone());
                    if desc.is_default {
                        default_input_name = desc.name.clone();
                    }
                }
                AudioDeviceType::Output => {
                    output_names.push(desc.name.clone());
                    if desc.is_default {
                        default_output_name = desc.name.clone();
                    }
                }
            });

        let mic_dropdown = self.drop_down(id!(mic_selector.device_selector));
        mic_dropdown.set_labels(cx, input_names.clone());
        mic_dropdown.set_selected_by_label(&default_input_name, cx);

        let speaker_dropdown = self.drop_down(id!(speaker_selector.device_selector));
        speaker_dropdown.set_labels(cx, output_names.clone());
        speaker_dropdown.set_selected_by_label(&default_output_name, cx);

        // Adopt the default device when it was not present in the previous
        // device list (e.g. it was just plugged in).
        let default_input = devices.default_input();
        let default_output = devices.default_output();

        if !self
            .audio_devices
            .iter()
            .any(|d| d.device_type == AudioDeviceType::Input && d.device_id == default_input[0])
        {
            cx.use_audio_inputs(&default_input);
        }

        if !self
            .audio_devices
            .iter()
            .any(|d| d.device_type == AudioDeviceType::Output && d.device_id == default_output[0])
        {
            cx.use_audio_outputs(&default_output);
        }

        self.audio_devices = devices.descs.clone();
    }

    fn handle_actions(&mut self, cx: &mut Cx, actions: &Actions, _scope: &mut Scope) {
        if self
            .view(id!(start_stop_button))
            .finger_down(actions)
            .is_some()
        {
            if self.conversation_active {
                self.reset_all(cx);
            } else {
                self.start_conversation(cx);
            }
            self.update_ui(cx);
        }

        if self
            .view(id!(tool_permission_line))
            .button(id!(message_section.content_section.tool_actions.approve))
            .clicked(actions)
        {
            self.approve_tool_call(cx);
        }

        if self
            .view(id!(tool_permission_line))
            .button(id!(message_section.content_section.tool_actions.deny))
            .clicked(actions)
        {
            self.deny_tool_call(cx);
        }

        let speaker_dropdown = self.drop_down(id!(speaker_selector.device_selector));
        if let Some(_id) = speaker_dropdown.changed(actions) {
            let selected_device = self
                .audio_devices
                .iter()
                .find(|device| device.name == speaker_dropdown.selected_label());
            if let Some(device) = selected_device {
                cx.use_audio_outputs(&[device.device_id]);
            }
        }

        let microphone_dropdown = self.drop_down(id!(mic_selector.device_selector));
        if let Some(_id) = microphone_dropdown.changed(actions) {
            let selected_device = self
                .audio_devices
                .iter()
                .find(|device| device.name == microphone_dropdown.selected_label());
            if let Some(device) = selected_device {
                cx.use_audio_inputs(&[device.device_id]);
            }
        }

        let mute_button = self.button(id!(mute_button));
        let mute_label = self.label(id!(mute_status));
        if self.view(id!(mute_control)).finger_down(actions).is_some()
            || mute_button.clicked(actions)
        {
            let mut is_muted = self.is_muted.lock().unwrap();
            if *is_muted {
                *is_muted = false;
                mute_button.set_text(cx, "");
                mute_label.set_text(cx, "Mute");
            } else {
                *is_muted = true;
                mute_button.set_text(cx, "");
                mute_label.set_text(cx, "Unmute");
            }
        }

        if self
            .view(id!(request_permission_button))
            .finger_up(actions)
            .is_some()
        {
            cx.request_permission(Permission::AudioInput);
        }

        if self.button(id!(close_button)).clicked(actions) {
            self.reset_state(cx);
            cx.action(RealtimeModalAction::DismissModal);
        }
    }
}

impl Realtime {
    pub fn set_realtime_channel(&mut self, channel: RealtimeChannel) {
        self.realtime_channel = Some(channel);
        self.is_connected = true;
    }

    pub fn set_bot_entity_id(&mut self, cx: &mut Cx, bot_entity_id: EntityId) {
        self.bot_entity_id = Some(bot_entity_id);

        if let Some(EntityId::Bot(bot_id)) = &self.bot_entity_id {
            // Providers other than api.openai.com get a generic "whisper" label
            // instead of OpenAI's "whisper-1".
            if !bot_id.provider().contains("api.openai.com") {
                let labels = vec![
                    "whisper".to_string(),
                    "gpt-4o-transcribe".to_string(),
                    "gpt-4o-mini-transcribe".to_string(),
                ];
                self.drop_down(id!(transcription_model_selector))
                    .set_labels(cx, labels);
            }
        }
    }

    pub fn set_bot_context(&mut self, bot_context: Option<crate::protocol::BotContext>) {
        self.bot_context = bot_context;
    }

    fn try_start_pending_conversation(&mut self, cx: &mut Cx) {
        if self.is_connected && !self.conversation_active && self.should_request_connection {
            self.should_request_connection = false;
            self.connection_request_sent = false;
            self.conversation_active = true;
            self.ai_is_responding = true;
            self.user_is_interrupting = false;
            self.current_assistant_item_id = None;
            *self.should_record.lock().unwrap() = false;
            self.has_sent_audio = false;

            self.recorded_audio.lock().unwrap().clear();
            self.playback_audio.lock().unwrap().clear();
            *self.is_playing.lock().unwrap() = false;
            *self.playback_position.lock().unwrap() = 0;
            self.transcript.clear();

            self.update_ui(cx);
            self.start_audio_streaming(cx);
            self.create_greeting_response(cx);
        }
    }

    fn start_conversation(&mut self, cx: &mut Cx) {
        if !self.is_connected {
            self.should_request_connection = true;
            self.connection_request_sent = false;
            self.label(id!(status_label))
                .set_text(cx, "Reconnecting...");
            return;
        }

        self.conversation_active = true;
        self.ai_is_responding = true;
        self.user_is_interrupting = false;
        self.current_assistant_item_id = None;
        *self.should_record.lock().unwrap() = false;
        self.has_sent_audio = false;

        self.recorded_audio.lock().unwrap().clear();
        self.playback_audio.lock().unwrap().clear();
        *self.is_playing.lock().unwrap() = false;
        *self.playback_position.lock().unwrap() = 0;
        self.transcript.clear();

        self.update_ui(cx);
        self.label(id!(status_label)).set_text(cx, "Loading...");
        self.start_audio_streaming(cx);
        self.create_greeting_response(cx);
    }

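    // Poll the microphone buffer every 20 ms and stream whatever has accumulated.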
    fn start_audio_streaming(&mut self, cx: &mut Cx) {
        if self.audio_streaming_timer.is_none() {
            let timer = cx.start_interval(0.020);
            self.audio_streaming_timer = Some(timer);
        }
    }

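    // Drain the recorded buffer and forward it as a single PCM16 chunk.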
    fn send_audio_chunk_to_realtime(&mut self, _cx: &mut Cx) {
        if let Ok(mut recorded) = self.recorded_audio.try_lock() {
            if !recorded.is_empty() {
                let audio_data = recorded.clone();
                recorded.clear();

                let pcm16_data = Self::convert_f32_to_pcm16(&audio_data);
                if let Some(channel) = &self.realtime_channel {
                    let _ = channel
                        .command_sender
                        .unbounded_send(RealtimeCommand::SendAudio(pcm16_data));
                }
            }
        }
    }

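    // Stop the conversation and show `status_message`; when `allow_reconnect`
    // is false, any pending reconnect request is cancelled as well.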
    fn reset_conversation_state(
        &mut self,
        cx: &mut Cx,
        status_message: &str,
        allow_reconnect: bool,
    ) {
        self.stop_conversation(cx);

        self.is_connected = false;
        self.has_sent_audio = false;

        if !allow_reconnect {
            self.should_request_connection = false;
            self.connection_request_sent = false;
        }
        self.transcript.clear();
        self.label(id!(status_label)).set_text(cx, status_message);

        self.view(id!(tool_permission_line)).set_visible(cx, false);
        self.pending_tool_call = None;

        self.view(id!(voice_selector_wrapper)).set_visible(cx, true);
        self.view(id!(selected_voice_view)).set_visible(cx, false);

        self.update_ui(cx);
    }

    fn reset_all(&mut self, cx: &mut Cx) {
        self.reset_conversation_state(cx, "Ready to start", false);

        if let Some(channel) = &self.realtime_channel {
            let _ = channel
                .command_sender
                .unbounded_send(RealtimeCommand::StopSession);
        }
    }

    fn stop_conversation(&mut self, cx: &mut Cx) {
        self.conversation_active = false;
        self.ai_is_responding = false;
        self.user_is_interrupting = false;
        self.current_assistant_item_id = None;
        *self.should_record.lock().unwrap() = false;
        *self.is_playing.lock().unwrap() = false;

        if let Some(timer) = self.audio_streaming_timer.take() {
            cx.stop_timer(timer);
        }

        if let Ok(mut playback) = self.playback_audio.try_lock() {
            playback.clear();
        }
        if let Ok(mut recorded) = self.recorded_audio.try_lock() {
            recorded.clear();
        }
    }

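    // Collect all pending events first so the receiver lock is released before
    // any state or UI mutation.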
    fn handle_realtime_events(&mut self, cx: &mut Cx) {
        let events = if let Some(channel) = &self.realtime_channel {
            if let Ok(mut receiver_opt) = channel.event_receiver.lock() {
                if let Some(receiver) = receiver_opt.as_mut() {
                    let mut events = Vec::new();
                    while let Ok(Some(event)) = receiver.try_next() {
                        events.push(event);
                    }
                    events
                } else {
                    Vec::new()
                }
            } else {
                Vec::new()
            }
        } else {
            Vec::new()
        };

        for event in events {
            match event {
                RealtimeEvent::SessionReady => {
                    self.label(id!(connection_status))
                        .set_text(cx, "✅ Connected to OpenAI");
                }
                RealtimeEvent::AudioData(audio_data) => {
                    if self.user_is_interrupting {
                        self.user_is_interrupting = false;
                    }

                    self.ai_is_responding = true;

                    self.add_audio_to_playback(audio_data);

                    if self.conversation_active {
                        let interruptions_enabled =
                            self.check_box(id!(toggle_interruptions)).active(cx);

                        // Keep recording while the AI speaks only if interruptions are allowed.
                        *self.should_record.lock().unwrap() = interruptions_enabled;
                    }

                    self.label(id!(status_label))
                        .set_text(cx, "🔊 Playing audio...");
                }
                RealtimeEvent::AudioTranscript(text) => {
                    self.transcript.push_str(&text);
                }
                RealtimeEvent::AudioTranscriptCompleted(transcript, item_id) => {
                    if !transcript.trim().is_empty() {
                        let message = Message {
                            from: self.bot_entity_id.clone().unwrap_or_default(),
                            content: MessageContent {
                                text: transcript,
                                ..Default::default()
                            },
                            ..Default::default()
                        };
                        self.conversation_messages.push((item_id, message));
                    }
                }
                RealtimeEvent::UserTranscriptCompleted(transcript, item_id) => {
                    if !transcript.trim().is_empty() {
                        let message = Message {
                            from: EntityId::User,
                            content: MessageContent {
                                text: transcript,
                                ..Default::default()
                            },
                            ..Default::default()
                        };
                        self.conversation_messages.push((item_id, message));
                    }
                }
                RealtimeEvent::SpeechStarted => {
                    self.label(id!(status_label))
                        .set_text(cx, "🎤 User speech detected");

                    self.user_is_interrupting = true;

                    if let Ok(mut playback) = self.playback_audio.try_lock() {
                        let cleared_samples = playback.len();
                        playback.clear();
                        ::log::debug!(
                            "Cleared {} audio samples from playback buffer to prevent feedback",
                            cleared_samples
                        );
                    }

                    if let Ok(mut is_playing) = self.is_playing.try_lock() {
                        *is_playing = false;
                    }
                    if let Ok(mut position) = self.playback_position.try_lock() {
                        *position = 0;
                    }

                    if self.conversation_active {
                        *self.should_record.lock().unwrap() = true;
                    }
                }
                RealtimeEvent::SpeechStopped => {
                    self.label(id!(status_label)).set_text(cx, "Processing...");

                    if self.conversation_active {
                        *self.should_record.lock().unwrap() = false;
                    }
                }
                RealtimeEvent::ResponseCompleted => {
                    let status_label = self.label(id!(status_label));
                    self.user_is_interrupting = false;
                    self.ai_is_responding = false;
                    self.current_assistant_item_id = None;

                    if self.conversation_active {
                        let interruptions_enabled =
                            self.check_box(id!(toggle_interruptions)).active(cx);

                        if interruptions_enabled {
                            *self.should_record.lock().unwrap() = true;
                            status_label.set_text(cx, "✅ Response generated - 🎤 listening again");
                        } else if self.playback_audio.lock().unwrap().is_empty() {
                            ::log::debug!(
                                "Setting should_record to true - response completed and playback empty"
                            );
                            *self.should_record.lock().unwrap() = true;
                            status_label
                                .set_text(cx, "✅ Response generated - 🎤 listening again");
                        } else {
                            status_label
                                .set_text(cx, "✅ Response generated - 🔊 playing audio");
                            ::log::debug!("Playback still active, keeping recording disabled");
                        }
                    }
                }
                RealtimeEvent::FunctionCallRequest {
                    name,
                    call_id,
                    arguments,
                } => {
                    let dangerous_mode_enabled = self
                        .bot_context
                        .as_ref()
                        .map(|ctx| {
                            ctx.tool_manager()
                                .map(|tm| tm.get_dangerous_mode_enabled())
                                .unwrap_or(false)
                        })
                        .unwrap_or(false);

                    if dangerous_mode_enabled {
                        use crate::mcp::mcp_manager::display_name_from_namespaced;
                        let display_name = display_name_from_namespaced(&name);
                        self.label(id!(status_label))
                            .set_text(cx, &format!("🔧 Auto-executing tool: {}", display_name));

                        self.handle_function_call(cx, name, call_id, arguments);
                    } else {
                        self.label(id!(status_label))
                            .set_text(cx, &format!("🔧 Tool permission requested: {}", name));

                        self.show_tool_permission_request(cx, name, call_id, arguments);
                    }
                }
                RealtimeEvent::Error(error) => {
                    ::log::error!("Realtime API error: {}", error);

                    if !self.is_connected || !self.conversation_active {
                        ::log::debug!(
                            "Ignoring error - already disconnected or conversation not active"
                        );
                        return;
                    }

                    if error.contains("Connection lost")
                        || error.contains("Connection closed")
                        || error.contains("Failed to send")
                    {
                        self.reset_conversation_state(
                            cx,
                            "❌ Connection lost. Please restart the conversation.",
                            true,
                        );
                    } else {
                        self.label(id!(status_label))
                            .set_text(cx, &format!("❌ Error: {}", error));

                        if self.conversation_active {
                            *self.should_record.lock().unwrap() = true;
                        }
                    }
                }
            }
        }
    }

    fn show_tool_permission_request(
        &mut self,
        cx: &mut Cx,
        name: String,
        call_id: String,
        arguments: String,
    ) {
        use crate::mcp::mcp_manager::display_name_from_namespaced;

        self.pending_tool_call = Some((name.clone(), call_id, arguments));

        let tool_line = self.view(id!(tool_permission_line));
        tool_line.set_visible(cx, true);

        let display_name = display_name_from_namespaced(&name);

        tool_line
            .avatar(id!(message_section.sender.avatar))
            .borrow_mut()
            .unwrap()
            .avatar = Some(crate::protocol::Picture::Grapheme("T".into()));
        tool_line
            .label(id!(message_section.sender.name))
            .set_text(cx, "Permission Request");

        let content = crate::protocol::MessageContent {
            text: format!("Tool '{}' is requesting permission to run", display_name),
            ..Default::default()
        };
        tool_line
            .slot(id!(message_section.content_section.content))
            .current()
            .as_standard_message_content()
            .set_content(cx, &content);

        tool_line
            .view(id!(message_section.content_section.tool_actions))
            .set_visible(cx, true);

        // Pause recording while waiting for the user's decision.
        *self.should_record.lock().unwrap() = false;

        self.view.redraw(cx);
    }

    fn handle_function_call(
        &mut self,
        _cx: &mut Cx,
        name: String,
        call_id: String,
        arguments: String,
    ) {
        let Some(context) = self.bot_context.as_ref().cloned() else {
            ::log::error!("No bot context available for function call");
            if let Some(channel) = &self.realtime_channel {
                let error_result = serde_json::json!({
                    "error": "Bot context not available"
                })
                .to_string();
                let _ = channel.command_sender.unbounded_send(
                    crate::protocol::RealtimeCommand::SendFunctionCallResult {
                        call_id,
                        output: error_result,
                    },
                );
            }
            return;
        };

        let Some(tool_manager) = context.tool_manager() else {
            ::log::error!("No tool manager available for function call");
            if let Some(channel) = &self.realtime_channel {
                let error_result = serde_json::json!({
                    "error": "Tool manager not available"
                })
                .to_string();
                let _ = channel.command_sender.unbounded_send(
                    crate::protocol::RealtimeCommand::SendFunctionCallResult {
                        call_id,
                        output: error_result,
                    },
                );
            }
            return;
        };

        let channel = self.realtime_channel.clone();

        // Execute the tool asynchronously and send the result (or error) back
        // through the realtime channel.
        let future = async move {
            let arguments_map = match crate::mcp::mcp_manager::parse_tool_arguments(&arguments) {
                Ok(args) => args,
                Err(e) => {
                    ::log::error!("Failed to parse function call arguments: {}", e);
                    if let Some(channel) = &channel {
                        let error_result = serde_json::json!({
                            "error": e
                        })
                        .to_string();
                        let _ = channel.command_sender.unbounded_send(
                            crate::protocol::RealtimeCommand::SendFunctionCallResult {
                                call_id,
                                output: error_result,
                            },
                        );
                    }
                    return;
                }
            };

            let result = tool_manager
                .execute_tool_call(&name, &call_id, arguments_map)
                .await;

            if let Some(channel) = &channel {
                let output = if result.is_error {
                    serde_json::json!({
                        "error": result.content
                    })
                    .to_string()
                } else {
                    result.content
                };

                let _ = channel.command_sender.unbounded_send(
                    crate::protocol::RealtimeCommand::SendFunctionCallResult { call_id, output },
                );
            }
        };

        crate::utils::asynchronous::spawn(future);
    }

    fn approve_tool_call(&mut self, cx: &mut Cx) {
        if let Some((name, call_id, arguments)) = self.pending_tool_call.take() {
            self.view(id!(tool_permission_line)).set_visible(cx, false);

            use crate::mcp::mcp_manager::display_name_from_namespaced;
            let display_name = display_name_from_namespaced(&name);
            self.label(id!(status_label))
                .set_text(cx, &format!("🔧 Executing tool: {}", display_name));

            self.handle_function_call(cx, name, call_id, arguments);

            if self.conversation_active {
                *self.should_record.lock().unwrap() = true;
            }

            self.view.redraw(cx);
        }
    }

    fn deny_tool_call(&mut self, cx: &mut Cx) {
        if let Some((name, call_id, _arguments)) = self.pending_tool_call.take() {
            self.view(id!(tool_permission_line)).set_visible(cx, false);

            if let Some(channel) = &self.realtime_channel {
                let denial_result = serde_json::json!({
                    "error": "Tool execution denied by user"
                })
                .to_string();
                let _ = channel.command_sender.unbounded_send(
                    crate::protocol::RealtimeCommand::SendFunctionCallResult {
                        call_id,
                        output: denial_result,
                    },
                );
            }

            use crate::mcp::mcp_manager::display_name_from_namespaced;
            let display_name = display_name_from_namespaced(&name);
            self.label(id!(status_label))
                .set_text(cx, &format!("🚫 Tool '{}' denied", display_name));

            if self.conversation_active {
                *self.should_record.lock().unwrap() = true;
            }

            self.view.redraw(cx);
        }
    }

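    // Register the Makepad audio callbacks. The input callback decimates the
    // mic signal to 24 kHz by sample-skipping; the output callback upsamples
    // the 24 kHz response by sample repetition. Both use try_lock so the audio
    // threads never block on the UI thread.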
    fn setup_audio(&mut self, cx: &mut Cx) {
        let recorded_audio = self.recorded_audio.clone();
        let should_record = self.should_record.clone();
        let is_muted = self.is_muted.clone();

        cx.audio_input(0, move |info, input_buffer| {
            if let Ok(should_record_guard) = should_record.try_lock() {
                if let Ok(is_muted_guard) = is_muted.try_lock() {
                    if *should_record_guard && !*is_muted_guard {
                        if let Ok(mut recorded) = recorded_audio.try_lock() {
                            let channel = input_buffer.channel(0);

                            // Naive decimation: keep every Nth sample. Assumes the
                            // input rate is at least 24 kHz and an integer multiple
                            // of it (e.g. 48 kHz -> 24 kHz).
                            let input_sample_rate = info.sample_rate;
                            let target_sample_rate = 24000.0;
                            let downsample_ratio =
                                (input_sample_rate / target_sample_rate) as usize;

                            for i in (0..channel.len()).step_by(downsample_ratio) {
                                recorded.push(channel[i]);
                            }
                        }
                    }
                }
            }
        });

        let playback_audio = self.playback_audio.clone();
        let playback_position = self.playback_position.clone();
        let is_playing = self.is_playing.clone();

        cx.audio_output(0, move |info, output_buffer| {
            output_buffer.zero();

            if let Ok(mut playback) = playback_audio.try_lock() {
                if let Ok(mut pos) = playback_position.try_lock() {
                    if let Ok(mut playing) = is_playing.try_lock() {
                        // Upsample by repeating each 24 kHz sample to match the
                        // output rate (e.g. 2x for 48 kHz).
                        let input_sample_rate = 24000.0;
                        let output_sample_rate = info.sample_rate;
                        let upsample_ratio = (output_sample_rate / input_sample_rate) as usize;

                        if *playing
                            && !playback.is_empty()
                            && *pos < playback.len() * upsample_ratio
                        {
                            let frame_count = output_buffer.frame_count();
                            let channel_count = output_buffer.channel_count();

                            let mut samples_to_drain = 0;

                            for frame_idx in 0..frame_count {
                                let sample_idx = *pos / upsample_ratio;
                                if sample_idx < playback.len() {
                                    let audio_sample = playback[sample_idx];

                                    for channel_idx in 0..channel_count {
                                        let channel = output_buffer.channel_mut(channel_idx);
                                        channel[frame_idx] = audio_sample;
                                    }

                                    *pos += 1;

                                    if *pos % upsample_ratio == 0 {
                                        samples_to_drain += 1;
                                    }
                                } else {
                                    *playing = false;
                                    *pos = 0;
                                    samples_to_drain = playback.len();
                                    break;
                                }
                            }

                            // Drop consumed samples and rebase the position.
                            if samples_to_drain > 0 && samples_to_drain <= playback.len() {
                                playback.drain(..samples_to_drain);
                                *pos = (*pos).saturating_sub(samples_to_drain * upsample_ratio);
                            }
                        } else if *playing && playback.is_empty() {
                            *playing = false;
                            *pos = 0;
                        }
                    }
                }
            }
        });

        self.audio_setup_done = true;
    }

    fn add_audio_to_playback(&mut self, audio_bytes: Vec<u8>) {
        let samples = Self::convert_pcm16_to_f32(&audio_bytes);

        if let Ok(mut playback) = self.playback_audio.try_lock() {
            if let Ok(mut is_playing) = self.is_playing.try_lock() {
                if !*is_playing {
                    playback.clear();
                    *self.playback_position.lock().unwrap() = 0;
                    *is_playing = true;
                    ::log::debug!(
                        "Started fresh playback of AI response audio ({} samples)",
                        samples.len()
                    );
                }
            }

            playback.extend_from_slice(&samples);
        }
    }

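    // Encode f32 samples in [-1, 1] as 16-bit little-endian PCM.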
    fn convert_f32_to_pcm16(samples: &[f32]) -> Vec<u8> {
        let mut pcm16_bytes = Vec::with_capacity(samples.len() * 2);

        for &sample in samples {
            let clamped = sample.clamp(-1.0, 1.0);
            let pcm16_sample = (clamped * 32767.0) as i16;
            pcm16_bytes.extend_from_slice(&pcm16_sample.to_le_bytes());
        }

        pcm16_bytes
    }

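    // Decode 16-bit little-endian PCM bytes back into f32 samples in [-1, 1].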
    fn convert_pcm16_to_f32(bytes: &[u8]) -> Vec<f32> {
        let mut samples = Vec::with_capacity(bytes.len() / 2);

        for chunk in bytes.chunks_exact(2) {
            let pcm16_sample = i16::from_le_bytes([chunk[0], chunk[1]]);
            let f32_sample = pcm16_sample as f32 / 32767.0;
            samples.push(f32_sample);
        }

        samples
    }

    fn update_session_config(&mut self, cx: &mut Cx) {
        self.selected_voice = self.drop_down(id!(voice_selector)).selected_label();
        self.view(id!(voice_selector_wrapper))
            .set_visible(cx, false);
        self.view(id!(selected_voice_view)).set_visible(cx, true);
        self.label(id!(selected_voice)).set_text(
            cx,
            format!("Selected voice: {}", self.selected_voice).as_str(),
        );

        if let Some(channel) = &self.realtime_channel {
            let _ = channel
                .command_sender
                .unbounded_send(RealtimeCommand::UpdateSessionConfig {
                    voice: self.selected_voice.clone(),
                    transcription_model: self
                        .drop_down(id!(transcription_model_selector))
                        .selected_label(),
                });
        }
    }

    fn create_greeting_response(&mut self, cx: &mut Cx) {
        self.update_session_config(cx);
        if let Some(channel) = &self.realtime_channel {
            let _ = channel
                .command_sender
                .unbounded_send(RealtimeCommand::CreateGreetingResponse);
        }
    }

    fn update_ui(&self, cx: &mut Cx) {
        if !self.conversation_active {
            self.label(id!(stop_start_label))
                .set_text(cx, "Start conversation");
        } else {
            self.label(id!(stop_start_label))
                .set_text(cx, "Stop conversation");
        }
    }

    /// Returns true exactly once per pending connection request, so the caller
    /// opens at most one connection.
    pub fn connection_requested(&mut self) -> bool {
        if self.should_request_connection && !self.is_connected && !self.connection_request_sent {
            self.connection_request_sent = true;
            true
        } else {
            false
        }
    }

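    // Messages are keyed by item id; sorting by id approximates conversation
    // order for interleaved user/assistant transcripts.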
    pub fn take_conversation_messages(&mut self) -> Vec<Message> {
        let mut messages_with_ids = std::mem::take(&mut self.conversation_messages);

        messages_with_ids.sort_by(|a, b| a.0.cmp(&b.0));

        messages_with_ids
            .into_iter()
            .map(|(_, message)| message)
            .collect()
    }

    pub fn reset_state(&mut self, cx: &mut Cx) {
        self.reset_all(cx);
    }
}

impl RealtimeRef {
    pub fn set_realtime_channel(&mut self, channel: RealtimeChannel) {
        if let Some(mut inner) = self.borrow_mut() {
            inner.set_realtime_channel(channel);
        }
    }

    pub fn set_bot_entity_id(&mut self, cx: &mut Cx, bot_entity_id: EntityId) {
        if let Some(mut inner) = self.borrow_mut() {
            inner.set_bot_entity_id(cx, bot_entity_id);
        }
    }

    pub fn connection_requested(&mut self) -> bool {
        if let Some(mut inner) = self.borrow_mut() {
            inner.connection_requested()
        } else {
            false
        }
    }

    pub fn take_conversation_messages(&mut self) -> Vec<Message> {
        if let Some(mut inner) = self.borrow_mut() {
            inner.take_conversation_messages()
        } else {
            Vec::new()
        }
    }

    pub fn reset_state(&mut self, cx: &mut Cx) {
        if let Some(mut inner) = self.borrow_mut() {
            inner.reset_state(cx);
        }
    }

    pub fn set_bot_context(&mut self, bot_context: Option<crate::protocol::BotContext>) {
        if let Some(mut inner) = self.borrow_mut() {
            inner.set_bot_context(bot_context);
        }
    }
}