import os
import streamlit as st
from openai import OpenAI
import tempfile
import base64

# Set up OpenAI client.
# SECURITY: never hard-code an API key in source — it leaks via version
# control. Read it from the OPENAI_API_KEY environment variable instead
# (set it in your shell or deployment secrets before launching the app).
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Basic page chrome: browser-tab title/icon, page heading, and intro blurb.
st.set_page_config(page_title="🎤 Real-time AI Voice Chat", page_icon="🎤")
st.title("🎤 Real-time AI Voice Conversation")
st.markdown("Have a natural voice conversation with AI using Whisper for speech recognition.")

# Custom HTML/JS for audio recording (fallback option)
def get_audio_recorder_html():
    return """
    <div style="text-align: center; padding: 20px;">
        <button id="recordButton" onclick="toggleRecording()" 
                style="background: #e74c3c; color: white; border: none; padding: 15px 30px; 
                       border-radius: 50px; font-size: 16px; cursor: pointer;">
            🎤 Click to Record
        </button>
        <p id="status">Ready to record</p>
        <audio id="audioPlayback" controls style="display: none;"></audio>
    </div>
    
    <script>
    let mediaRecorder;
    let audioChunks = [];
    let isRecording = false;
    
    async function toggleRecording() {
        const button = document.getElementById('recordButton');
        const status = document.getElementById('status');
        
        if (!isRecording) {
            try {
                const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                mediaRecorder = new MediaRecorder(stream);
                audioChunks = [];
                
                mediaRecorder.ondataavailable = event => {
                    audioChunks.push(event.data);
                };
                
                mediaRecorder.onstop = () => {
                    const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
                    const audioUrl = URL.createObjectURL(audioBlob);
                    
                    // Display audio player
                    const audio = document.getElementById('audioPlayback');
                    audio.src = audioUrl;
                    audio.style.display = 'block';
                    
                    // Convert to base64 and send to Streamlit
                    const reader = new FileReader();
                    reader.onloadend = () => {
                        const base64 = reader.result.split(',')[1];
                        window.parent.postMessage({
                            type: 'audioRecorded',
                            audio: base64
                        }, '*');
                    };
                    reader.readAsDataURL(audioBlob);
                };
                
                mediaRecorder.start();
                isRecording = true;
                button.textContent = '⏹️ Stop Recording';
                button.style.background = '#27ae60';
                status.textContent = 'Recording... Click to stop';
                
            } catch (err) {
                status.textContent = 'Error: Could not access microphone';
                console.error('Error:', err);
            }
        } else {
            mediaRecorder.stop();
            mediaRecorder.stream.getTracks().forEach(track => track.stop());
            isRecording = false;
            button.textContent = '🎤 Click to Record';
            button.style.background = '#e74c3c';
            status.textContent = 'Processing audio...';
        }
    }
    </script>
    """

# Initialize per-user session state with first-run defaults.
for _key, _default in (
    ("messages", []),              # full chat transcript incl. system prompt
    ("theme", ""),                 # empty string = setup screen still showing
    ("conversation_active", False),
    ("auto_play_response", True),  # speak AI replies aloud by default
):
    if _key not in st.session_state:
        st.session_state[_key] = _default

def transcribe_with_whisper(audio_file_path):
    """Transcribe the audio file at *audio_file_path* via OpenAI Whisper.

    Returns the whitespace-stripped transcript string, or ``None`` when the
    API call fails (the error is surfaced in the Streamlit UI).
    """
    try:
        with open(audio_file_path, "rb") as audio_fh:
            result = client.audio.transcriptions.create(
                file=audio_fh,
                model="whisper-1",
                response_format="text",
            )
        return result.strip()
    except Exception as exc:
        st.error(f"Transcription error: {exc}")
        return None

def generate_speech(text, voice="alloy"):
    """Synthesize *text* into audio bytes with OpenAI's TTS model.

    *voice* selects one of the built-in TTS voices. Returns the raw audio
    bytes, or ``None`` on failure (the error is surfaced in the UI).
    """
    try:
        speech = client.audio.speech.create(input=text, voice=voice, model="tts-1")
        return speech.content
    except Exception as exc:
        st.error(f"TTS error: {exc}")
        return None

def process_conversation(user_text):
    """Record the user's message, query GPT-4, and record the reply.

    Appends both the user turn and the assistant turn to
    ``st.session_state.messages``. Returns the assistant's reply text, or
    ``None`` when *user_text* is empty or the API call fails (error shown
    in the UI).
    """
    if not user_text:
        return None

    history = st.session_state.messages
    history.append({"role": "user", "content": user_text})

    with st.spinner("🤖 AI is thinking..."):
        try:
            completion = client.chat.completions.create(
                messages=history,
                model="gpt-4",
                max_tokens=150,  # short replies keep the spoken exchange snappy
            )
            reply = completion.choices[0].message.content
            history.append({"role": "assistant", "content": reply})
            return reply
        except Exception as exc:
            st.error(f"AI response error: {exc}")
            return None

# Theme setup
if not st.session_state.theme:
    st.markdown("### 🎭 Setup Your Conversation")
    theme_input = st.text_input(
        "Enter conversation theme/role", 
        placeholder="e.g., Job interview, Customer service, Language practice, Casual chat"
    )
    
    col1, col2 = st.columns(2)
    with col1:
        voice_selection = st.selectbox(
            "Choose AI voice:",
            ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
        )
    with col2:
        st.session_state.auto_play_response = st.checkbox(
            "Auto-play AI responses", 
            value=True,
            help="Automatically play AI voice responses"
        )
    
    if st.button("🚀 Start Conversation", type="primary") and theme_input:
        st.session_state.theme = theme_input
        st.session_state.ai_voice = voice_selection
        
        # Create system prompt
        system_prompt = f"""
        You are having a natural voice conversation. The theme/context is: "{theme_input}"
        
        Guidelines:
        - Keep responses conversational and natural (like speaking, not writing)
        - Responses should be 1-3 sentences maximum for good conversation flow
        - Stay in character/context based on the theme
        - Be engaging and ask follow-up questions when appropriate
        - Respond as if this is a real-time spoken conversation
        """
        
        st.session_state.messages.append({"role": "system", "content": system_prompt})
        st.session_state.conversation_active = True
        st.rerun()

# Main conversation interface — rendered only once a theme has been chosen.
if st.session_state.theme and st.session_state.conversation_active:
    st.markdown(f"### 🎯 **Theme:** {st.session_state.theme}")

    def _speak_reply(ai_response):
        """Synthesize *ai_response* with the chosen voice and auto-play it.

        No-op when the user has disabled auto-play.
        """
        if not st.session_state.auto_play_response:
            return
        with st.spinner("🔊 Generating AI voice..."):
            speech_audio = generate_speech(ai_response, st.session_state.ai_voice)
        if speech_audio:
            st.audio(speech_audio, format='audio/mp3', autoplay=True)

    def _handle_transcribed_text(user_text):
        """Shared pipeline for every voice input: echo the transcript, get
        the AI reply, display it, and optionally speak it aloud."""
        st.success(f"**You said:** {user_text}")
        ai_response = process_conversation(user_text)
        if ai_response:
            st.info(f"**AI replied:** {ai_response}")
            _speak_reply(ai_response)

    def _save_temp_audio(data):
        """Write raw audio bytes to a temp .wav file and return its path."""
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
            tmp_file.write(data)
            return tmp_file.name

    def _cleanup_temp(path):
        """Best-effort removal of a temp file; missing/locked files ignored."""
        try:
            os.unlink(path)
        except OSError:
            pass

    # Method 1: in-browser recording via the optional
    # audio-recorder-streamlit package (graceful fallback if not installed).
    st.markdown("### 🎤 Voice Input Method 1")
    try:
        from audio_recorder_streamlit import audio_recorder

        audio_bytes = audio_recorder(
            text="Click to speak",
            recording_color="#e74c3c",
            neutral_color="#3498db",
            icon_name="microphone",
            icon_size="2x",
            key="audio_recorder_main"
        )

        # NOTE(review): the widget appears to return the last recording on
        # every rerun, so the same utterance may be reprocessed when an
        # unrelated rerun occurs — consider deduplicating by content hash.
        if audio_bytes:
            tmp_file_path = _save_temp_audio(audio_bytes)

            with st.spinner("🎧 Transcribing your speech..."):
                user_text = transcribe_with_whisper(tmp_file_path)

            if user_text:
                _handle_transcribed_text(user_text)

            _cleanup_temp(tmp_file_path)

    except ImportError:
        st.warning("audio-recorder-streamlit not available. Using alternative method below.")

    # Method 2: upload a pre-recorded file, then transcribe on demand.
    st.markdown("### 🎤 Voice Input Method 2 (File Upload)")
    uploaded_audio = st.file_uploader(
        "Upload audio file", 
        type=['wav', 'mp3', 'm4a', 'ogg'],
        help="Record audio on your device and upload it here"
    )

    if uploaded_audio:
        # The temp file is recreated on every rerun, so it is present when
        # the transcribe button's click triggers the next rerun.
        tmp_file_path = _save_temp_audio(uploaded_audio.read())

        if st.button("🎧 Transcribe Uploaded Audio"):
            with st.spinner("🎧 Transcribing your speech..."):
                user_text = transcribe_with_whisper(tmp_file_path)

            if user_text:
                _handle_transcribed_text(user_text)

        _cleanup_temp(tmp_file_path)

    # Text input backup — same reply pipeline, minus the transcription step.
    st.markdown("### 💬 Text Input (Backup)")
    text_input = st.chat_input("Type your message here")

    if text_input:
        ai_response = process_conversation(text_input)
        if ai_response:
            _speak_reply(ai_response)

    # Conversation history (system prompt hidden by the role filter below).
    st.markdown("### 💭 Conversation History")

    # Show only the most recent messages to keep reruns fast; the slice
    # already handles shorter histories, no length check needed.
    recent_messages = st.session_state.messages[-10:]

    for msg in recent_messages:
        if msg["role"] == "user":
            st.chat_message("user", avatar="🗣️").markdown(msg["content"])
        elif msg["role"] == "assistant":
            st.chat_message("assistant", avatar="🤖").markdown(msg["content"])

    # Control buttons
    st.markdown("### 🎛️ Controls")
    col1, col2, col3 = st.columns(3)

    with col1:
        if st.button("🔄 New Conversation"):
            # Full reset back to the theme-selection screen.
            st.session_state.messages = []
            st.session_state.theme = ""
            st.session_state.conversation_active = False
            st.rerun()

    with col2:
        if st.button("🧹 Clear History"):
            # Keep the system message so the theme persists; drop the rest.
            system_msg = next((msg for msg in st.session_state.messages if msg["role"] == "system"), None)
            st.session_state.messages = [system_msg] if system_msg else []
            st.rerun()

    with col3:
        st.session_state.auto_play_response = st.toggle(
            "🔊 Auto-play", 
            value=st.session_state.auto_play_response
        )

# Instructions — static usage/help text rendered at the bottom of the page
# on every run, regardless of conversation state.
st.markdown("""
---
### 📋 How to Use:

**Option 1: Direct Recording** (if available)
1. Click the microphone button and speak
2. AI will transcribe and respond with voice

**Option 2: File Upload**
1. Record audio on your phone/computer
2. Upload the audio file
3. Click "Transcribe" to process

**Option 3: Text Backup**
- Type your message if voice isn't working

### 💡 Tips:
- **Speak clearly** for better transcription
- **Keep messages short** for natural conversation flow
- **Use good audio quality** when uploading files

### 📱 Mobile Recording Apps:
- **iPhone**: Voice Memos app
- **Android**: Voice Recorder app
- **Any device**: Use your phone's built-in recorder

### 🔧 Installation (Simplified):
```bash
pip install streamlit openai audio-recorder-streamlit
```

*No PyAudio required!* 🎉
""")

# Debug info — opt-in dump of the session-state fields this app manages.
if st.checkbox("🔍 Show Debug Info"):
    st.write("**Session State:**")
    for label, value in (
        ("Theme", st.session_state.theme),
        ("Messages count", len(st.session_state.messages)),
        ("Conversation active", st.session_state.conversation_active),
        ("Auto-play", st.session_state.auto_play_response),
    ):
        st.write(f"- {label}: {value}")