Du har fått ett telefonnummer. Välj ett kodexempel, kör det på din server, och ring numret. Klart!
Med ljud som strömmar live går det att bygga allt som lyssnar eller pratar i telefon.
Koppla in en AI och låt den svara på samtal, ta meddelanden och boka möten.
Spela upp en låt, ett meddelande eller ett trumpetsolo när någon ringer.
Lyssna, översätt och prata tillbaka på ett annat språk, live.
Spela in ljudet, skicka till en AI, få mötesanteckningar på mejl.
Frågesport, gissa talet, välj-ditt-eget-äventyr — allt via röst.
Bygg en väntkö med kömusik och meddelande om köplats.
Du behöver någonstans att köra Python-kod som är nåbar från internet. Välj det som passar dig.
Skaffa en server på aislojd.se. SSH:a sedan in och installera Python:
sudo apt update && sudo apt install -y python3 python3-pip python3-venv
python3 -m venv ~/ws-env
source ~/ws-env/bin/activate
pip install websockets python-dotenv openai
Öppna port 8080 i brandväggen:
sudo ufw allow 8080/tcp
Konfigurera sen ditt nummers WebSocket-URL till: ws://DIN_SERVER_IP:8080
Tips: du kan använda Claude Code direkt i terminalen på din VPS för att få hjälp att skriva och debugga din kod. Kör claude i terminalen för att starta.
Kör koden lokalt och exponera den med ngrok:
ngrok http 8080
Du får en URL som https://abc123.ngrok.io — byt https:// mot wss:// och använd den som din WebSocket-URL.
Använd Lovable för att generera och hosta en WebSocket-server. Klistra in koden och be den deploya.
Kopiera koden, spara som en .py-fil, och kör. Ring sen ditt nummer.
Allt du säger skickas tillbaka till dig. Det enklaste sättet att kolla att allt funkar.
#!/usr/bin/env python3
"""Sample WebSocket server that echoes audio back to the caller."""
import asyncio
import json
import logging
import sys
import websockets
# Timestamp, level and message on every line; INFO and above are emitted.
_LOG_FORMAT = "%(asctime)s %(levelname)s %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
log = logging.getLogger(__name__)
async def handle(ws):
    """Echo every audio frame of one call straight back to the caller.

    Args:
        ws: The connection for this call; it must support ``recv``, ``send``
            and async iteration.
    """
    # The first frame is the hello. It is parsed (so malformed JSON still
    # raises) but none of its fields are needed for a plain echo.
    json.loads(await ws.recv())

    # Announce both directions before any audio flows.
    for direction in ("sending", "listening"):
        await ws.send(json.dumps({"t": direction, "format": "pcm_24000"}))

    async for raw in ws:
        kind = json.loads(raw).get("t")
        if kind == "audio":
            # Forward the frame untouched; no re-encoding is needed.
            await ws.send(raw)
            continue
        if kind == "bye":
            log.info("Call ended")
            return
        log.info("Non-audio message: %s", raw)
async def main():
    """Serve ``handle`` on all interfaces until the process is stopped.

    The port is the first command-line argument and defaults to 8080.
    """
    cli_args = sys.argv[1:]
    port = int(cli_args[0]) if cli_args else 8080
    server = websockets.serve(handle, "0.0.0.0", port)
    async with server:
        # Nothing ever resolves this future, so the server stays up until
        # the process is interrupted.
        await asyncio.Future()


if __name__ == "__main__":
    asyncio.run(main())
python echo-client.py
Spelar en WAV-fil för den som ringer. Väntar tills allt ljud spelats klart innan den lägger på.
#!/usr/bin/env python3
"""Audio file player demo.
Streams a WAV audio file to the caller. The proxy automatically waits for all
buffered audio to finish playing before hanging up.
Usage:
uv run python audio-file-player.py <audio_file.wav> [port]
Requirements:
- Audio file must be WAV format (PCM16, mono)
- Sample rate must match session sample rate (24kHz for default trigger-call.sh)
"""
import asyncio
import base64
import json
import logging
import sys
import wave
import websockets
# Lines look like "HH:MM:SS.mmm message"; INFO and above are emitted.
_LOG_OPTIONS = {
    "level": logging.INFO,
    "format": "%(asctime)s.%(msecs)03d %(message)s",
    "datefmt": "%H:%M:%S",
}
logging.basicConfig(**_LOG_OPTIONS)
log = logging.getLogger(__name__)
async def handle_client(ws, audio_data: bytes, file_sample_rate: int):
    """Stream raw PCM audio to one caller, then wait for the call to end.

    Args:
        ws: The connection for this call.
        audio_data: Raw PCM16 mono frames, i.e. the WAV payload without
            its header.
        file_sample_rate: Sample rate of ``audio_data`` in Hz; announced to
            the proxy as ``pcm_<rate>``.
    """
    # The first frame is the hello, which carries the call metadata.
    hello = json.loads(await ws.recv())
    log.info("Call: %s %s %s", hello.get("callid"), hello.get("from"), hello.get("to"))

    # Declare the outgoing format before any audio frame is sent.
    await ws.send(json.dumps({
        "t": "sending",
        "format": f"pcm_{file_sample_rate}",
    }))

    # 1024 is even, so a 16-bit sample is never split across two frames.
    chunk_size = 1024
    for start in range(0, len(audio_data), chunk_size):
        chunk = audio_data[start:start + chunk_size]
        await ws.send(json.dumps({
            "t": "audio",
            "data": base64.b64encode(chunk).decode("ascii"),
        }))

    # Hang up — proxy will drain buffered audio then send bye
    await ws.send(json.dumps({"t": "bye"}))

    # Keep reading until the bye arrives. Returning on the first message
    # would close the socket as soon as anything else came in, possibly
    # while audio is still queued. The loop also ends if the peer closes.
    async for message in ws:
        msg = json.loads(message)
        if msg.get("t") == "bye":
            log.info("Call ended")
            break
        log.info("Received: %s", msg)
async def main():
    """Load the WAV file named on the command line and serve it to callers.

    Usage: ``audio-file-player.py <audio_file.wav> [port]``. Exits with
    status 1 when the file argument is missing or the file is not mono
    16-bit.
    """
    argv = sys.argv
    if len(argv) < 2:
        print("Usage: audio-file-player.py <audio_file.wav> [port]")
        sys.exit(1)

    # Read the whole file up front; every call replays the same bytes.
    with wave.open(argv[1], "rb") as wav:
        if not (wav.getnchannels() == 1 and wav.getsampwidth() == 2):
            log.error("Audio must be mono PCM16")
            sys.exit(1)
        sample_rate = wav.getframerate()
        audio_data = wav.readframes(wav.getnframes())
    log.info(f"Loaded {len(audio_data)} bytes at {sample_rate}Hz")

    port = 8080 if len(argv) <= 2 else int(argv[2])

    async def serve_call(ws):
        # Bind the preloaded audio to the one-argument handler signature.
        await handle_client(ws, audio_data, sample_rate)

    async with websockets.serve(serve_call, "0.0.0.0", port):
        log.info(f"Listening on port {port}")
        await asyncio.Future()


if __name__ == "__main__":
    asyncio.run(main())
python audio-file-player.py trumpet.wav
Konvertera vid behov till rätt format (24 kHz, mono, PCM16) med ffmpeg:
ffmpeg -i song.mp3 -ar 24000 -ac 1 -sample_fmt s16 output.wav
Samma idé som ovan fast med MP3 direkt — 46elks konverterar åt dig.
#!/usr/bin/env python3
"""MP3 file player demo.
Streams an MP3 file in chunks; the proxy transcodes to PCM16 and automatically
waits for all buffered audio to finish playing before hanging up.
Usage:
uv run python mp3-file-player.py <audio_file.mp3> [port]
"""
import asyncio
import base64
import json
import logging
import sys
import websockets
# Millisecond-resolution clock time followed by the message, INFO and above.
_LOG_LINE = "%(asctime)s.%(msecs)03d %(message)s"
_LOG_CLOCK = "%H:%M:%S"
logging.basicConfig(level=logging.INFO, format=_LOG_LINE, datefmt=_LOG_CLOCK)
log = logging.getLogger(__name__)
async def handle_client(ws, audio_data: bytes):
    """Stream an MP3 file to one caller, then wait for the call to end.

    Args:
        ws: The connection for this call.
        audio_data: The complete MP3 file contents; it is sent as-is and
            announced to the proxy as format ``mp3``.
    """
    # The first frame is the hello, which carries the call metadata.
    hello = json.loads(await ws.recv())
    log.info("Call: %s %s %s", hello.get("callid"), hello.get("from"), hello.get("to"))

    # Declare the outgoing format before any audio frame is sent.
    await ws.send(json.dumps({
        "t": "sending",
        "format": "mp3",
    }))

    # Send MP3 data in 10KB chunks
    chunk_size = 10 * 1024
    for start in range(0, len(audio_data), chunk_size):
        chunk = audio_data[start:start + chunk_size]
        await ws.send(json.dumps({
            "t": "audio",
            "data": base64.b64encode(chunk).decode("ascii"),
        }))

    # Hang up — proxy will drain buffered audio then send bye
    await ws.send(json.dumps({"t": "bye"}))

    # Keep reading until the bye arrives. Returning on the first message
    # would close the socket as soon as anything else came in, possibly
    # while audio is still queued. The loop also ends if the peer closes.
    async for message in ws:
        msg = json.loads(message)
        if msg.get("t") == "bye":
            log.info("Call ended")
            break
        log.info("Received: %s", msg)
async def main():
    """Load the MP3 named on the command line and serve it to every caller.

    Usage: ``mp3-file-player.py <audio_file.mp3> [port]``. Exits with
    status 1 when the file argument is missing.
    """
    if len(sys.argv) < 2:
        print("Usage: mp3-file-player.py <audio_file.mp3> [port]")
        sys.exit(1)

    mp3_path, *extra_args = sys.argv[1:]
    # The file is forwarded byte-for-byte, so it is read as binary.
    with open(mp3_path, "rb") as f:
        audio_data = f.read()
    log.info(f"Loaded {len(audio_data)} bytes from {sys.argv[1]}")

    port = int(extra_args[0]) if extra_args else 8080

    async def serve_call(ws):
        # Bind the preloaded audio to the one-argument handler signature.
        await handle_client(ws, audio_data)

    async with websockets.serve(serve_call, "0.0.0.0", port):
        log.info(f"MP3 file player listening on port {port}")
        await asyncio.Future()


if __name__ == "__main__":
    asyncio.run(main())
python mp3-file-player.py trumpet.mp3
Genererar 6 slumpade siffror och läser upp dem en efter en. Visar hur man köar flera ljudfiler efter varandra.
#!/usr/bin/env python3
"""Random digit sequence player demo.
Queues 6 random digits and a bye. The action queue plays them in order
and hangs up after the last digit finishes.
Demonstrates:
- Multiple play calls queued back-to-back
- bye queued behind all audio (proxy drains before disconnecting)
Usage:
uv run python digit-sequence-player.py [port]
"""
import asyncio
import base64
import json
import logging
import os
import random
import sys
import websockets
# Millisecond-resolution clock time followed by the message, INFO and above.
_LOG_OPTIONS = dict(
    level=logging.INFO,
    format="%(asctime)s.%(msecs)03d %(message)s",
    datefmt="%H:%M:%S",
)
logging.basicConfig(**_LOG_OPTIONS)
log = logging.getLogger(__name__)

# The digit recordings live in "audio_files" next to this script.
_SCRIPT_DIR = os.path.dirname(__file__)
AUDIO_DIR = os.path.join(_SCRIPT_DIR, "audio_files")
def load_digit_files() -> dict[int, bytes]:
    """Read ``0.wav`` to ``9.wav`` from ``AUDIO_DIR`` into memory.

    Returns:
        A mapping from digit to the complete file contents. Digits whose
        file does not exist are logged as a warning and left out.
    """
    loaded: dict[int, bytes] = {}
    for i in range(10):
        wav_path = os.path.join(AUDIO_DIR, f"{i}.wav")
        if os.path.exists(wav_path):
            with open(wav_path, "rb") as fh:
                loaded[i] = fh.read()
        else:
            log.warning("Missing audio file: %s", wav_path)
    return loaded
async def send_file(ws, data: bytes) -> None:
    """Queue one WAV file: a ``sending`` announcement, then its audio frames.

    Args:
        ws: Connection to send on; only ``send`` is used.
        data: Complete WAV file contents, sent base64-encoded in slices of
            at most 10 KiB.
    """
    announcement = {"t": "sending", "format": "wav"}
    await ws.send(json.dumps(announcement))

    chunk_size = 10 * 1024
    offset = 0
    while offset < len(data):
        piece = data[offset:offset + chunk_size]
        frame = {"t": "audio", "data": base64.b64encode(piece).decode("ascii")}
        await ws.send(json.dumps(frame))
        offset += chunk_size
async def handle_client(ws, digit_files: dict[int, bytes]) -> None:
    """Handle a call: queue 6 random digits, queue a bye, wait for the end.

    Args:
        ws: The connection for this call.
        digit_files: Mapping from digit to WAV file contents. It may be
            missing some digits, in which case only the available ones
            are drawn.
    """
    hello = json.loads(await ws.recv())
    if hello.get("t") != "hello":
        log.error("Expected hello, got: %s", hello.get("t"))
        return
    log.info("Call started: callid=%s from=%s to=%s",
             hello.get("callid"),
             hello.get("from"),
             hello.get("to"))

    # Draw only from digits that were actually loaded. The loader skips
    # missing files, so indexing with an arbitrary 0-9 could raise KeyError.
    available = sorted(digit_files)
    if not available:
        log.error("No digit audio loaded; hanging up")
        await ws.send(json.dumps({"t": "bye"}))
        return
    digits = random.choices(available, k=6)
    log.info("Playing sequence: %s", "".join(str(d) for d in digits))

    last = len(digits) - 1
    for i, digit in enumerate(digits):
        await send_file(ws, digit_files[digit])
        # A "sync" goes between consecutive files but not after the last.
        # NOTE(review): presumably a queue boundary for the proxy — confirm
        # against the protocol documentation.
        if i < last:
            await ws.send(json.dumps({"t": "sync"}))

    # The bye is queued behind all audio.
    await ws.send(json.dumps({"t": "bye"}))
    log.info("All digits + bye queued")

    # Stay connected until the proxy's own bye arrives (or it closes).
    async for message in ws:
        msg = json.loads(message)
        if msg.get("t") == "bye":
            log.info("Call ended")
            break
        log.info("Received: %s", msg)
async def main() -> None:
    """Preload the digit recordings and serve calls until stopped.

    Exits with status 1 when no digit file could be loaded. The port is
    the first command-line argument and defaults to 8080.
    """
    digit_files = load_digit_files()
    if not digit_files:
        log.error("No digit audio files found in %s", AUDIO_DIR)
        sys.exit(1)
    log.info("Loaded %d digit files", len(digit_files))

    try:
        port = int(sys.argv[1])
    except IndexError:
        # No port argument was given.
        port = 8080

    async def serve_call(ws):
        # Bind the preloaded recordings to the one-argument handler signature.
        await handle_client(ws, digit_files)

    async with websockets.serve(serve_call, "0.0.0.0", port):
        log.info("Listening on port %d", port)
        await asyncio.Future()


if __name__ == "__main__":
    asyncio.run(main())
python digit-sequence-player.py — kräver mappen audio_files/ med 0.wav–9.wav
Kopplar ditt telefonsamtal till OpenAI:s Realtime API. AI:n lyssnar, tänker och pratar tillbaka — live.
Kräver en fil .env med: OPENAI_API_KEY=sk-din-nyckel och en fil openai_context.md som beskriver AI:ns personlighet.
#!/usr/bin/env python3
"""OpenAI Realtime API voice agent.
Bridges phone calls to OpenAI's Realtime API for voice-to-voice conversations.
Usage:
uv run python openai_voice_agent.py [port]
"""
import asyncio
import json
import logging
import os
import sys
import websockets
from dotenv import load_dotenv
# Load environment variables from a .env file; OPENAI_API_KEY is read below.
load_dotenv()

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
log = logging.getLogger(__name__)

# Fail at start-up rather than on the first call if the key is missing.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    log.error("OPENAI_API_KEY not set")
    sys.exit(1)

# Audio format announced to the telephony proxy in both directions.
CODEC = "pcm_24000"

# The assistant's instructions live in a Markdown file next to this script.
_context_file = os.path.join(os.path.dirname(__file__), "openai_context.md")
if not os.path.exists(_context_file):
    log.error("openai_context.md not found: %s", _context_file)
    sys.exit(1)
# The context manager closes the handle at once. UTF-8 is pinned so that
# non-ASCII text in the prompt decodes the same way on every platform.
with open(_context_file, encoding="utf-8") as _context_fh:
    INSTRUCTIONS = _context_fh.read()
async def handle_client(proxy_ws) -> None:
    """Bridge one phone call between the telephony proxy and OpenAI Realtime.

    Caller audio frames are forwarded to OpenAI as
    ``input_audio_buffer.append`` events, and OpenAI's
    ``response.audio.delta`` events are sent back to the proxy as ``audio``
    frames. Both directions run concurrently until they finish.

    Args:
        proxy_ws: The connection for this call. Its first message must be
            a JSON object with ``"t": "hello"``; otherwise the handler
            logs an error and returns.
    """
    hello = json.loads(await proxy_ws.recv())
    if hello.get("t") != "hello":
        log.error("Expected hello, got: %s", hello.get("t"))
        return
    callid = hello.get("callid")
    log.info("Call started: callid=%s from=%s to=%s", callid, hello.get("from"), hello.get("to"))
    # Announce the audio format for both directions before connecting upstream.
    await proxy_ws.send(json.dumps({"t": "sending", "format": CODEC}))
    await proxy_ws.send(json.dumps({"t": "listening", "format": CODEC}))
    # NOTE(review): `additional_headers` is the keyword of the newer
    # websockets client API; older releases used `extra_headers` — confirm
    # against the installed version.
    openai_ws = await websockets.connect(
        "wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-12-17",
        additional_headers={
            "Authorization": f"Bearer {OPENAI_API_KEY}",
            "OpenAI-Beta": "realtime=v1",
        },
    )
    log.info("Connected to OpenAI")
    # Configure the session: persona, voice, audio formats, input
    # transcription and server-side voice-activity turn detection.
    await openai_ws.send(json.dumps({
        "type": "session.update",
        "session": {
            "modalities": ["audio", "text"],
            "instructions": INSTRUCTIONS,
            "voice": "alloy",
            "input_audio_format": "pcm16",
            "output_audio_format": "pcm16",
            "input_audio_transcription": {"model": "whisper-1"},
            "turn_detection": {
                "type": "server_vad",
                "threshold": 0.7,
                "prefix_padding_ms": 300,
                "silence_duration_ms": 800,
            },
        },
    }))
    # Request a first response so the assistant speaks before the caller does.
    await openai_ws.send(json.dumps({
        "type": "response.create",
        "response": {
            "modalities": ["audio", "text"],
            "instructions": "Greet the caller and ask how you can help them today.",
        },
    }))
    async def proxy_to_openai():
        """Forward caller audio to OpenAI until bye or the proxy closes."""
        try:
            async for message in proxy_ws:
                data = json.loads(message)
                if data.get("t") == "audio":
                    # The base64 payload is passed through without decoding.
                    await openai_ws.send(json.dumps({
                        "type": "input_audio_buffer.append",
                        "audio": data["data"],
                    }))
                elif data.get("t") == "bye":
                    log.info("Call ended: callid=%s", callid)
                    break
        except websockets.exceptions.ConnectionClosed:
            pass
        finally:
            # Closing the OpenAI socket is also what ends openai_to_proxy's
            # loop, so the gather below can finish.
            await openai_ws.close()
    async def openai_to_proxy():
        """Relay OpenAI events to the proxy until the OpenAI socket closes."""
        try:
            async for message in openai_ws:
                data = json.loads(message)
                event_type = data.get("type")
                if event_type == "input_audio_buffer.speech_started":
                    # Barge-in: cancel the response in progress, interrupt
                    # playback on the proxy, then announce the sending
                    # format again.
                    log.info("User speaking — cancelling assistant response")
                    await openai_ws.send(json.dumps({"type": "response.cancel"}))
                    await proxy_ws.send(json.dumps({"t": "interrupt"}))
                    await proxy_ws.send(json.dumps({"t": "sending", "format": CODEC}))
                elif event_type == "conversation.item.input_audio_transcription.completed":
                    log.info("User: %s", data.get("transcript", "").strip())
                elif event_type == "response.audio_transcript.done":
                    log.info("Assistant: %s", data.get("transcript", "").strip())
                elif event_type == "response.audio.delta":
                    # The delta is forwarded as the audio frame's payload unchanged.
                    await proxy_ws.send(json.dumps({"t": "audio", "data": data["delta"]}))
                elif event_type == "error":
                    error = data.get("error", {})
                    # A cancel sent while nothing was playing is expected
                    # (see the barge-in branch), so that code is not logged.
                    if error.get("code") != "response_cancel_not_active":
                        log.error("OpenAI error: %s", error)
        except websockets.exceptions.ConnectionClosed:
            pass
    # Run both directions concurrently; returns once both have finished.
    await asyncio.gather(proxy_to_openai(), openai_to_proxy())
async def main():
    """Serve ``handle_client`` on all interfaces until the process stops.

    The port is the first command-line argument and defaults to 8080.
    """
    port = 8080 if len(sys.argv) <= 1 else int(sys.argv[1])
    log.info("Starting on port %d", port)
    server = websockets.serve(handle_client, "0.0.0.0", port)
    async with server:
        # Nothing ever resolves this future, so the server runs until
        # the process is interrupted.
        await asyncio.Future()


if __name__ == "__main__":
    asyncio.run(main())
python openai_voice_agent.py
Redigera openai_context.md och gör AI:n till vad du vill: en pirat, en pizzeria, en terapeut, en quizvärd, företagets supportlinje...
Sparar 3 sekunder av ljud och spelar sedan upp det. Som viskleken, fast med dig själv.
#!/usr/bin/env python3
"""Delayed echo — buffers 3 seconds of audio, then plays it back."""
import asyncio
import collections
import json
import sys
import websockets
# Audio parameters for the requested "pcm_24000" stream with 2-byte samples.
SAMPLE_RATE = 24_000
BYTES_PER_SAMPLE = 2
DELAY_SECONDS = 3
# Number of chunks that make up the delay, counting each chunk as 1024 bytes.
_DELAY_BYTES = DELAY_SECONDS * SAMPLE_RATE * BYTES_PER_SAMPLE
BUFFER_CHUNKS = int(_DELAY_BYTES / 1024)
async def handle(ws):
    """Play the caller's own audio back after a ``DELAY_SECONDS`` delay.

    The delay is measured in decoded PCM bytes, not in number of messages,
    so it holds whatever chunk size the proxy delivers. ``BUFFER_CHUNKS``
    assumes 1024-byte chunks and is therefore not used here.

    Args:
        ws: The connection for this call.
    """
    # Local import: this script's import block does not include base64.
    import base64

    hello = json.loads(await ws.recv())
    print(f"📞 Call from {hello.get('from')}")
    await ws.send(json.dumps({"t": "sending", "format": "pcm_24000"}))
    await ws.send(json.dumps({"t": "listening", "format": "pcm_24000"}))

    delay_bytes = DELAY_SECONDS * SAMPLE_RATE * BYTES_PER_SAMPLE
    pending = collections.deque()  # (raw message, decoded byte count)
    pending_bytes = 0

    async for message in ws:
        data = json.loads(message)
        kind = data.get("t")
        if kind == "audio":
            size = len(base64.b64decode(data.get("data", "")))
            pending.append((message, size))
            pending_bytes += size
            # Release the oldest frames for as long as what stays queued
            # still covers the full delay.
            while pending and pending_bytes - pending[0][1] >= delay_bytes:
                oldest, oldest_size = pending.popleft()
                pending_bytes -= oldest_size
                # The original frame is forwarded untouched.
                await ws.send(oldest)
        elif kind == "bye":
            print("👋 Done")
            break
async def main():
    """Serve the delayed echo on all interfaces until the process stops.

    The port is the first command-line argument and defaults to 8080.
    """
    requested = sys.argv[1] if sys.argv[1:] else None
    port = 8080 if requested is None else int(requested)
    server = websockets.serve(handle, "0.0.0.0", port)
    async with server:
        print(f"🟢 Viskleken ready on port {port}")
        # Nothing ever resolves this future, so the server stays up.
        await asyncio.Future()


if __name__ == "__main__":
    asyncio.run(main())
python viskleken.py
Det absolut minsta exemplet. Tar emot samtalet och lägger på. Bra startpunkt att bygga vidare på.
#!/usr/bin/env python3
"""Minimal example — accept the call, then immediately hang up."""
import asyncio
import json
import sys
import websockets
async def handle(ws):
    """Read the hello, print who called, and immediately send ``bye``.

    Args:
        ws: The connection for this call.
    """
    first_frame = await ws.recv()
    hello = json.loads(first_frame)
    print(f"📞 {hello.get('from')} rang — lägger på!")

    # Returning right after the bye ends the handler and thus the call.
    goodbye = json.dumps({"t": "bye"})
    await ws.send(goodbye)
async def main():
    """Serve the hang-up handler on all interfaces until the process stops.

    The port is the first command-line argument and defaults to 8080.
    """
    # An empty slice means no port argument was supplied.
    port = int((sys.argv[1:2] or [8080])[0])
    server = websockets.serve(handle, "0.0.0.0", port)
    async with server:
        print(f"🟢 Waiting on port {port}")
        # Nothing ever resolves this future, so the server stays up.
        await asyncio.Future()


if __name__ == "__main__":
    asyncio.run(main())
python hangup.py
Spelar in allt uppringaren säger och sparar det som en WAV-fil. Grunden för transkribering, sammanfattning, eller arkivering.
#!/usr/bin/env python3
"""Records the caller's audio to a WAV file."""
import asyncio
import base64
import json
import struct
import sys
import time
import websockets
# Sample rate in Hz of the recorded audio; written into each WAV header.
SAMPLE_RATE = 24_000
def save_wav(filename, pcm_data, sample_rate):
    """Write raw mono PCM16 data to a WAV file with a 44-byte header.

    Args:
        filename: Path of the file to create or overwrite.
        pcm_data: Raw little-endian 16-bit mono samples (bytes-like).
        sample_rate: Samples per second, recorded in the header.
    """
    channels = 1
    bytes_per_sample = 2
    block_align = channels * bytes_per_sample
    # RIFF header, "fmt " chunk (16 bytes, format tag 1 = PCM) and "data"
    # chunk header, packed in one call. The "<" prefix means little-endian
    # with no padding. The RIFF size is the rest of the file after its own
    # 8-byte prefix: 36 header bytes plus the samples.
    header = struct.pack(
        "<4sI4s4sIHHIIHH4sI",
        b"RIFF", 36 + len(pcm_data), b"WAVE",
        b"fmt ", 16, 1, channels, sample_rate,
        sample_rate * block_align, block_align, 8 * bytes_per_sample,
        b"data", len(pcm_data),
    )
    with open(filename, "wb") as f:
        f.write(header)
        f.write(pcm_data)
async def handle(ws):
    """Record one call's inbound audio to ``call_<unix-time>.wav``.

    The file is written when the call ends, whether through a ``bye``, the
    peer closing, or an error that breaks out of the receive loop. Audio
    captured before a dropped connection is therefore kept.

    Args:
        ws: The connection for this call.
    """
    hello = json.loads(await ws.recv())
    caller = hello.get("from", "unknown")
    print(f"📞 Recording call from {caller}")
    # Only the inbound direction is needed; nothing is played to the caller.
    await ws.send(json.dumps({"t": "listening", "format": "pcm_24000"}))

    chunks = []
    try:
        async for message in ws:
            data = json.loads(message)
            kind = data.get("t")
            if kind == "audio":
                chunks.append(base64.b64decode(data["data"]))
            elif kind == "bye":
                break
    finally:
        # Iterating the connection raises when it closes abnormally. Saving
        # here, and not after the loop, keeps what was recorded so far; the
        # exception still propagates afterwards.
        pcm_data = b"".join(chunks)
        filename = f"call_{int(time.time())}.wav"
        save_wav(filename, pcm_data, SAMPLE_RATE)
        # 2 bytes per sample, mono.
        duration = len(pcm_data) / (SAMPLE_RATE * 2)
        print(f"💾 Saved {duration:.1f}s to {filename}")
async def main():
    """Serve the recorder on all interfaces until the process stops.

    The port is the first command-line argument and defaults to 8080.
    """
    if len(sys.argv) > 1:
        port = int(sys.argv[1])
    else:
        port = 8080
    server = websockets.serve(handle, "0.0.0.0", port)
    async with server:
        print(f"🟢 Recorder ready on port {port}")
        # Nothing ever resolves this future, so the server stays up.
        await asyncio.Future()


if __name__ == "__main__":
    asyncio.run(main())
python recorder.py — sparar en WAV-fil per samtal