用 WebRTC 和 Pion 打造一款网络录音机

Posted on Sun 09 March 2025 in Journal

Abstract 用 WebRTC 和 Pion 打造一款网络录音机
Authors Walter Fan
Category learning note
Status v1.0
Updated 2025-03-09
License CC-BY-NC-ND 4.0

用 WebRTC 和 Pion 打造一款网络录音机

WebRTC(Web Real-Time Communication)是一项支持网页浏览器和移动应用进行实时语音、视频对话的技术,允许开发者构建实时通信应用。

Pion 是一个用 Go 语言编写的开源 WebRTC 实现,提供了构建实时通信应用的工具和库。

为什么选择 WebRTC 和 Pion

选择 WebRTC 和 Pion 的原因包括:

  • 跨平台支持:WebRTC 支持多种平台,包括浏览器、移动设备和 IoT 设备,适用于各种应用场景。

  • 开源和社区支持:Pion 是一个活跃的开源项目,拥有丰富的文档和社区资源,便于开发者学习和使用。

  • 高性能:Pion 使用 Go 语言编写,具有高并发和高性能的特点,适合构建实时通信应用。

如何使用 WebRTC 和 Pion 构建网络录音机

构建网络录音机的步骤如下:

  1. 设置 Pion WebRTC:在 Go 项目中引入 Pion WebRTC 库,初始化 WebRTC 的配置和媒体引擎。

  2. 创建媒体轨道:使用 Pion 创建音频媒体轨道,用于接收和发送音频数据。

  3. 处理音频数据:接收音频轨道的数据,并使用 Go 的音频处理库进行处理,如编码、解码或保存为文件。

  4. 建立连接:WebRTC 本身并不规定信令方式,本例通过 HTTP 接口(/session)在客户端和服务器之间交换 SDP offer/answer,从而建立 WebRTC 连接,传输音频数据。

  5. 实现录音功能:在服务器端接收音频数据,并将其保存为音频文件,实现录音功能。

示例代码

以下是使用 Pion 接收音频轨道并将音频数据保存为 WAV 文件的示例代码(节选),完整代码参见 https://github.com/walterfan/webrtc-transcriber

  • main.go
package main

import (
    "fmt"
    "github.com/pion/webrtc/v3"
)

// main wires up the signaling HTTP server: it builds the RTC service,
// registers the session endpoint and the static asset handlers, and starts
// listening in a background goroutine.
//
// This is an excerpt: stunServer, httpPort and tr are used below but are
// defined in the elided part at the top of the function.
func main() {
     // ... (elided: definitions of stunServer, httpPort and tr)
    // NOTE(review): this local variable is named webrtc, which shadows the
    // package name of the "github.com/pion/webrtc/v3" import in this file.
    var webrtc rtc.Service
    webrtc = rtc.NewPionRtcService(*stunServer, tr)
    // webrtc = rtc.NewLoggingService(webrtc)

    // Endpoint to create a new speech to text session
    http.Handle("/session", session.MakeHandler(webrtc))

    // Serve static assets: "/" always returns the index page, "/static/" is
    // mapped onto the ./web directory.
    http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
        http.ServeFile(w, r, "./web/index.html")
    })
    http.Handle("/static/", http.StripPrefix("/static", http.FileServer(http.Dir("./web"))))

    // errors collects fatal errors from background goroutines; it is
    // buffered so the sender below does not block. The code that receives
    // from it is in the elided part at the end of the function.
    errors := make(chan error, 2)
    go func() {
        log.Printf("Starting signaling server on port %s", *httpPort)
        // ListenAndServe only returns on failure; forward that error.
        errors <- http.ListenAndServe(fmt.Sprintf(":%s", *httpPort), nil)
    }()

     // ... (elided: remainder of main)
}
  • pion.go
package rtc

import (
    "encoding/binary"
    "fmt"
    "io"
    "log"
    "os"
    "time"

    "github.com/go-audio/audio"
    "github.com/go-audio/wav"
    "github.com/pion/webrtc/v2"
    "github.com/rviscarra/webrtc-speech-to-text/internal/transcribe"
)

// PCM parameters passed to the WAV encoder and to the audio buffers built in
// handleAudioTrack.
// NOTE(review): these must match the format the decoder actually produces;
// newDecoder is not shown here — confirm.
const (
    SAMPLE_RATE = 16000 // samples per second
    BIT_DEPTH   = 16    // bits per sample
    NUM_CHANS   = 2     // number of audio channels
)

// PionPeerConnection is a webrtc.PeerConnection wrapper that implements the
// PeerConnection interface.
type PionPeerConnection struct {
    // pc is the wrapped Pion peer connection; ProcessOffer and Close
    // delegate to it.
    pc *webrtc.PeerConnection
}

// PionRtcService is our implementation of the rtc.Service.
type PionRtcService struct {
    // stunServer is the URL handed to every new peer connection as its
    // single ICE server.
    stunServer string
    // transcriber is stored at construction time; it is not used by the
    // code visible in this file.
    transcriber transcribe.Service
}

// NewPionRtcService builds a PionRtcService that uses stun as its ICE server
// URL and keeps a reference to transcriber, and returns it as a Service.
func NewPionRtcService(stun string, transcriber transcribe.Service) Service {
    svc := new(PionRtcService)
    svc.stunServer = stun
    svc.transcriber = transcriber
    return svc
}

// ProcessOffer applies the client's SDP offer as the remote description,
// creates an answer, installs it as the local description and returns the
// answer's SDP, which must be passed back to the client to establish the
// WebRTC connection.
//
// The first error returned by any of the three steps is returned unchanged,
// together with an empty string.
func (p *PionPeerConnection) ProcessOffer(offer string) (string, error) {
    remote := webrtc.SessionDescription{
        Type: webrtc.SDPTypeOffer,
        SDP:  offer,
    }
    if err := p.pc.SetRemoteDescription(remote); err != nil {
        return "", err
    }

    answer, err := p.pc.CreateAnswer(nil)
    if err != nil {
        return "", err
    }

    if err := p.pc.SetLocalDescription(answer); err != nil {
        return "", err
    }
    return answer.SDP, nil
}

// Close closes the underlying peer connection and returns whatever error
// that call reports.
func (p *PionPeerConnection) Close() error {
    return p.pc.Close()
}

// bytesToIntSlice converts little-endian PCM bytes into one int per sample.
//
// bitDepth selects the sample width:
//   - 8:  one byte per sample, unsigned (the WAV convention for 8-bit PCM)
//   - 24: three bytes per sample, signed
//   - 32: four bytes per sample, signed
//   - any other value (including 16): two bytes per sample, signed
//
// Treating every unrecognised depth as 16-bit keeps the behavior this
// function had when it ignored bitDepth. Trailing bytes that do not form a
// complete sample are dropped. A nil or empty input yields an empty slice.
func bytesToIntSlice(data []byte, bitDepth int) []int {
    switch bitDepth {
    case 8:
        samples := make([]int, len(data))
        for i, b := range data {
            samples[i] = int(b)
        }
        return samples
    case 24:
        samples := make([]int, len(data)/3)
        for i := range samples {
            b := data[3*i : 3*i+3]
            v := uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16
            // Move the 24-bit sign bit into bit 31, then shift back
            // arithmetically to sign-extend.
            samples[i] = int(int32(v<<8) >> 8)
        }
        return samples
    case 32:
        samples := make([]int, len(data)/4)
        for i := range samples {
            samples[i] = int(int32(binary.LittleEndian.Uint32(data[4*i:])))
        }
        return samples
    default:
        samples := make([]int, len(data)/2)
        for i := range samples {
            samples[i] = int(int16(binary.LittleEndian.Uint16(data[2*i:])))
        }
        return samples
    }
}

// handleAudioTrack reads RTP packets from track, decodes each payload and
// appends the resulting PCM samples to audio_output.wav in the current
// working directory. It blocks until the recording ends.
//
// Parameters:
//   - track: the remote audio track to read from.
//   - dc: accepted for interface compatibility; currently unused here.
//
// Returns nil when the track reports io.EOF. Returns an error when the
// decoder or the output file cannot be created, when decoding or writing a
// chunk fails, when reading the track fails, or when no packet arrives in
// time (5 seconds for the first packet, 1 second between later packets).
//
// A background goroutine performs the blocking ReadRTP calls. It stops when
// the track ends or errors, or — once this function has returned — as soon
// as its current ReadRTP call returns.
func (pi *PionRtcService) handleAudioTrack(track *webrtc.Track, dc *webrtc.DataChannel) error {
    decoder, err := newDecoder()
    if err != nil {
        return err
    }

    // create wav file
    file, err := os.Create("audio_output.wav")
    if err != nil {
        return err
    }
    defer file.Close()

    wavEncoder := wav.NewEncoder(file,
        SAMPLE_RATE, // sample rate
        BIT_DEPTH,   // bitsPerSample
        NUM_CHANS,   // channels
        1)           // encoding format: PCM

    // Deferred after file.Close, so it runs first and the encoder is
    // closed while the file is still open.
    defer func() {
        if err := wavEncoder.Close(); err != nil {
            log.Printf("Error closing WAV encoder: %v", err)
        }
    }()

    // errs is buffered so the reader can report its single error and exit
    // even if nobody is receiving any more.
    errs := make(chan error, 1)
    audioStream := make(chan []byte)
    // done is closed on return so the reader never stays blocked on a send
    // to audioStream after this function has given up.
    done := make(chan struct{})
    defer close(done)

    go func() {
        for {
            packet, err := track.ReadRTP()
            if err != nil {
                if err == io.EOF {
                    // End of stream: closing the channel tells the
                    // consumer to finish cleanly.
                    close(audioStream)
                    return
                }
                errs <- err
                return
            }
            select {
            case audioStream <- packet.Payload:
            case <-done:
                return
            }
        }
    }()

    // The timer is owned by this goroutine only; the reader never touches
    // it, so Reset cannot race with the select below.
    timer := time.NewTimer(5 * time.Second)
    defer timer.Stop()

    for {
        select {
        case audioChunk, ok := <-audioStream:
            if !ok {
                // Channel closed by the reader on io.EOF.
                return nil
            }

            // A packet arrived: restart the inactivity timeout. Drain a
            // stale tick first so an already-fired timer cannot trigger a
            // spurious timeout after Reset.
            if !timer.Stop() {
                select {
                case <-timer.C:
                default:
                }
            }
            timer.Reset(1 * time.Second)

            payload, err := decoder.decode(audioChunk)
            if err != nil {
                return err
            }
            log.Printf("Received %d bytes", len(payload))

            // create audio buffer
            buf := &audio.IntBuffer{
                Format: &audio.Format{
                    SampleRate:  SAMPLE_RATE,
                    NumChannels: NUM_CHANS,
                },
                Data:           bytesToIntSlice(payload, BIT_DEPTH), // convert bytes
                SourceBitDepth: BIT_DEPTH,
            }

            // write audio buffer to wav file
            if err := wavEncoder.Write(buf); err != nil {
                return fmt.Errorf("wav file write failure: %w", err)
            }

        case <-timer.C:
            return fmt.Errorf("read operation timed out")
        case err := <-errs:
            log.Printf("unexpected error reading track %s: %v", track.ID(), err)
            return err
        }
    }
}

// CreatePeerConnection creates and configures a new peer connection for
// our purposes: receive one audio track and accept one DataChannel.
//
// The connection uses the service's STUN server as its only ICE server and
// gets a receive-only audio transceiver. When an opus track arrives, the
// OnTrack handler waits for a data channel and then runs handleAudioTrack
// until the recording ends; tracks with other codecs are ignored.
//
// Returns the wrapped connection, or an error if the peer connection cannot
// be created or the transceiver cannot be added. In the latter case the
// partially configured connection is closed before returning.
func (pi *PionRtcService) CreatePeerConnection() (PeerConnection, error) {
    pcconf := webrtc.Configuration{
        ICEServers: []webrtc.ICEServer{
            {
                URLs: []string{pi.stunServer},
            },
        },
        SDPSemantics: webrtc.SDPSemanticsUnifiedPlanWithFallback,
    }
    pc, err := webrtc.NewPeerConnection(pcconf)
    if err != nil {
        return nil, err
    }

    // Hands the data channel from OnDataChannel over to OnTrack. The channel
    // is unbuffered, so each handler blocks until the other side is ready.
    dataChan := make(chan *webrtc.DataChannel)

    pc.OnDataChannel(func(dc *webrtc.DataChannel) {
        dataChan <- dc
    })

    pc.OnTrack(func(track *webrtc.Track, r *webrtc.RTPReceiver) {
        if track.Codec().Name == "opus" {
            log.Printf("Received audio (%s) track, id = %s\n", track.Codec().Name, track.ID())
            // Blocks until the client has opened a data channel.
            err := pi.handleAudioTrack(track, <-dataChan)
            if err != nil {
                log.Printf("Error reading track (%s): %v\n", track.ID(), err)
            }
        }
    })

    pc.OnICEConnectionStateChange(func(connState webrtc.ICEConnectionState) {
        log.Printf("Connection state: %s \n", connState.String())
    })

    _, err = pc.AddTransceiver(webrtc.RTPCodecTypeAudio, webrtc.RtpTransceiverInit{
        Direction: webrtc.RTPTransceiverDirectionRecvonly,
    })
    if err != nil {
        log.Printf("Can't add transceiver: %s", err)
        // The caller only gets the error, so nobody else could ever close
        // this connection; release it here.
        if closeErr := pc.Close(); closeErr != nil {
            log.Printf("Error closing peer connection: %v", closeErr)
        }
        return nil, err
    }

    return &PionPeerConnection{
        pc: pc,
    }, nil
}

本作品采用知识共享署名-非商业性使用-禁止演绎 4.0 国际许可协议进行许可。