diff options
author | LRN <lrn1986@gmail.com> | 2014-01-17 04:12:51 +0000 |
---|---|---|
committer | LRN <lrn1986@gmail.com> | 2014-01-17 04:12:51 +0000 |
commit | b8bfbf5d0801afc80a8f44f9df7f8a1e95bb2bbc (patch) | |
tree | 95a094140804e032598f137f76a3c0916705a58f /src/conversation/gnunet-helper-audio-record.c | |
parent | a53b100e3e326970708e62c7660f09d40aae58d7 (diff) | |
download | gnunet-b8bfbf5d0801afc80a8f44f9df7f8a1e95bb2bbc.tar.gz gnunet-b8bfbf5d0801afc80a8f44f9df7f8a1e95bb2bbc.zip |
Wrap opus in ogg container
Diffstat (limited to 'src/conversation/gnunet-helper-audio-record.c')
-rw-r--r-- | src/conversation/gnunet-helper-audio-record.c | 362 |
1 files changed, 316 insertions, 46 deletions
diff --git a/src/conversation/gnunet-helper-audio-record.c b/src/conversation/gnunet-helper-audio-record.c index 20812599f..9caad611b 100644 --- a/src/conversation/gnunet-helper-audio-record.c +++ b/src/conversation/gnunet-helper-audio-record.c | |||
@@ -38,9 +38,104 @@ | |||
38 | #include <pulse/pulseaudio.h> | 38 | #include <pulse/pulseaudio.h> |
39 | #include <opus/opus.h> | 39 | #include <opus/opus.h> |
40 | #include <opus/opus_types.h> | 40 | #include <opus/opus_types.h> |
41 | #include <ogg/ogg.h> | ||
41 | 42 | ||
43 | #define DEBUG_RECORD_PURE_OGG 1 | ||
44 | |||
45 | /** | ||
46 | * Sampling rate | ||
47 | */ | ||
42 | #define SAMPLING_RATE 48000 | 48 | #define SAMPLING_RATE 48000 |
43 | 49 | ||
50 | /** | ||
51 | * How many ms of audio to buffer before encoding them. | ||
52 | * Possible values: | ||
53 | * 60, 40, 20, 10, 5, 2.5 | ||
54 | */ | ||
55 | #define FRAME_SIZE_MS 40 | ||
56 | |||
57 | /** | ||
58 | * How many samples to buffer before encoding them. | ||
59 | */ | ||
60 | #define FRAME_SIZE (SAMPLING_RATE / 1000 * FRAME_SIZE_MS) | ||
61 | |||
62 | /** | ||
63 | * Pages are committed when their size goes over this value. | ||
64 | * Note that in practice we flush pages VERY often (every frame), | ||
65 | * which means that pages NEVER really get to be this big. | ||
66 | * With one-packet-per-page, pages are roughly 100-300 bytes each. | ||
67 | * | ||
68 | * This value is chosen to make MAX_PAYLOAD_BYTES=1024 fit | ||
69 | * into a single page. | ||
70 | */ | ||
71 | #define PAGE_WATERLINE 800 | ||
72 | |||
73 | /** | ||
74 | * Maximum length of opus payload | ||
75 | */ | ||
76 | #define MAX_PAYLOAD_BYTES 1024 | ||
77 | |||
78 | /** | ||
79 | * Number of channels | ||
80 | */ | ||
81 | #define CHANNELS 1 | ||
82 | |||
83 | /** | ||
84 | * Configures the encoder's expected packet loss percentage. | ||
85 | * | ||
86 | * Higher values will trigger progressively more loss resistant behavior | ||
87 | * in the encoder at the expense of quality at a given bitrate | ||
88 | * in the lossless case, but greater quality under loss. | ||
89 | */ | ||
90 | #define CONV_OPUS_PACKET_LOSS_PERCENTAGE 1 | ||
91 | |||
92 | /** | ||
93 | * Configures the encoder's computational complexity. | ||
94 | * | ||
95 | * The supported range is 0-10 inclusive with 10 representing | ||
96 | * the highest complexity. | ||
97 | */ | ||
98 | #define CONV_OPUS_ENCODING_COMPLEXITY 10 | ||
99 | |||
100 | /** | ||
101 | * Configures the encoder's use of inband forward error correction (FEC). | ||
102 | * | ||
103 | * Note: This is only applicable to the LPC layer. | ||
104 | */ | ||
105 | #define CONV_OPUS_INBAND_FEC 1 | ||
106 | |||
107 | /** | ||
108 | * Configures the type of signal being encoded. | ||
109 | * | ||
110 | * This is a hint which helps the encoder's mode selection. | ||
111 | * | ||
112 | * Possible values: | ||
113 | * OPUS_AUTO - (default) Encoder detects the type automatically. | ||
114 | * OPUS_SIGNAL_VOICE - Bias thresholds towards choosing LPC or Hybrid modes. | ||
115 | * OPUS_SIGNAL_MUSIC - Bias thresholds towards choosing MDCT modes. | ||
116 | */ | ||
117 | #define CONV_OPUS_SIGNAL OPUS_AUTO | ||
118 | |||
119 | /** | ||
120 | * Coding mode. | ||
121 | * | ||
122 | * Possible values: | ||
123 | * OPUS_APPLICATION_VOIP - gives best quality at a given bitrate for voice | ||
124 | * signals. It enhances the input signal by high-pass filtering and | ||
125 | * emphasizing formants and harmonics. Optionally it includes in-band forward | ||
126 | * error correction to protect against packet loss. Use this mode for typical | ||
127 | * VoIP applications. Because of the enhancement, even at high bitrates | ||
128 | * the output may sound different from the input. | ||
129 | * OPUS_APPLICATION_AUDIO - gives best quality at a given bitrate for most | ||
130 | * non-voice signals like music. Use this mode for music and mixed | ||
131 | * (music/voice) content, broadcast, and applications requiring less than | ||
132 | * 15 ms of coding delay. | ||
133 | * OPUS_APPLICATION_RESTRICTED_LOWDELAY - configures low-delay mode that | ||
134 | * disables the speech-optimized mode in exchange for slightly reduced delay. | ||
135 | * This mode can only be set on a newly initialized or freshly reset encoder | ||
136 | * because it changes the codec delay. | ||
137 | */ | ||
138 | #define CONV_OPUS_APP_TYPE OPUS_APPLICATION_VOIP | ||
44 | 139 | ||
45 | /** | 140 | /** |
46 | * Specification for recording. May change in the future to spec negotiation. | 141 | * Specification for recording. May change in the future to spec negotiation. |
@@ -48,9 +143,38 @@ | |||
48 | static pa_sample_spec sample_spec = { | 143 | static pa_sample_spec sample_spec = { |
49 | .format = PA_SAMPLE_FLOAT32LE, | 144 | .format = PA_SAMPLE_FLOAT32LE, |
50 | .rate = SAMPLING_RATE, | 145 | .rate = SAMPLING_RATE, |
51 | .channels = 1 | 146 | .channels = CHANNELS |
52 | }; | 147 | }; |
53 | 148 | ||
149 | GNUNET_NETWORK_STRUCT_BEGIN | ||
150 | |||
151 | /* OggOpus spec says the numbers must be in little-endian order */ | ||
152 | struct OpusHeadPacket | ||
153 | { | ||
154 | uint8_t magic[8]; | ||
155 | uint8_t version; | ||
156 | uint8_t channels; | ||
157 | uint16_t preskip GNUNET_PACKED; | ||
158 | uint32_t sampling_rate GNUNET_PACKED; | ||
159 | uint16_t gain GNUNET_PACKED; | ||
160 | uint8_t channel_mapping; | ||
161 | }; | ||
162 | |||
163 | struct OpusCommentsPacket | ||
164 | { | ||
165 | uint8_t magic[8]; | ||
166 | uint32_t vendor_length; | ||
167 | /* followed by: | ||
168 | char vendor[vendor_length]; | ||
169 | uint32_t string_count; | ||
170 | followed by @a string_count pairs of: | ||
171 | uint32_t string_length; | ||
172 | char string[string_length]; | ||
173 | */ | ||
174 | }; | ||
175 | |||
176 | GNUNET_NETWORK_STRUCT_END | ||
177 | |||
54 | /** | 178 | /** |
55 | * Pulseaudio mainloop api | 179 | * Pulseaudio mainloop api |
56 | */ | 180 | */ |
@@ -82,7 +206,7 @@ static pa_io_event *stdio_event; | |||
82 | static OpusEncoder *enc; | 206 | static OpusEncoder *enc; |
83 | 207 | ||
84 | /** | 208 | /** |
85 | * | 209 | * Buffer for encoded data |
86 | */ | 210 | */ |
87 | static unsigned char *opus_data; | 211 | static unsigned char *opus_data; |
88 | 212 | ||
@@ -97,16 +221,6 @@ static float *pcm_buffer; | |||
97 | static int pcm_length; | 221 | static int pcm_length; |
98 | 222 | ||
99 | /** | 223 | /** |
100 | * Number of samples for one frame | ||
101 | */ | ||
102 | static int frame_size; | ||
103 | |||
104 | /** | ||
105 | * Maximum length of opus payload | ||
106 | */ | ||
107 | static int max_payload_bytes = 1500; | ||
108 | |||
109 | /** | ||
110 | * Audio buffer | 224 | * Audio buffer |
111 | */ | 225 | */ |
112 | static char *transmit_buffer; | 226 | static char *transmit_buffer; |
@@ -126,6 +240,28 @@ static size_t transmit_buffer_index; | |||
126 | */ | 240 | */ |
127 | static struct AudioMessage *audio_message; | 241 | static struct AudioMessage *audio_message; |
128 | 242 | ||
243 | /** | ||
244 | * Ogg muxer state | ||
245 | */ | ||
246 | static ogg_stream_state os; | ||
247 | |||
248 | /** | ||
249 | * Ogg packet id | ||
250 | */ | ||
251 | static int32_t packet_id; | ||
252 | |||
253 | /** | ||
254 | * Ogg granule for current packet | ||
255 | */ | ||
256 | static int64_t enc_granulepos; | ||
257 | |||
258 | #ifdef DEBUG_RECORD_PURE_OGG | ||
259 | /** | ||
260 | * 1 to not write GNUnet message headers, | ||
261 | * producing pure playable ogg output | ||
262 | */ | ||
263 | static int dump_pure_ogg; | ||
264 | #endif | ||
129 | 265 | ||
130 | /** | 266 | /** |
131 | * Pulseaudio shutdown task | 267 | * Pulseaudio shutdown task |
@@ -138,20 +274,59 @@ quit (int ret) | |||
138 | } | 274 | } |
139 | 275 | ||
140 | 276 | ||
277 | static void | ||
278 | write_data (const char *ptr, size_t msg_size) | ||
279 | { | ||
280 | ssize_t ret; | ||
281 | size_t off; | ||
282 | off = 0; | ||
283 | while (off < msg_size) | ||
284 | { | ||
285 | ret = write (1, &ptr[off], msg_size - off); | ||
286 | if (0 >= ret) | ||
287 | { | ||
288 | if (-1 == ret) | ||
289 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "write"); | ||
290 | quit (2); | ||
291 | } | ||
292 | off += ret; | ||
293 | } | ||
294 | } | ||
295 | |||
296 | static void | ||
297 | write_page (ogg_page *og) | ||
298 | { | ||
299 | static unsigned long long toff; | ||
300 | size_t msg_size; | ||
301 | msg_size = sizeof (struct AudioMessage) + og->header_len + og->body_len; | ||
302 | audio_message->header.size = htons ((uint16_t) msg_size); | ||
303 | memcpy (&audio_message[1], og->header, og->header_len); | ||
304 | memcpy (((char *) &audio_message[1]) + og->header_len, og->body, og->body_len); | ||
305 | |||
306 | toff += msg_size; | ||
307 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
308 | "Sending %u bytes of audio data (total: %llu)\n", | ||
309 | (unsigned int) msg_size, | ||
310 | toff); | ||
311 | #ifdef DEBUG_RECORD_PURE_OGG | ||
312 | if (dump_pure_ogg) | ||
313 | write_data ((const char *) &audio_message[1], og->header_len + og->body_len); | ||
314 | else | ||
315 | #endif | ||
316 | write_data ((const char *) audio_message, msg_size); | ||
317 | } | ||
318 | |||
141 | /** | 319 | /** |
142 | * Creates OPUS packets from PCM data | 320 | * Creates OPUS packets from PCM data |
143 | */ | 321 | */ |
144 | static void | 322 | static void |
145 | packetizer () | 323 | packetizer () |
146 | { | 324 | { |
147 | static unsigned long long toff; | ||
148 | char *nbuf; | 325 | char *nbuf; |
149 | size_t new_size; | 326 | size_t new_size; |
150 | const char *ptr; | 327 | int32_t len; |
151 | size_t off; | 328 | ogg_packet op; |
152 | ssize_t ret; | 329 | ogg_page og; |
153 | int len; // FIXME: int? | ||
154 | size_t msg_size; | ||
155 | 330 | ||
156 | while (transmit_buffer_length >= transmit_buffer_index + pcm_length) | 331 | while (transmit_buffer_length >= transmit_buffer_index + pcm_length) |
157 | { | 332 | { |
@@ -160,37 +335,42 @@ packetizer () | |||
160 | pcm_length); | 335 | pcm_length); |
161 | transmit_buffer_index += pcm_length; | 336 | transmit_buffer_index += pcm_length; |
162 | len = | 337 | len = |
163 | opus_encode_float (enc, pcm_buffer, frame_size, opus_data, | 338 | opus_encode_float (enc, pcm_buffer, FRAME_SIZE, opus_data, |
164 | max_payload_bytes); | 339 | MAX_PAYLOAD_BYTES); |
165 | 340 | ||
341 | if (len < 0) | ||
342 | { | ||
343 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
344 | _("opus_encode_float() failed: %s. Aborting\n"), | ||
345 | opus_strerror (len)); | ||
346 | quit (5); | ||
347 | } | ||
166 | if (len > UINT16_MAX - sizeof (struct AudioMessage)) | 348 | if (len > UINT16_MAX - sizeof (struct AudioMessage)) |
167 | { | 349 | { |
168 | GNUNET_break (0); | 350 | GNUNET_break (0); |
169 | continue; | 351 | continue; |
170 | } | 352 | } |
171 | 353 | ||
354 | /* As per OggOpus spec, granule is calculated as if the audio | ||
355 | had 48kHz sampling rate. */ | ||
356 | enc_granulepos += FRAME_SIZE * 48000 / SAMPLING_RATE; | ||
172 | 357 | ||
173 | msg_size = sizeof (struct AudioMessage) + len; | 358 | op.packet = (unsigned char *) opus_data; |
174 | audio_message->header.size = htons ((uint16_t) msg_size); | 359 | op.bytes = len; |
175 | memcpy (&audio_message[1], opus_data, len); | 360 | op.b_o_s = 0; |
361 | op.e_o_s = 0; | ||
362 | op.granulepos = enc_granulepos; | ||
363 | op.packetno = packet_id++; | ||
364 | ogg_stream_packetin (&os, &op); | ||
176 | 365 | ||
177 | toff += msg_size; | 366 | while (ogg_stream_flush_fill (&os, &og, PAGE_WATERLINE)) |
178 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
179 | "Sending %u bytes of audio data (total: %llu)\n", | ||
180 | (unsigned int) msg_size, | ||
181 | toff); | ||
182 | ptr = (const char *) audio_message; | ||
183 | off = 0; | ||
184 | while (off < msg_size) | ||
185 | { | 367 | { |
186 | ret = write (1, &ptr[off], msg_size - off); | 368 | if (og.header_len + og.body_len > UINT16_MAX - sizeof (struct AudioMessage)) |
187 | if (0 >= ret) | ||
188 | { | 369 | { |
189 | if (-1 == ret) | 370 | GNUNET_assert (0); |
190 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "write"); | 371 | continue; |
191 | quit (2); | ||
192 | } | 372 | } |
193 | off += ret; | 373 | write_page (&og); |
194 | } | 374 | } |
195 | } | 375 | } |
196 | 376 | ||
@@ -460,27 +640,112 @@ pa_init () | |||
460 | static void | 640 | static void |
461 | opus_init () | 641 | opus_init () |
462 | { | 642 | { |
463 | int channels = 1; | ||
464 | int err; | 643 | int err; |
465 | 644 | ||
466 | frame_size = SAMPLING_RATE / 50; | 645 | pcm_length = FRAME_SIZE * CHANNELS * sizeof (float); |
467 | pcm_length = frame_size * channels * sizeof (float); | ||
468 | pcm_buffer = pa_xmalloc (pcm_length); | 646 | pcm_buffer = pa_xmalloc (pcm_length); |
469 | opus_data = GNUNET_malloc (max_payload_bytes); | 647 | opus_data = GNUNET_malloc (MAX_PAYLOAD_BYTES); |
470 | enc = opus_encoder_create (SAMPLING_RATE, | 648 | enc = opus_encoder_create (SAMPLING_RATE, |
471 | channels, | 649 | CHANNELS, |
472 | OPUS_APPLICATION_VOIP, | 650 | CONV_OPUS_APP_TYPE, |
473 | &err); | 651 | &err); |
474 | opus_encoder_ctl (enc, | 652 | opus_encoder_ctl (enc, |
475 | OPUS_SET_PACKET_LOSS_PERC(1)); | 653 | OPUS_SET_PACKET_LOSS_PERC (CONV_OPUS_PACKET_LOSS_PERCENTAGE)); |
476 | opus_encoder_ctl (enc, | 654 | opus_encoder_ctl (enc, |
477 | OPUS_SET_COMPLEXITY(10)); | 655 | OPUS_SET_COMPLEXITY (CONV_OPUS_ENCODING_COMPLEXITY)); |
478 | opus_encoder_ctl (enc, | 656 | opus_encoder_ctl (enc, |
479 | OPUS_SET_INBAND_FEC(1)); | 657 | OPUS_SET_INBAND_FEC (CONV_OPUS_INBAND_FEC)); |
480 | opus_encoder_ctl (enc, | 658 | opus_encoder_ctl (enc, |
481 | OPUS_SET_SIGNAL (OPUS_SIGNAL_VOICE)); | 659 | OPUS_SET_SIGNAL (OPUS_SIGNAL_VOICE)); |
482 | } | 660 | } |
483 | 661 | ||
662 | static void | ||
663 | ogg_init () | ||
664 | { | ||
665 | int serialno; | ||
666 | struct OpusHeadPacket headpacket; | ||
667 | struct OpusCommentsPacket *commentspacket; | ||
668 | size_t commentspacket_len; | ||
669 | |||
670 | serialno = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_STRONG, 0x7FFFFFFF); | ||
671 | |||
672 | /*Initialize Ogg stream struct*/ | ||
673 | if (-1 == ogg_stream_init (&os, serialno)) | ||
674 | { | ||
675 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
676 | _("ogg_stream_init() failed.\n")); | ||
677 | exit (3); | ||
678 | } | ||
679 | |||
680 | packet_id = 0; | ||
681 | |||
682 | /*Write header*/ | ||
683 | { | ||
684 | ogg_packet op; | ||
685 | ogg_page og; | ||
686 | const char *opusver; | ||
687 | int vendor_length; | ||
688 | |||
689 | memcpy (headpacket.magic, "OpusHead", 8); | ||
690 | headpacket.version = 1; | ||
691 | headpacket.channels = CHANNELS; | ||
692 | headpacket.preskip = GNUNET_htole16 (0); | ||
693 | headpacket.sampling_rate = GNUNET_htole32 (SAMPLING_RATE); | ||
694 | headpacket.gain = GNUNET_htole16 (0); | ||
695 | headpacket.channel_mapping = 0; /* Mono or stereo */ | ||
696 | |||
697 | op.packet = (unsigned char *) &headpacket; | ||
698 | op.bytes = sizeof (headpacket); | ||
699 | op.b_o_s = 1; | ||
700 | op.e_o_s = 0; | ||
701 | op.granulepos = 0; | ||
702 | op.packetno = packet_id++; | ||
703 | ogg_stream_packetin (&os, &op); | ||
704 | |||
705 | /* Head packet must be alone on its page */ | ||
706 | while (ogg_stream_flush (&os, &og)) | ||
707 | { | ||
708 | write_page (&og); | ||
709 | } | ||
710 | |||
711 | commentspacket_len = sizeof (*commentspacket); | ||
712 | opusver = opus_get_version_string (); | ||
713 | vendor_length = strlen (opusver); | ||
714 | commentspacket_len += vendor_length; | ||
715 | commentspacket_len += sizeof (uint32_t); | ||
716 | |||
717 | commentspacket = (struct OpusCommentsPacket *) malloc (commentspacket_len); | ||
718 | if (NULL == commentspacket) | ||
719 | { | ||
720 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
721 | _("Failed to allocate %d bytes for second packet\n"), | ||
722 | commentspacket_len); | ||
723 | exit (5); | ||
724 | } | ||
725 | |||
726 | memcpy (commentspacket->magic, "OpusTags", 8); | ||
727 | commentspacket->vendor_length = GNUNET_htole32 (vendor_length); | ||
728 | memcpy (&commentspacket[1], opusver, vendor_length); | ||
729 | *(uint32_t *) &((char *) &commentspacket[1])[vendor_length] = \ | ||
730 | GNUNET_htole32 (0); /* no tags */ | ||
731 | |||
732 | op.packet = (unsigned char *) commentspacket; | ||
733 | op.bytes = commentspacket_len; | ||
734 | op.b_o_s = 0; | ||
735 | op.e_o_s = 0; | ||
736 | op.granulepos = 0; | ||
737 | op.packetno = packet_id++; | ||
738 | ogg_stream_packetin (&os, &op); | ||
739 | |||
740 | /* Comment packets must not be mixed with audio packets on their pages */ | ||
741 | while (ogg_stream_flush (&os, &og)) | ||
742 | { | ||
743 | write_page (&og); | ||
744 | } | ||
745 | |||
746 | free (commentspacket); | ||
747 | } | ||
748 | } | ||
484 | 749 | ||
485 | /** | 750 | /** |
486 | * The main function for the record helper. | 751 | * The main function for the record helper. |
@@ -500,6 +765,11 @@ main (int argc, char *argv[]) | |||
500 | "Audio source starts\n"); | 765 | "Audio source starts\n"); |
501 | audio_message = GNUNET_malloc (UINT16_MAX); | 766 | audio_message = GNUNET_malloc (UINT16_MAX); |
502 | audio_message->header.type = htons (GNUNET_MESSAGE_TYPE_CONVERSATION_AUDIO); | 767 | audio_message->header.type = htons (GNUNET_MESSAGE_TYPE_CONVERSATION_AUDIO); |
768 | |||
769 | #ifdef DEBUG_RECORD_PURE_OGG | ||
770 | dump_pure_ogg = getenv ("GNUNET_RECORD_PURE_OGG") ? 1 : 0; | ||
771 | #endif | ||
772 | ogg_init (); | ||
503 | opus_init (); | 773 | opus_init (); |
504 | pa_init (); | 774 | pa_init (); |
505 | return 0; | 775 | return 0; |