双声道wav PCM音频文件采样率修改

时间：2024-03-27 12:04:02 ｜ 发布：caterwang ｜ 分类：Linux C程序 ｜ 浏览：2829

遇到在高采样率的wav文件PCM数据送给网络音箱进行播放时不正常的情况。于是就需要对wav文件进行处理。输入的文件同时满足以下条件时有效：双声道，采样率大于11025，采样深度，即位宽为16。

wav文件头部信息格式

#pragma pack(push, 1)
/*
 * In-memory image of a canonical PCM WAV header.
 *
 * The first sizeof(wavHead)-8 bytes (chunk_id .. data_chunk_size) are laid
 * out exactly like the header on disk and are written to the file verbatim.
 * The last two members (num_frame, start_pos) are bookkeeping only and are
 * never written.
 *
 * The struct carries a tag name: an unnamed class that uses default member
 * initializers may not be named only through a typedef in current C++.
 */
typedef struct wavHead
{
	int8_t 		chunk_id[4]	= { 0 };			// file identifier, normally "RIFF"
	uint32_t 	chunk_size 	= 0;				// size of the file contents; this value + 8 is the size of the whole WAV file
	int8_t 		format[4]	= { 0 };			// file format, "WAVE"
	int8_t 		fmt_chunk_id[4]	= { 0 };		// sub-chunk identifier, "fmt " (note the trailing space)
	uint32_t 	fmt_chunk_size 	= 0;			// sub-chunk size; depends on the encoding, e.g. 16, 18, 20 or 40
	uint16_t 	audio_fomat  	= 0;			// audio format, 1 means PCM
	uint16_t 	num_channels 	= 0;			// channel count, 1: mono, 2: stereo
	uint32_t 	sample_rate    	= 0;			// sample rate in Hz; common values are 11025, 22050 and 44100
	uint32_t 	byte_rate      	= 0;			// bytes per second = channels x sample rate x bits per sample / 8; players use it to size buffers
	uint16_t 	block_align  	= 0;			// frame size in bytes = channels x bits per sample / 8
	uint16_t 	bits_per_sample = 0;			// sample depth in bits; common values are 4, 8, 12, 16, 24, 32
	int8_t 		data_chunk_id[4]	= { 0 };	// sub-chunk identifier, "data"
	uint32_t	data_chunk_size   	= 0;		// size of the PCM payload in bytes
	int32_t		num_frame = 0;					// total number of frames in the file (not stored on disk)
	int32_t		start_pos = 0;					// file offset at which the audio data starts (not stored on disk)
}wavHead;

#pragma pack(pop)

这个过程分为四步

第一步.取出原wav文件的头部信息

第二步.抽样并存储数据，存储时需要从文件偏移头部信息的长度，44字节(sizeof(wavHead)-8，即结构体去掉末尾两个不写入文件的辅助字段)。

第三步.修改头部信息

void wavHeadFormat(wavHead &wh,int nSampleRate,int ndata_chunk_size)
{
	wh.sample_rate 		= nSampleRate;
	wh.byte_rate   		= wh.num_channels*wh.sample_rate*wh.bits_per_sample/8;
	wh.data_chunk_size 	= ndata_chunk_size;								//子块大小,PCM数据大小
	wh.chunk_size      	= ndata_chunk_size+wh.start_pos-8;				// 文件数据大小,该值 + 8 就是整个WAV文件的大小
}

第四步.把修改好的头部信息写入wav文件头部

    fseek(fp,0,SEEK_SET);	// seek back to the start of the file
    wavHeadFormat(wh,11025,data_chunk_size);// update the header fields for the new sample rate and data size
    // Write sizeof(wavHead)-8 bytes starting at chunk_id, i.e. every member
    // except the trailing num_frame and start_pos.
    fwrite(wh.chunk_id, sizeof(wavHead)-8, 1, fp);

以下是完整文件代码

文件wavEdit.cpp

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include "hri_utils.h"
#include "wavEdit.h"

/*------------------------------------------
* Reads a whole file into a newly allocated buffer.
* filename: path of the file to read
* datalen:  receives the file size in bytes on success, 0 on failure
* return:   buffer holding the file contents followed by 16 zero bytes
*           (the caller releases it with free()), or NULL when the file
*           cannot be stat'ed, opened, allocated for, or read completely
--------------------------------------------*/
char* read_File_rb(const char * filename,int &datalen)
{
	const int padding = 16;			// zeroed slack kept after the file contents

	datalen = 0;

	// Query the file size.
	struct stat statbuf;
	if (stat(filename, &statbuf) != 0)
	{
		return NULL;
	}

	// The size is reported through an int, so refuse anything that would not fit.
	if (statbuf.st_size < 0 || statbuf.st_size > INT_MAX - padding)
	{
		return NULL;
	}
	const size_t fileSize = (size_t)statbuf.st_size;

	FILE* fp = fopen(filename, "rb");
	if (NULL == fp)
	{
		return NULL;
	}

	// calloc zero-fills, which also provides the trailing padding.
	char *outdat = (char *)calloc(fileSize + (size_t)padding, 1);
	if (NULL != outdat && fread(outdat, 1, fileSize, fp) != fileSize)
	{
		// A short read would hand back a partly zero-filled buffer; treat it as failure.
		free(outdat);
		outdat = NULL;
	}
	fclose(fp);

	if (NULL != outdat)
	{
		datalen = (int)fileSize;
	}
	return outdat;
}

void wavHeadFormat(wavHead &wh,int nSampleRate,int ndata_chunk_size)
{
	wh.sample_rate 		= nSampleRate;
	wh.byte_rate   		= wh.num_channels*wh.sample_rate*wh.bits_per_sample/8;
	wh.data_chunk_size 	= ndata_chunk_size;								//子块大小,PCM数据大小
	wh.chunk_size      	= ndata_chunk_size+wh.start_pos-8;				// 文件数据大小,该值 + 8 就是整个WAV文件的大小
}

/*------------------------------------------
* filename 文件名称路径(含路径)
* wh： 用来保存文件头的结构体实例
--------------------------------------------*/
void wavFormate(char* filename,wavHead &wh) 
{
	//2.文件头分析
	const int HEAD_LENGTH = 256 * 1024;//256kb
	char buf[HEAD_LENGTH];

	//memcpy(buf,fileBuf,HEAD_LENGTH);
	
	FILE *stream = fopen(filename, "rb");
	fread(buf, 1, HEAD_LENGTH, stream);	
	fclose (stream);
	//记录文件读取位置
	int pos = 0;


	//寻找“RIFF”标记
	while (pos < HEAD_LENGTH) {
		if ((buf[pos] == 'R') && (buf[pos + 1] == 'I') && (buf[pos + 2] == 'F') && (buf[pos + 3] == 'F')) {
			wh.chunk_id[0] = 'R';
			wh.chunk_id[1] = 'I';
			wh.chunk_id[2] = 'F';
			wh.chunk_id[3] = 'F';
			pos += 4;
			break;
		}
		++pos;
	}

	//读取Header部分
	wh.chunk_size = *(int *)&buf[pos];
	pos += 4;
	wh.format[0] = buf[pos];
	wh.format[1] = buf[pos + 1];
	wh.format[2] = buf[pos + 2];
	wh.format[3] = buf[pos + 3];
	pos += 4;


	//寻找“fmt”标记
	while (pos < HEAD_LENGTH) {
		if ((buf[pos] == 'f') && (buf[pos + 1] == 'm') && (buf[pos + 2] == 't')) {
			wh.fmt_chunk_id[0] = 'f';
			wh.fmt_chunk_id[1] = 'm';
			wh.fmt_chunk_id[2] = 't';
			wh.fmt_chunk_id[3] = ' ';	//这个可别漏了哦
			pos += 4;
			break;
		}
		++pos;
	}

	//读取Format Chunk部分
	wh.fmt_chunk_size = *(int *)&buf[pos];
	pos += 4;
	wh.audio_fomat = *(short *)&buf[pos];
	pos += 2;
	wh.num_channels = *(short *)&buf[pos];
	pos += 2;
	wh.sample_rate = *(int *)&buf[pos];
	pos += 4;
	wh.byte_rate = *(int *)&buf[pos];
	pos += 4;
	wh.block_align = *(short *)&buf[pos];
	pos += 2;
	wh.bits_per_sample = *(short *)&buf[pos];
	pos += 2;


	//寻找“data”标记
	while (pos < HEAD_LENGTH) {
		if ((buf[pos] == 'd') && (buf[pos + 1] == 'a') && (buf[pos + 2] == 't') && (buf[pos + 3] == 'a'))
		{
			wh.data_chunk_id[0] = 'd';
			wh.data_chunk_id[1] = 'a';
			wh.data_chunk_id[2] = 't';
			wh.data_chunk_id[3] = 'a';
			pos += 4;
			break;
		}
		++pos;
	}

	//读取Data Chunk的非data部分
	wh.data_chunk_size = *(int *)&buf[pos];
	pos += 4;

	//记录真正音频数据的开始位置
	wh.start_pos = pos;

	//计算文件总帧数
	wh.num_frame = wh.data_chunk_size / (wh.num_channels*(wh.bits_per_sample / 8));
	printf("[0]sample_rate(%d),byte_rate(%d),data_chunk_size(%d),chunk_size(%d),fmt_chunk_size(%d),start_pos(%d) @ %s.",\
		wh.sample_rate,		\
		wh.byte_rate,		\
		wh.data_chunk_size,	\
		wh.chunk_size,wh.fmt_chunk_size,wh.start_pos,\
		filename);
	//3.数据转写-
	//https://www.cnblogs.com/wangguchangqing/p/5970516.html
	if(wh.num_channels==2 && wh.sample_rate>11025 && wh.bits_per_sample==16)
	{	
		int rate = wh.sample_rate/11025;
		if(rate>1)
		{
			//读取文件原始数据
			int fileLen;
			char *fileBuf = read_File_rb(filename,fileLen); 
		
			FILE* fp = fopen(filename, "wb+");	
		    if (NULL != fp) 
			{
				//写文件头
				fwrite(wh.chunk_id, sizeof(wavHead)-8, 1, fp);
				int16_t temp;
				int32_t tempSumL 	= 0;
				int32_t tempSumR 	= 0;
				int flag          	= 0;
				int data_chunk_size = 0;	//修改后PCM数据长度，单位字节
				
				uint16_t *pdata = (uint16_t *)(fileBuf+wh.start_pos);		//转成16位数据
				int max 	    = (wh.data_chunk_size/2)-rate*2;				
				for(int i=0;i<=max;i+=(rate*2))
				{
					flag	 = 0;
					tempSumL = 0;
					tempSumR = 0;
					// 对于双声道立体声声音文件，每次采样数据为一个16位的整数（int），高八位(左声道)和低八位(右声道)分别代表两个声道。
					while(flag<rate)
					{
						if(flag%2==0)
						{
							temp = pdata[i+flag];
							tempSumL += temp;
						}
						else
						{
							temp = pdata[i+flag+1];
							tempSumR += temp;
						}
						flag++;
					}

					temp = (uint16_t)(tempSumL/rate);
					fwrite(&temp, 2, 1, fp); 
					
					temp = (uint16_t)(tempSumR/rate);
					fwrite(&temp, 2, 1, fp); 
					
					data_chunk_size+=4;					
				}	
				fseek(fp,0,SEEK_SET);	//定位到文件起始位置
				//重写文件头
				wavHeadFormat(wh,11025,data_chunk_size);
				fwrite(wh.chunk_id, sizeof(wavHead)-8, 1, fp);					
		        fclose(fp);
				printf("[1]sample_rate(%d),byte_rate(%d),data_chunk_size(%d),chunk_size(%d),fmt_chunk_size(%d),start_pos(%d) @ %s.",\
					wh.sample_rate,		\
					wh.byte_rate,		\
					wh.data_chunk_size,	\
					wh.chunk_size,wh.fmt_chunk_size,wh.start_pos,\
					filename);	
		    }
			//内存释放
			free(fileBuf);
			fileBuf=NULL;			
		}
	}
}

文件wavEdit.h

#ifndef _WAVEDIT_H_
#define _WAVEDIT_H_

// Fixed-width integer types (uint8_t, int8_t, uint16_t, int16_t, uint32_t,
// int32_t) come from the standard header. Hand-written typedefs clash with it
// on platforms where, for example, int32_t is defined as long.
#include <stdint.h>

#pragma pack(push, 1)
/*
 * In-memory image of a canonical PCM WAV header.
 *
 * The first sizeof(wavHead)-8 bytes (chunk_id .. data_chunk_size) are laid
 * out exactly like the header on disk and are written to the file verbatim.
 * The last two members (num_frame, start_pos) are bookkeeping only and are
 * never written.
 *
 * The struct carries a tag name: an unnamed class that uses default member
 * initializers may not be named only through a typedef in current C++.
 */
typedef struct wavHead
{
	int8_t 		chunk_id[4]	= { 0 };			// file identifier, normally "RIFF"
	uint32_t 	chunk_size 	= 0;				// size of the file contents; this value + 8 is the size of the whole WAV file
	int8_t 		format[4]	= { 0 };			// file format, "WAVE"
	int8_t 		fmt_chunk_id[4]	= { 0 };		// sub-chunk identifier, "fmt " (note the trailing space)
	uint32_t 	fmt_chunk_size 	= 0;			// sub-chunk size; depends on the encoding, e.g. 16, 18, 20 or 40
	uint16_t 	audio_fomat  	= 0;			// audio format, 1 means PCM
	uint16_t 	num_channels 	= 0;			// channel count, 1: mono, 2: stereo
	uint32_t 	sample_rate    	= 0;			// sample rate in Hz; common values are 11025, 22050 and 44100
	uint32_t 	byte_rate      	= 0;			// bytes per second = channels x sample rate x bits per sample / 8; players use it to size buffers
	uint16_t 	block_align  	= 0;			// frame size in bytes = channels x bits per sample / 8
	uint16_t 	bits_per_sample = 0;			// sample depth in bits; common values are 4, 8, 12, 16, 24, 32
	int8_t 		data_chunk_id[4]	= { 0 };	// sub-chunk identifier, "data"
	uint32_t	data_chunk_size   	= 0;		// size of the PCM payload in bytes
	int32_t		num_frame = 0;					// total number of frames in the file (not stored on disk)
	int32_t		start_pos = 0;					// file offset at which the audio data starts (not stored on disk)
}wavHead;

#pragma pack(pop)

/*
 * Parses the WAV header of `filename` into `wh`. When the file is stereo,
 * 16 bits per sample and its sample rate is at least twice 11025 Hz, the file
 * is rewritten in place at a reduced sample rate and `wh` is updated to match.
 */
void wavFormate(char* filename,wavHead &wh);

#endif

用法

#include ...
#include "wavEdit.h"
int main(int argc, char* argv[])
{
    char m_audioFile[256]="xx.wav";      //音频文件（双声道，采样率大于11025，采样深度，即位宽为16）
    wavHead wh;
    wavFormate(m_audioFile, wh);
    return 0;
}

以上，完。