不引用第三方库读取wav文件并逐帧估算基频
借助大模型实现:请用C语言实现,读取wav文件的数据,并估算每一帧的基频,输出每帧的基频值,最后求平均值,不要有省略的功能代码,不引用第三方专用库,不考虑运行效率,但是要求准确。
/*
 * Frame-by-frame fundamental-frequency (pitch) estimation for 16-bit PCM WAV
 * files, using time-domain autocorrelation. Only the C standard library is
 * used.
 *
 * Usage: program [wav_file]      (defaults to "wdsyy1.wav")
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>

/*
 * Fields of the RIFF header, the "fmt " chunk and the "data" chunk header.
 * The struct is filled field by field from the parsed chunks; it is never
 * read from disk as a raw memory image, so struct padding and host byte
 * order do not matter.
 */
typedef struct {
    char riff[4];
    uint32_t overall_size;
    char wave[4];
    char fmt_chunk_marker[4];
    uint32_t length_of_fmt;
    uint16_t format_type;
    uint16_t channels;
    uint32_t sample_rate;
    uint32_t byte_rate;
    uint16_t block_align;
    uint16_t bits_per_sample;
    char data_chunk_header[4];
    uint32_t data_size;
} WAV_HEADER;

/* Decode a little-endian 16-bit unsigned value. */
static uint16_t read_le16(const unsigned char *p)
{
    return (uint16_t)((unsigned)p[0] | ((unsigned)p[1] << 8));
}

/* Decode a little-endian 32-bit unsigned value. */
static uint32_t read_le32(const unsigned char *p)
{
    return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
           ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

/* Read exactly len bytes; returns 1 on success, 0 on a short read. */
static int read_exact(FILE *file, void *dst, size_t len)
{
    return fread(dst, 1, len, file) == len;
}

/* Consume and discard count bytes; returns 1 on success, 0 on a short read. */
static int skip_bytes(FILE *file, uint32_t count)
{
    unsigned char scratch[256];

    while (count > 0) {
        size_t step = count < sizeof scratch ? (size_t)count : sizeof scratch;
        if (fread(scratch, 1, step, file) != step) {
            return 0;
        }
        count -= (uint32_t)step;
    }
    return 1;
}

/* Report a fatal WAV parsing error, close the file and terminate. */
static void wav_fail(FILE *file, const char *message)
{
    fprintf(stderr, "%s\n", message);
    fclose(file);
    exit(EXIT_FAILURE);
}

/*
 * Load a 16-bit PCM WAV file as a mono signal.
 *
 * The RIFF chunk list is walked until the "data" chunk is found, so files
 * with extra chunks (e.g. "LIST") or an extended "fmt " chunk are handled.
 * Multi-channel audio is downmixed by averaging the channels of each sample
 * frame.
 *
 * filename    - path of the WAV file
 * num_samples - out: number of mono samples returned
 * sample_rate - out: sampling rate in Hz
 *
 * Returns a malloc'ed buffer that the caller must free(). On any error a
 * message is written to stderr and the process exits with EXIT_FAILURE.
 */
int16_t *read_wav_file(const char *filename, uint32_t *num_samples, uint32_t *sample_rate)
{
    FILE *file = fopen(filename, "rb");
    if (!file) {
        perror("Error opening file");
        exit(EXIT_FAILURE);
    }

    WAV_HEADER header;
    memset(&header, 0, sizeof header);

    unsigned char bytes[16];
    if (!read_exact(file, bytes, 12) ||
        memcmp(bytes, "RIFF", 4) != 0 ||
        memcmp(bytes + 8, "WAVE", 4) != 0) {
        wav_fail(file, "Not a RIFF/WAVE file");
    }
    memcpy(header.riff, bytes, 4);
    header.overall_size = read_le32(bytes + 4);
    memcpy(header.wave, bytes + 8, 4);

    int have_fmt = 0;
    int have_data = 0;
    while (!have_data) {
        if (!read_exact(file, bytes, 8)) {
            wav_fail(file, "WAV file has no usable fmt/data chunks");
        }
        uint32_t chunk_size = read_le32(bytes + 4);

        if (memcmp(bytes, "data", 4) == 0) {
            if (!have_fmt) {
                wav_fail(file, "WAV data chunk appears before fmt chunk");
            }
            memcpy(header.data_chunk_header, bytes, 4);
            header.data_size = chunk_size;
            have_data = 1;
        } else if (memcmp(bytes, "fmt ", 4) == 0) {
            memcpy(header.fmt_chunk_marker, bytes, 4);
            header.length_of_fmt = chunk_size;
            if (chunk_size < 16 || !read_exact(file, bytes, 16)) {
                wav_fail(file, "Malformed WAV fmt chunk");
            }
            header.format_type = read_le16(bytes);
            header.channels = read_le16(bytes + 2);
            header.sample_rate = read_le32(bytes + 4);
            header.byte_rate = read_le32(bytes + 8);
            header.block_align = read_le16(bytes + 12);
            header.bits_per_sample = read_le16(bytes + 14);
            /* Skip any fmt extension plus the pad byte of odd-sized chunks. */
            if (!skip_bytes(file, chunk_size - 16) ||
                !skip_bytes(file, chunk_size & 1u)) {
                wav_fail(file, "Truncated WAV fmt chunk");
            }
            have_fmt = 1;
        } else {
            /* Unknown chunk: skip its payload and the optional pad byte. */
            if (!skip_bytes(file, chunk_size) ||
                !skip_bytes(file, chunk_size & 1u)) {
                wav_fail(file, "WAV file has no usable fmt/data chunks");
            }
        }
    }

    if (header.format_type != 1) {
        fprintf(stderr, "Unsupported format type: %u\n", (unsigned)header.format_type);
        fclose(file);
        exit(EXIT_FAILURE);
    }
    if (header.bits_per_sample != 16 || header.channels == 0 || header.sample_rate == 0) {
        wav_fail(file, "Only 16-bit PCM with a non-zero channel count and sample rate is supported");
    }

    uint32_t bytes_per_frame = (uint32_t)header.channels * 2u;
    uint32_t frames_expected = header.data_size / bytes_per_frame;

    /* Allocate at least one element so an empty data chunk still yields a valid pointer. */
    int16_t *data = malloc((size_t)(frames_expected ? frames_expected : 1u) * sizeof *data);
    if (!data) {
        wav_fail(file, "Out of memory while loading WAV data");
    }

    /* Read sample frames one at a time; stop early if the file is truncated. */
    uint32_t frames_read = 0;
    int truncated = 0;
    while (frames_read < frames_expected && !truncated) {
        int32_t channel_sum = 0;
        for (uint32_t ch = 0; ch < header.channels; ch++) {
            if (!read_exact(file, bytes, 2)) {
                truncated = 1;
                break;
            }
            int32_t sample = read_le16(bytes);
            if (sample >= 32768) {
                sample -= 65536; /* reinterpret as signed two's complement */
            }
            channel_sum += sample;
        }
        if (!truncated) {
            data[frames_read] = (int16_t)(channel_sum / (int32_t)header.channels);
            frames_read++;
        }
    }

    fclose(file);

    *sample_rate = header.sample_rate;
    *num_samples = frames_read;
    return data;
}

/*
 * Raw (unnormalised) autocorrelation of signal[0..length-1] at the given lag:
 * the sum of signal[i] * signal[i + lag] for every i with i + lag < length.
 * Returns 0.0 when lag >= length.
 */
double autocorrelation(const int16_t *signal, int length, int lag)
{
    double sum = 0.0;
    for (int i = 0; i < length - lag; i++) {
        sum += (double)signal[i] * (double)signal[i + lag];
    }
    return sum;
}

/*
 * Estimate the fundamental frequency of one frame.
 *
 * The lag with the largest positive autocorrelation is searched in the range
 * corresponding to pitches from 500 Hz (shortest lag) down to 50 Hz (longest
 * lag); the result is sample_rate / lag.
 *
 * Returns the frequency in Hz, or 0.0 when no lag has a positive
 * autocorrelation (e.g. a silent frame) or the search range is empty.
 */
double estimate_fundamental_frequency(const int16_t *frame, int frame_size, uint32_t sample_rate)
{
    int min_lag = (int)(sample_rate / 500); /* shortest period: highest pitch, 500 Hz */
    int max_lag = (int)(sample_rate / 50);  /* longest period: lowest pitch, 50 Hz */

    if (min_lag < 1) {
        min_lag = 1; /* lag 0 is the signal energy, not a period */
    }
    if (max_lag > frame_size - 1) {
        max_lag = frame_size - 1; /* longer lags have no overlapping samples */
    }

    double max_autocorr = 0.0;
    int best_lag = 0;
    for (int lag = min_lag; lag <= max_lag; lag++) {
        double autocorr = autocorrelation(frame, frame_size, lag);
        if (autocorr > max_autocorr) {
            max_autocorr = autocorr;
            best_lag = lag;
        }
    }

    if (best_lag == 0) {
        return 0.0;
    }
    return (double)sample_rate / best_lag;
}

/*
 * Print the estimated fundamental frequency of every frame of a WAV file and
 * the average over all frames for which a pitch was found (frames reported
 * as 0.00 Hz are excluded from the average).
 *
 * The file is taken from argv[1] when given, otherwise "wdsyy1.wav".
 */
int main(int argc, char *argv[])
{
    const char *path = (argc > 1) ? argv[1] : "wdsyy1.wav";

    uint32_t num_samples = 0;
    uint32_t sample_rate = 0;
    int16_t *data = read_wav_file(path, &num_samples, &sample_rate);

    const int frame_size = 1024; /* samples per analysis frame */
    const int hop_size = 512;    /* samples between frame starts */

    if (num_samples < (uint32_t)frame_size) {
        fprintf(stderr, "Audio is shorter than one %d-sample frame\n", frame_size);
        free(data);
        return EXIT_FAILURE;
    }

    int num_frames = (int)((num_samples - (uint32_t)frame_size) / (uint32_t)hop_size) + 1;

    double sum_fundamentals = 0.0;
    int pitched_frames = 0;
    for (int i = 0; i < num_frames; i++) {
        const int16_t *frame = data + (size_t)i * (size_t)hop_size;
        double fundamental_frequency =
            estimate_fundamental_frequency(frame, frame_size, sample_rate);

        if (fundamental_frequency > 0.0) {
            sum_fundamentals += fundamental_frequency;
            pitched_frames++;
        }
        printf("Frame %d: Fundamental Frequency = %.2f Hz\n", i, fundamental_frequency);
    }

    double average_fundamental =
        pitched_frames > 0 ? sum_fundamentals / pitched_frames : 0.0;
    printf("Average Fundamental Frequency = %.2f Hz\n", average_fundamental);

    free(data);
    return EXIT_SUCCESS;
}
本代码运行结果:
MATLAB运行代码及结果:
% Estimate the per-frame pitch of wdsyy1.wav with pitch() and plot the
% values that do not exceed 400 Hz.
wlen = 1024;  % passed to pitch() as 'WindowLength'
inc = 512;    % passed to pitch() as 'OverlapLength'

%% Read the audio file
[x, fs] = audioread('wdsyy1.wav');

%% Convert the signal to mono
% Average across columns for any multi-channel file, not only stereo.
if size(x, 2) > 1
    x = mean(x, 2);
end

%% Extract the pitch
[f0, idx] = pitch(x, fs, 'WindowLength', wlen, 'OverlapLength', inc);
fn1 = size(f0, 1);          % number of pitch estimates returned
result_f0 = f0(f0 <= 400);  % keep only estimates at or below 400 Hz

%% Plot the retained pitch values
figure()
plot(result_f0);
title('基音频率f0')
xlabel('帧')
ylabel('频率/Hz')
axis([0 fn1 0 400])  % x-range spans the unfiltered estimate count
凯特网版权声明:以上内容允许转载,但请注明出处,谢谢!