Unity游戏实时翻译解决方案:打破语言障碍的技术实现
2025/12/26 0:16:04
问题:对于数据增强中的 window warping 方法,我只知道它是对时间窗口进行扭曲,从图形上看相当于对数据做了局部缩放。由此产生了两个疑惑:
(1)它是如何实现缩放的?它在时间轴上的数学表达,只是简单地重新抽样吗?
(2)对局部窗口进行缩放后,该窗口原来的时长是否发生了变化?
这是最直观的理解方式:
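$$y(\tau) = x\bigl(t(\tau)\bigr) = x\bigl(\psi(\tau)\bigr)$$ 其中:$x$ 是原始信号,$y$ 是扭曲后的信号,$\tau$ 是扭曲后的时间,$\psi(\tau)$ 给出 $\tau$ 时刻所对应的原始时间 $t$,即扭曲函数 $\tau = \phi(t)$ 的反函数 $\psi = \phi^{-1}$。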
过程:
应用扭曲函数,本质上是对时间做一次变量替换:
$$y(\tau) = x\bigl(\psi(\tau)\bigr)$$
这相当于在时域上对信号进行了非均匀的重采样。
这个过程中最复杂的部分是:
这就是Window Warping技术的核心实现!
# Window Warping demo.
#
# Shows how a time-warping function tau = phi(t) stretches and compresses
# local windows of a signal: the warping function itself, the original and
# warped signals, a per-window duration comparison, a printed report, and the
# local stretch factor phi'(t).
#
# Run as a script to draw the figures and print the report. The numeric
# helpers have no matplotlib dependency so they can be imported on their own.

import numpy as np
from scipy import interpolate

# Reference windows (start, end) on the original time axis, and the colour
# used for each window in every plot.
intervals = [(1, 3), (4, 6), (7, 9)]
colors = ['red', 'green', 'blue']

# Shared style of the white rounded text boxes drawn on the plots.
_LABEL_BOX = dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.8)

# Human-readable consequence of each effect, used in the printed observations.
_EFFECT_DETAIL = {
    "拉伸": "波形被拉长,变得更稀疏",
    "压缩": "波形被压短,变得更密集",
    "不变": "时长保持不变",
}


def warp_func(t):
    """Map original time ``t`` to warped time ``tau = phi(t)``.

    S-shaped warp on [0, 10]: the middle of the range is stretched and the
    regions on either side are compressed.

    The bump is subtracted rather than added. With ``+`` the slope at t = 5
    is 1 - 2*pi/5 < 0, so the mapping folds back on itself and cannot be
    inverted; with ``-`` the slope stays positive on the whole range and the
    shape matches the "stretch the middle" intent.

    Args:
        t: scalar or NumPy array of original times.

    Returns:
        Warped time(s), same shape as ``t``.
    """
    return t - 2 * np.sin(np.pi * t / 5) * np.exp(-0.1 * (t - 5) ** 2)


def stretch_factor(t1, t2):
    """Return warped duration divided by original duration for [t1, t2]."""
    return (warp_func(t2) - warp_func(t1)) / (t2 - t1)


def describe_effect(factor):
    """Classify a stretch factor as stretch, compression or no change."""
    if factor > 1:
        return "拉伸"
    if factor < 1:
        return "压缩"
    return "不变"


def make_signal(t):
    """Build the demo signal: three sinusoids at 0.5, 1.0 and 2.0 Hz."""
    return (np.sin(2 * np.pi * 0.5 * t) * 0.7
            + np.sin(2 * np.pi * 1.0 * t) * 1.0
            + np.sin(2 * np.pi * 2.0 * t) * 0.5)


def is_invertible(tau):
    """Return True when the sampled warp ``tau`` is strictly increasing."""
    return bool(np.all(np.diff(np.asarray(tau)) > 0))


def warp_signal(t, signal, tau):
    """Resample ``signal`` onto a uniform grid of warped time.

    Implements y(tau) = x(phi^{-1}(tau)): the warp is inverted by
    interpolating t as a function of tau, then the signal is evaluated at
    those original times.

    Args:
        t: original sample times (1-D array).
        signal: signal values at ``t``.
        tau: warped times ``phi(t)`` at the same samples.

    Returns:
        ``(tau_uniform, signal_warped)``, both of length ``len(t)``.

    Raises:
        ValueError: if ``tau`` is not strictly increasing, because the warp
            then has no inverse.
    """
    if not is_invertible(tau):
        raise ValueError("warp function must be strictly increasing to be inverted")
    tau = np.asarray(tau)
    tau_uniform = np.linspace(tau.min(), tau.max(), len(t))
    f_interp = interpolate.interp1d(t, signal, kind='cubic', fill_value="extrapolate")
    f_inverse = interpolate.interp1d(tau, t, kind='cubic', fill_value="extrapolate")
    return tau_uniform, f_interp(f_inverse(tau_uniform))


def observation_lines():
    """Return one summary line per window, derived from the computed factors."""
    lines = []
    for i, (t1, t2) in enumerate(intervals):
        factor = stretch_factor(t1, t2)
        effect = describe_effect(factor)
        lines.append(f"{i+1}. 区间{i+1}:{effect} (×{factor:.2f}) - {_EFFECT_DETAIL[effect]}")
    return lines


def report_lines():
    """Return the full text report (duration table plus observations)."""
    lines = [
        "=" * 60,
        "Window Warping 时长变化分析",
        "=" * 60,
        f"{'区间':<10}{'原始时间':<15}{'原始时长':<10}{'扭曲后时间':<15}{'扭曲后时长':<12}{'拉伸因子':<10}",
        "-" * 80,
    ]
    for i, (t1, t2) in enumerate(intervals):
        tau1, tau2 = warp_func(t1), warp_func(t2)
        orig_duration = t2 - t1
        warped_duration = tau2 - tau1
        factor = warped_duration / orig_duration
        effect = describe_effect(factor)
        name = f'区间{i+1}'
        lines.append(
            f"{name:<10}[{t1:.1f},{t2:.1f}]s{orig_duration:<9.1f}s "
            f"[{tau1:.1f},{tau2:.1f}]s{warped_duration:<11.1f}s "
            f"{factor:.2f}({effect})"
        )
    lines.append("=" * 80)
    lines.append("\n关键观察:")
    lines.extend(observation_lines())
    lines.append("\n结论:Window Warping一定会改变局部时长!")
    return lines


def _plot_warp_function(ax, t, tau):
    """Panel 1: the warp curve against the identity, with the windows marked."""
    ax.plot(t, tau, 'b-', linewidth=3, alpha=0.8, label='扭曲函数 τ = φ(t)')
    ax.plot(t, t, 'k--', alpha=0.5, label='恒等变换 (无扭曲)')
    for i, ((t1, t2), color) in enumerate(zip(intervals, colors)):
        tau1, tau2 = warp_func(t1), warp_func(t2)
        # Dotted verticals show how far each window edge moves off the identity.
        ax.plot([t1, t1], [t1, tau1], color=color, linestyle=':', alpha=0.7)
        ax.plot([t2, t2], [t2, tau2], color=color, linestyle=':', alpha=0.7)
        # Chord across the window; its slope is the window's stretch factor.
        ax.plot([t1, t2], [tau1, tau2], color=color, marker='o', linewidth=2,
                markersize=8,
                label=f'区间{i+1}: [{t1},{t2}]→[{tau1:.1f},{tau2:.1f}]')
        ax.text((t1 + t2) / 2, (tau1 + tau2) / 2,
                f'×{stretch_factor(t1, t2):.2f}',
                ha='center', va='center', fontsize=10, fontweight='bold',
                bbox=_LABEL_BOX)
    ax.set_xlabel('原始时间 t', fontsize=12)
    ax.set_ylabel('扭曲时间 τ', fontsize=12)
    ax.set_title('扭曲函数:展示局部时间的拉伸与压缩', fontsize=14, fontweight='bold')
    ax.legend(loc='upper left', fontsize=9)
    ax.grid(True, alpha=0.3)
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 12)


def _plot_original_signal(ax, t, signal):
    """Panel 2: the original signal with each window highlighted."""
    ax.plot(t, signal, 'k-', linewidth=2, alpha=0.8)
    for i, ((t1, t2), color) in enumerate(zip(intervals, colors)):
        idx = (t >= t1) & (t <= t2)
        ax.plot(t[idx], signal[idx], color=color, linewidth=3)
        ax.fill_between(t[idx], signal[idx], alpha=0.2, color=color)
        duration = t2 - t1
        # Stagger the labels vertically so neighbouring windows do not collide.
        y_pos = 2.5 - i * 0.8
        ax.text((t1 + t2) / 2, y_pos, f'{duration}秒',
                ha='center', va='center', fontsize=10, fontweight='bold',
                bbox=_LABEL_BOX)
    ax.set_xlabel('时间 t (秒)', fontsize=12)
    ax.set_ylabel('幅值', fontsize=12)
    ax.set_title('原始信号', fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3)
    ax.set_ylim(-2.5, 2.5)


def _plot_warped_signal(ax, tau_uniform, signal_warped):
    """Panel 3: the warped signal with the images of the windows highlighted."""
    ax.plot(tau_uniform, signal_warped, 'k-', linewidth=2, alpha=0.8)
    for i, ((t1, t2), color) in enumerate(zip(intervals, colors)):
        tau1, tau2 = warp_func(t1), warp_func(t2)
        idx = (tau_uniform >= tau1) & (tau_uniform <= tau2)
        ax.plot(tau_uniform[idx], signal_warped[idx], color=color, linewidth=3)
        ax.fill_between(tau_uniform[idx], signal_warped[idx], alpha=0.2, color=color)
        warped_duration = tau2 - tau1
        y_pos = 2.5 - i * 0.8
        ax.text((tau1 + tau2) / 2, y_pos, f'{warped_duration:.1f}秒',
                ha='center', va='center', fontsize=10, fontweight='bold',
                bbox=_LABEL_BOX)
    ax.set_xlabel('扭曲时间 τ (秒)', fontsize=12)
    ax.set_ylabel('幅值', fontsize=12)
    ax.set_title('扭曲后的信号', fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3)
    ax.set_ylim(-2.5, 2.5)


def _plot_duration_bars(ax):
    """Panel 4: grouped bars comparing original and warped window durations."""
    interval_labels = [f'区间{i+1}' for i in range(len(intervals))]
    original_durations = [t2 - t1 for t1, t2 in intervals]
    warped_durations = [warp_func(t2) - warp_func(t1) for t1, t2 in intervals]

    x = np.arange(len(interval_labels))
    width = 0.35
    bars1 = ax.bar(x - width / 2, original_durations, width, label='原始时长',
                   color='skyblue', edgecolor='black')
    bars2 = ax.bar(x + width / 2, warped_durations, width, label='扭曲后时长',
                   color='lightcoral', edgecolor='black')

    for bars in (bars1, bars2):
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2., height + 0.05,
                    f'{height:.1f}s', ha='center', va='bottom', fontsize=10)

    for i, (orig, warped) in enumerate(zip(original_durations, warped_durations)):
        factor = warped / orig
        ax.text(i, max(orig, warped) + 0.3, f'×{factor:.2f}',
                ha='center', va='bottom', fontsize=11, fontweight='bold',
                bbox=dict(boxstyle="round,pad=0.2", facecolor="yellow", alpha=0.7))

    ax.set_xlabel('时间区间', fontsize=12)
    ax.set_ylabel('时长 (秒)', fontsize=12)
    ax.set_title('Window Warping导致的时长变化', fontsize=14, fontweight='bold')
    ax.set_xticks(x)
    ax.set_xticklabels(interval_labels)
    ax.legend()
    ax.grid(True, alpha=0.3, axis='y')
    # Leave headroom for the factor labels; a fixed top of 4 would clip the
    # stretched middle window.
    ax.set_ylim(0, max(4, max(original_durations + warped_durations) + 1))


def _plot_local_stretch(ax, t, tau):
    """Second figure: the local stretch factor phi'(t) along the time axis."""
    dt = t[1] - t[0]
    derivative = np.gradient(tau, dt)  # d(tau)/dt = phi'(t)

    ax.plot(t, derivative, 'purple', linewidth=2, label="局部拉伸因子 φ'(t)")
    ax.axhline(y=1, color='gray', linestyle='--', alpha=0.7, label="无变化 (φ'(t)=1)")
    ax.fill_between(t, derivative, 1, where=(derivative > 1), color='red',
                    alpha=0.3, label="拉伸区域 (φ'(t)>1)")
    ax.fill_between(t, derivative, 1, where=(derivative < 1), color='blue',
                    alpha=0.3, label="压缩区域 (φ'(t)<1)")

    for (t1, t2), color in zip(intervals, colors):
        ax.axvspan(t1, t2, alpha=0.2, color=color)
        avg_derivative = np.mean(derivative[(t >= t1) & (t <= t2)])
        ax.text((t1 + t2) / 2, 0.5, f'平均拉伸因子:{avg_derivative:.2f}',
                ha='center', fontsize=9,
                bbox=dict(boxstyle="round,pad=0.2", facecolor="white"))

    ax.set_xlabel('时间 t (秒)', fontsize=12)
    ax.set_ylabel("局部拉伸因子 φ'(t)", fontsize=12)
    ax.set_title('Window Warping的局部拉伸因子变化', fontsize=14, fontweight='bold')
    ax.legend(loc='upper left')
    ax.grid(True, alpha=0.3)
    ax.set_ylim(0, 2.5)


def main():
    """Draw the four-panel figure, print the report, then draw phi'(t)."""
    # Imported here so the numeric helpers above stay importable on machines
    # without matplotlib or a display.
    import matplotlib
    import matplotlib.pyplot as plt

    matplotlib.rc("font", family='YouYuan')
    # CJK fonts often lack U+2212; without this, negative tick labels can
    # render as empty boxes.
    matplotlib.rc("axes", unicode_minus=False)

    t = np.linspace(0, 10, 1000)
    tau = warp_func(t)
    signal = make_signal(t)
    tau_uniform, signal_warped = warp_signal(t, signal, tau)

    fig = plt.figure(figsize=(16, 10))
    _plot_warp_function(fig.add_subplot(2, 2, 1), t, tau)
    _plot_original_signal(fig.add_subplot(2, 2, 2), t, signal)
    _plot_warped_signal(fig.add_subplot(2, 2, 3), tau_uniform, signal_warped)
    _plot_duration_bars(fig.add_subplot(2, 2, 4))
    fig.suptitle('Window Warping:局部时间的拉伸与压缩效果', fontsize=16,
                 fontweight='bold', y=0.98)
    plt.tight_layout()
    plt.show()

    for line in report_lines():
        print(line)

    _, ax = plt.subplots(figsize=(10, 6))
    _plot_local_stretch(ax, t, tau)
    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    main()