令d[i]为第i个样本数据,cnt为样本个数,经过化简可得
\[ans=\frac{\sum(d[i]^2)}{cnt}-(\frac{\sum d[i]}{cnt})^2\]
枚举每一种可能的三核苷酸,得到它出现的各个位置,假设当前出现了tot个,第i个的编号为a[i],经过化简可得
\[cnt+=C_{tot}^2\]
\[\sum d[i]+=\sum (a[i+1]-a[i])i(tot-i)\]
\[\sum (d[i]^2)+=tot\sum(a[i]^2)-(\sum a[i])^2\]
时间复杂度$O(n)$。
#include#include #define N 100010typedef long long ll;int T,n,i,j,tot[64],q[64][N];ll cnt,sumd,sumd2,s,s2;char a[N];inline ll C2(ll x){return x*(x-1)/2;}inline ll sqr(ll x){return x*x;}double sqr(double x){return x*x;}double solve(){ scanf("%s",a+1);n=strlen(a+1); for(i=1;i<=n;i++){ if(a[i]=='A')a[i]=0; else if(a[i]=='G')a[i]=1; else if(a[i]=='C')a[i]=2; else a[i]=3; } for(cnt=sumd=sumd2=i=0;i<64;i++)tot[i]=0; for(i=1;i <<2)|(a[i+2]<<4),q[j][++tot[j]]=i; for(i=0;i<64;i++)if(tot[i]>=2){ cnt+=C2(tot[i]); for(j=1;j