题意:
求#\sum _{i=1}^n\sum _{j=1}^n\ ijgcd(i,j)#
#=\sum _{d=1}^n \sum _{i=1}^\frac n d\sum _{j=1}^\frac n d\ ijd^3[gcd(i,j)==1]#
#=\sum _{d=1}^n d^3 \sum _{i=1}^\frac n d\sum _{j=1}^\frac n d\ ij\sum _{k|gcd(i,j)}\mu (k)#
#=\sum _{d=1}^nd^3 \sum _{k=1}^{\frac n d}\mu(k)k^2\sum _{i=1}^{\frac n {kd}}i\sum _{j=1}^{\frac n {kd}}j#
设$S_1(n)=\sum _{i=1}^n i$,则原式
#=\sum _{d=1}^nd^3 \sum _{k=1}^{\frac n d}\mu(k)k^2S_1(\frac n {kd})^2#
#=\sum _{T=1}^nS_1(\frac n T)^2\sum _{k|T} (\frac T k)^3k^2\mu(k)#
#=\sum _{T=1}^nT^2S_1(\frac n T)^2\sum _{k|T} \frac T k\mu(k)#
由$\phi(n)=\sum _{k|n}\frac n k \mu(k)$得,原式
#=\sum _{T=1}^n S_1(\frac n T)^2T^2\phi(T)#
$S_1(\frac n T)^2$可以数论分块,则只需要考虑$T^2\phi(T)$
设$f(T)=T^2\phi(T)$,$g(T)=T^2$
则#(f*g)(T)=\sum _{k|T}k^2\phi(k)(\frac T k )^2#
#=T^2\sum_{k|T}\phi(k) =T^3#
然后进行杜教筛
设$P(n)=\sum_{i=1}^n f(i)$
#\sum _{i=1}^n i^3=\sum _{i=1}^T\sum _{d|i} f(\frac i d)g(d)#
#=\sum _{d=1}^ng(d)\sum _{d|i,d\leq i\leq n}f(\frac i d)#
#=\sum _{d=1}^nd^2\sum _{i=1}^{\frac n d}f(i)#
#=\sum _{d=1}^nd^2P(\frac n d)#
#=P(n)+\sum _{d=2}^nd^2P(\frac n d)#
#P(n)=\sum_{i=1}^ni^3-\sum_{d=2}^nd^2P(\frac n d)#
然后数论分块即可计算原式。
其实这篇文章在我AC以前已经写了很长时间了,不过题目一直没AC也没往外发。
现在AC了,说一下具体的注意事项
- 快速幂不要写错(我调了一天)
- [a,b]的和是S(b)-S(a-1)
- 用int128以减少取模运算(卡常)
- 线性筛多筛几个,我筛了$2\times 10^6$
#include <iostream>
#include <algorithm>
#include <cinttypes>
#include <unordered_map>
#include <map>
#include <ext/pb_ds/assoc_container.hpp>
#include <ext/pb_ds/hash_policy.hpp>
using std::cin;
using std::cout;
using std::endl;
using std::map;
using std::unordered_map;
using int_t = long long int;
int_t inv6, inv2;
const int_t RANGE = 2000000;
using lint_t = __int128_t;
int_t mod = 998244353;
int_t n;
int_t fPrefix[RANGE + 1];
template <class T>
void write(T x)
{
if (x < 0)
{
putchar('-');
x *= -1;
}
if (x >= 10)
write(x / 10);
putchar('0' + x % 10);
}
std::ostream &operator<<(std::ostream &os, lint_t lx)
{
write(lx);
return os;
}
lint_t fastPower(lint_t base, int64_t index)
{
lint_t result = 1;
if (index < 0)
{
base = fastPower(base, mod - 2);
index *= -1;
}
while (index)
{
if (index & 1)
{
result = (result * base) % mod;
}
base = (base * base) % mod;
index >>= 1;
}
return result;
}
lint_t S1(lint_t n)
{
return n * (n + 1) % mod * inv2 % mod;
}
lint_t S1(int_t a, int_t b)
{
return (S1(b) - S1(a - 1) + mod) % mod;
}
lint_t S2(lint_t n)
{
return n * (n + 1) % mod * (2 * n + 1) % mod * inv6 % mod;
}
lint_t S2(int_t a, int_t b)
{
return S2(b) - S2(a - 1);
}
lint_t S3(int_t n)
{
lint_t x = S1(n);
return x * x % mod;
}
void process()
{
fPrefix[0] = 0;
static int_t phi[RANGE + 1];
for (int_t i = 1; i <= RANGE; i++)
{
phi[i] = i;
}
for (int_t i = 2; i <= RANGE; i++)
{
if (phi[i] == i)
{
for (int_t j = 1; j * i <= RANGE; j++)
{
phi[j * i] /= i;
phi[j * i] *= (i - 1);
}
}
}
for (int_t i = 1; i <= RANGE; i++)
{
phi[i] = phi[i] * (i * i % mod);
fPrefix[i] = (fPrefix[i - 1] + phi[i]) % mod;
}
}
lint_t prefix(int_t n)
{
if (n <= RANGE)
{
return fPrefix[n];
}
static std::unordered_map<int_t, lint_t> memory;
if (memory.count(n))
{
return memory[n];
}
lint_t result = S3(n);
int_t i = 2;
while (i <= n)
{
int_t next = n / (n / i);
result = (result - (S2(next) - S2(i - 1) + mod) % mod * prefix(n / i) % mod) % mod;
i = next + 1;
}
return memory[n] = result;
}
int main()
{
cin >> mod >> n;
inv6 = fastPower(6, -1);
inv2 = fastPower(2, -1);
process();
int_t i = 1;
lint_t result = 0;
while (i <= n)
{
int_t next = n / (n / i);
result = (result + (prefix(next) - prefix(i - 1) + mod) % mod * S1(n / i) * S1(n / i) % mod) % mod;
i = next + 1;
}
write(result);
return 0;
}
发表回复