$$ \text{首先考虑朴素}DP,\text{设}f\left( u,v,l \right) \text{表示走到点}\left( u,v \right) ,\text{路径长度为}l\text{的方案数} \\ \text{转移则有}f\left( u,v,l \right) =\sum_{0\le i<u}{\sum_{1\le j\le n}{f\left( i,j,l-1 \right) w\left( j,v \right)}} \\ \text{显然第三维状态可以用多项式表示。} \\ \text{令}F\left( u,v \right) \text{表示走到横坐标为}u,\text{纵坐标为}v\text{处的多项式} \\ \text{其中}i\text{次项表示路径长度为}i\left( \text{模}k\text{意义下} \right) \text{的方案数。} \\ \text{转移则有}F\left( u,v \right) =\sum_{0\le i<u}{\sum_{1\le j\le n}{w\left( j,v \right) xF\left( i,j \right)}} \\ =\sum_{1\le j\le n}{w\left( j,v \right) x\sum_{0\le i<u}{F\left( i,j \right)}} \\ \text{令}S\left( u,v \right) =\sum_{0\le i\le u}{F\left( i,v \right)} \\ \text{则有}F\left( u,v \right) =\sum_{1\le j\le n}{w\left( j,v \right) xS\left( u-\text{1,}v \right)} \\ \text{由于题目保证}k|\left( p-1 \right) ,\text{故在模}p\text{意义下存在}k\text{次单位根}g^{\frac{p-1}{k}},\text{故直接以单位根代入进去} \\ \text{即可做质数模}k\text{意义下的循环卷积。} \\ \text{每次代入一个点值}x_0,\text{则有} \\ F\left( u,v \right) =\sum_{1\le j\le n}{w\left( j,v \right) x_0S\left( u-\text{1,}v \right)} \\ \text{可以考虑直接矩乘。} \\ \text{例如对于}n=\text{3的情况,构造矩阵} \\ M_k=\left[ \begin{matrix} S\left( k,1 \right)& S\left( k,2 \right)& S\left( k,3 \right)\\ \end{matrix} \right] \\ \text{由于}S\left( k,1 \right) =F\left( k,1 \right) +S\left( k-\text{1,}1 \right) =S\left( k-\text{1,}1 \right) +\sum_{1\le j\le n}{w\left( j,1 \right) x_iS\left( k-\text{1,}j \right)} \\ \text{故构造转移矩阵} \\ T=\left[ \begin{matrix} w\left( \text{1,}1 \right) +1& w\left( \text{1,}2 \right)& w\left( \text{1,}3 \right)\\ w\left( \text{2,}1 \right)& w\left( \text{2,}2 \right) +1& w\left( \text{2,}3 \right)\\ w\left( \text{3,}1 \right)& w\left( \text{3,}2 \right)& w\left( \text{3,}3 \right) +1\\ \end{matrix} \right] \\ \text{由于题目要求可以在任意位置结束,故最终}M_L=M_0T^L\text{即为结果的矩阵。} \\ M_0\left( \text{1,}x \right) =\text{1,其中}x\text{为初始时的纵坐标}. \\ \text{单次求点值的复杂度为}n^3\log m,\text{共计需要}k\text{个点值来确定多项式。} \\ \text{然而很自闭的是}k\text{的长度不一定是2的幂,模数也不是}998244353 \\ \text{所以不能直接}NTT \\ \text{模数的问题比较好解决,}MTT\text{就行了} \\ \text{长度呢?} \\ \text{考虑 Bluestein 算法。} \\ \text{令}a_i\text{为多项式的}i\text{次项前的系数,}\omega _n=g^{\frac{p-1}{n}}\text{为模}p\text{意义下的}n\text{次单位根} \\ X_i\text{为代入}\omega _{n}^{i}\text{的结果,}n\text{为多项式的长度} \\ \text{则有}X_k=\sum_{0\le i\le n-1}{a_i\omega _{n}^{ik}}\left( DFT\text{的定义} \right) \\ \text{则有}X_k=\sum_{0\le i\le n-1}{a_i\omega _{n}^{ik}}=\sum_{0\le i\le n-1}{a_i\omega _{n}^{\frac{2ik}{2}}}=\sum_{0\le i\le n-1}{a_i\omega _{n}^{\frac{-\left( i-k \right) ^2+\left( i^2+k^2 \right)}{2}}} \\ =\sum_{0\le i\le n-1}{a_i\omega _{2n}^{-\left( i-k \right) ^2+\left( -i^2-k^2 \right)}}=\sum_{0\le i\le n-1}{a_i\omega _{2n}^{i^2+k^2}\omega _{2n}^{-\left( i-k \right) ^2}}=\omega _{2n}^{k^2}\sum_{0\le i\le n-1}{a_i\omega _{2n}^{i^2}\times \omega _{2n}^{-\left( k-i \right) ^2}} \\ \text{如果令}f\left( i \right) =a_i\omega _{2n}^{i^2},g\left( i \right) =\omega _{2n}^{-\left( i^2 \right)} \\ \text{则有}X_k=\omega _{2n}^{k^2}\sum_{0\le i\le n-1}{f\left( i \right) g\left( k-i \right)} \\ \text{然后自闭了,}k-i\text{可能是负的。} \\ \text{令}g\left( i \right) =\omega _{2n}^{-\left( i-n \right) ^2}\left( \text{即把}g\text{右移}n\text{位} \right) ,\text{同时把}g\left( i \right) \text{的长度扩大一倍,就避免了下标为负的情况。} \\ X_k=\omega _{2n}^{k^2}\sum_{0\le i\le n-1}{f\left( i \right) g\left( k-i \right)} \\ \text{然后会发现一分都没有,因为这题的所有数据都不满足}2k|p-1 \\ \text{所以只能写多点求值了}.. \\ \text{然后写了个多点求值发现}TLE\text{到0分。} \\ \text{还是得考虑Bluestein及其变种。} \\ \,\,X_k=\sum_{0\le i\le n-1}{\omega _{n}^{-ik}a_i} \\ \text{由于}\left( \begin{array}{c} a+b\\ 2\\ \end{array} \right) =ab+\left( \begin{array}{c} a\\ 2\\ \end{array} \right) +\left( \begin{array}{c} b\\ 2\\ \end{array} \right) \left( \text{直接考虑组合意义} \right) \\ ab=\left( \begin{array}{c} a+b\\ 2\\ \end{array} \right) -\left( \begin{array}{c} a\\ 2\\ \end{array} \right) -\left( \begin{array}{c} b\\ 2\\ \end{array} \right) \\ \,\,X_k=\sum_{0\le i\le n-1}{\omega _{n}^{-\left( \left( \begin{array}{c} i+k\\ 2\\ \end{array} \right) -\left( \begin{array}{c} i\\ 2\\ \end{array} \right) -\left( \begin{array}{c} k\\ 2\\ \end{array} \right) \right)}a_i}=\sum_{0\le i\le n-1}{\omega _{n}^{\left( \begin{array}{c} i\\ 2\\ \end{array} \right) +\left( \begin{array}{c} k\\ 2\\ \end{array} \right) -\left( \begin{array}{c} i+k\\ 2\\ \end{array} \right)}a_i} \\ =\omega _{n}^{\left( \begin{array}{c} k\\ 2\\ \end{array} \right)}\sum_{0\le i\le n-1}{\omega _{n}^{-\left( \begin{array}{c} k+i\\ 2\\ \end{array} \right)}\times a_i\omega _{n}^{\left( \begin{array}{c} i\\ 2\\ \end{array} \right)}} \\ \text{设序列}f\left( n \right) =\omega _{n}^{-\left( \begin{array}{c} n\\ 2\\ \end{array} \right)},g\left( n \right) =a_n\omega _{n}^{\left( \begin{array}{c} n\\ 2\\ \end{array} \right)} \\ \text{则有}X_k=\omega _{n}^{\left( \begin{array}{c} k\\ 2\\ \end{array} \right)}\sum_{0\le i\le n-1}{f\left( k+i \right) \times g\left( i \right)} \\ f,g\text{长度均为}n \\ \text{翻转}g\text{序列,则有}g^R\left( n-i \right) =g\left( i \right) \\ \text{则有}X_k=\omega _{n}^{\left( \begin{array}{c} k\\ 2\\ \end{array} \right)}\sum_{0\le i\le n-1}{f\left( k+i \right) \times g\left( n-i \right)} \\ \left( k+i \right) +\left( n-i \right) =n+k?\text{把}f\text{的长度扩大一倍即可。} \\ $$
#include <assert.h>
#include <algorithm>
#include <cmath>
#include <cstring>
#include <fstream>
#include <iostream>
#include <vector>
// #include
using int_t = long long int;
using std::cin;
using std::cout;
using std::endl;
using real_t = double;
using cpx_t = struct complex;
const int_t LARGE = 5e5;
const real_t PI = acos(-1);
struct complex {
real_t real, imag;
complex(real_t a = 0, real_t b = 0) : real(a), imag(b) {}
complex operator+(const complex& rhs) const {
return complex{real + rhs.real, imag + rhs.imag};
}
complex operator-(const complex& rhs) const {
return complex{real - rhs.real, imag - rhs.imag};
}
complex operator*(const complex& rhs) const {
return complex{real * rhs.real - imag * rhs.imag,
real * rhs.imag + imag * rhs.real};
}
complex& operator*=(const complex& rhs) {
*this = (*this) * rhs;
return *this;
}
complex conj() { return complex{real, -imag}; }
};
void transform(cpx_t* A, int size2, int arg);
void poly_mul(const int* A, int n, const int* B, int m, int* A0, int p);
// void transformX(int* A, int len, int g, int mod);
void transformNTT(int* A, int size2, int arg, int mod, int g);
std::vector<int> poly_dc_mul(const std::vector<int>& A);
void poly_inv(const int* A, int n, int* result);
void poly_div(const int* A, int n, const int* B, int m, int* R);
std::vector<int> poly_eval(const std::vector<int>& poly,
const std::vector<int>& vals);
void transformX(int* A0, int len, int g);
int power(int_t base, int_t index, int_t mod) {
int result = 1;
// base = (base % mod + mod) % mod;
index = (index % (mod - 1) + mod - 1) % (mod - 1);
while (index) {
if (index & 1) result = (int_t)result * base % mod;
index >>= 1;
base = (int_t)base * base % mod;
}
assert(result >= 0);
return result;
}
int_t mod;
struct Matrix {
int_t n;
int_t data[4][4];
int_t* operator[](int_t r) { return data[r]; }
int_t at(int_t r, int_t c) const { return data[r][c]; }
Matrix(int_t n) {
this->n = n;
memset(data, 0, sizeof(data));
}
Matrix operator*(const Matrix& rhs) const {
Matrix result(n);
for (int_t i = 1; i <= n; i++) {
for (int_t j = 1; j <= n; j++) {
for (int_t k = 1; k <= n; k++) {
result[i][j] =
(at(i, k) * rhs.at(k, j) % mod + mod + result[i][j]) %
mod;
}
}
}
return result;
}
Matrix pow(int_t index) const {
Matrix base = *this, result(n);
for (int_t i = 1; i <= n; i++) result[i][i] = 1;
while (index) {
if (index & 1) result = result * base;
base = base * base;
index >>= 1;
}
return result;
}
};
int_t n, k, l, x, y;
int_t mat[4][4];
Matrix makeMatrix(int_t x0) {
Matrix result(n);
for (int_t i = 1; i <= n; i++)
for (int_t j = 1; j <= n; j++) {
result[i][j] = x0 * mat[i][j] % mod;
if (i == j) result[i][j] += 1;
result[i][j] %= mod;
}
return result;
}
std::ostream& operator<<(std::ostream& os, const Matrix& mat) {
for (int_t i = 1; i <= mat.n; i++) {
for (int_t j = 1; j <= mat.n; j++) {
os << mat.at(i, j) << " ";
}
cout << endl;
}
return os;
}
int flips[20][LARGE];
const auto flip = [=](int x, int size2) {
int result = 0;
for (int i = 1; i < size2; i++) {
result |= (x & 1);
x >>= 1;
result <<= 1;
}
return result | (x & 1);
};
int main() {
for (int i = 1; i < 19; i++) {
for (int j = 1; j < (1 << i); j++) flips[i][j] = flip(j, i);
}
cin >> n >> k >> l >> x >> y >> mod;
for (int_t i = 1; i <= n; i++)
for (int_t j = 1; j <= n; j++) cin >> mat[i][j];
Matrix M0(n);
static int A[LARGE + 1];
M0[1][x] = 1;
int_t g = 2;
std::vector<int_t> divs;
int_t px = mod - 1;
assert(px % k == 0);
for (int_t i = 2; i * i <= px; i++) {
if (px % i == 0) {
divs.push_back(i);
if (i * i != px) divs.push_back(px / i);
}
}
for (; g <= mod - 2; g++) {
bool ok = true;
for (int_t x : divs) {
if (power(g, x, mod) == 1 && x != mod - 1) {
ok = false;
break;
}
}
if (ok) break;
}
for (int_t i = 0; i < k; i++) {
int_t gx = power(g, (mod - 1) / k * i, mod);
auto T = makeMatrix(gx);
A[i] = (M0 * T.pow(l))[1][y];
}
transformX(A, k, g);
const int_t leninv = power(k, mod - 2, mod);
for (int_t i = 0; i < k; i++) printf("%lld\n", A[i] * leninv % mod);
return 0;
}
void transformX(int* A0, int len, int g) {
const auto C2 = [](int_t n) -> int_t { return n * (n - 1) / 2; };
int root = power(g, (mod - 1) / len, mod);
static int A[LARGE + 1], B[LARGE + 1], A1[LARGE + 1];
for (int i = 0; i <= len * 2; i++) {
A[i] = power(root, -C2(i), mod);
}
for (int i = 0; i < len; i++) {
B[i] = (int_t)A0[i] * power(root, C2(i), mod) % mod;
}
std::reverse(B, B + len + 1);
poly_mul(A, len * 2, B, len, A1, mod);
for (int i = 0; i < len; i++)
A0[i] = (int_t)A1[i + len] % mod * power(root, C2(i), mod) % mod;
}
void poly_mul(const int* A, int n, const int* B, int m, int* A0, int p) {
int size2 = 0;
while ((1 << size2) < n + m + 1) size2++;
int len = (1 << size2);
const int px = 3e4;
static cpx_t C[LARGE + 1], D[LARGE + 1], G[LARGE + 1], Px[LARGE + 1];
for (int i = 0; i < len; i++) {
if (i <= n) {
C[i] = cpx_t{A[i] / px};
D[i] = cpx_t{A[i] % px};
} else {
C[i] = D[i] = cpx_t();
}
if (i <= m) {
G[i] = cpx_t{B[i] / px, B[i] % px};
} else {
G[i] = cpx_t{0, 0};
}
}
transform(C, size2, 1);
transform(D, size2, 1);
transform(G, size2, 1);
for (int_t i = 0; i < len; i++) {
C[i] = C[i] * G[i];
D[i] = D[i] * G[i];
}
transform(C, size2, -1);
transform(D, size2, -1);
const auto make = [=](real_t x) -> int_t {
assert(x / len >= -1);
return (x / len) + 0.5;
};
for (int i = 0; i <= n + m; i++) {
A0[i] = ((int_t)make(C[i].real) % p * px % p * px % p +
(int_t)make(C[i].imag + D[i].real) % p * px % p +
make(D[i].imag) % p) %
p;
}
}
void transform(cpx_t* A, int size2, int arg) {
int len = (1 << size2);
for (int i = 0; i < len; i++) {
int x = flips[size2][i];
if (x > i) std::swap(A[i], A[x]);
}
for (int i = 2; i <= len; i *= 2) {
for (int j = 0; j < len; j += i) {
for (int k = 0; k < i / 2; k++) {
auto u = A[j + k],
t = cpx_t(cos(2 * PI / i * k), sin(2 * PI / i * k) * arg) *
A[j + k + i / 2];
A[j + k] = u + t;
A[j + k + i / 2] = u - t;
}
}
}
}