这两日的题是极难的。

9.25

看了看loj上上届奆佬的提交记录，这似乎是他们省选前做的题（

所以很难改的动……

A

容易发现矩阵C是矩阵B的转置矩阵。赛时想到矩阵点乘转置矩阵可以得到对称矩阵，向这个方向思考后无果。

然后显然矩阵A任意一点的点值就是它到四周的切比雪夫距离（或者再加个 $1$ ）

于是想了一想打了一个可以 $O(1)$ 求出任意点矩阵B点值的函数，由转置可以同样快速求出矩阵C的点值，然后过了前两个包。

赛后在wx奆佬很快会了这题。发现上边没有完全充分利用螺旋矩阵的性质（虽然那个函数也完全基于这个性质）。

容易发现对于矩阵的每一个正方形的框除左上角顶点外满足一种特殊形式，看一下的话会很显然但是打字不好表述。

于是考虑枚举当前在第几个环，为了减少边界条件的判断把每个环的每条边单独拆出来，然后分别与询问区间判断交集并计算贡献，思维难度就到这里。

然后就是代码实现上的一些小细节。注意下的话问题应当不大。

因为没写几道题的代码所以放一下这题：

#include<bits/stdc++.h>
namespace Pozhu{
using namespace std;
#define N 1010
const int mod = 1e8;
typedef __int128_t ll;
typedef long long lld;
ll n;
ll x1,x2,y1,y2;
ll ans;
int _n,_x1,_x2,_y1,_y2;
ll s1(ll );
ll s2(ll );
ll get(ll ,int );
ll get_a(ll ,ll );
ll get_b(ll ,ll );

void main()
{
    cin >> _n >> _x1 >> _y1 >> _x2 >> _y2;
    n = _n,x1 = _x1,x2 = _x2,y1 = _y1,y2 = _y2;
    for(ll i = 1;i <= (n + 1) >> 1;i++)
    {
        ll sum = 0;
        ll val = get_b(i,i);
        // ll val = (4 * (i - 1) * (n - i + 1) + 1) % mod;
        if(x1 <= i && i <= x2 && y1 <= i && i <= y2)
            sum += val * val % mod;
        (sum += get(i,1)) %= mod;
        (sum += get(i,2)) %= mod;
        (sum += get(i,3)) %= mod;
        (sum += get(i,4)) %= mod;
        ans = (ans + sum * i) % mod;
    }
    printf("%lld",(lld)ans);
    return;
}

ll get(ll a,int type)
{
    ll L = get_b(a,a);
    // ll L = (4 * (a - 1) * (n - a + 1) + 1) % mod;
    ll R = 4 * a * (n-a) % mod;
    ll lval = 0,rval = 0;
    ll tot = n - a * 2 + 1;
    if(type == 1)
    {
        if(a < x1 || a > x2) return 0;
        if(y1 > n - a || y2 < a + 1) return 0;
        lval = max(a + 1,y1) - a + L - 1;
        rval = min(n - a,y2) - a + L;
    }
    if(type == 2)
    {
        if(y2 < n - a + 1 || y1 > n - a + 1) return 0;
        if(x2 < a || x1 > n - a) return 0;
        lval = max(a,x1) - a + L + tot - 1;
        rval = min(n - a,x2) - a + L + tot;
    }
    if(type == 3)
    {
        if(x2 < n - a + 1 || n - a + 1 < x1) return 0;
        if(y2 < a + 1 || n - a + 1 < y1) return 0;
        lval = n - a + 1 - min(n - a + 1,y2) + L + 2 * tot - 1;
        rval = n - a + 1 - max(a + 1,y1) + L + 2 * tot;
    }
    if(type == 4)
    {
        if(y2 < a || a < y1) return 0;
        if(x2 < a + 1 || n - a + 1 < x1) return 0;
        lval = n - a + 1 - min(n - a + 1,x2) + L + 3 * tot - 1;
        rval = n - a + 1 - max(a + 1 ,x1) + L + 3 * tot;
    }
    ll tmp1 = (s1(rval) - s1(lval) + mod) % mod * (L + R + 1) % mod;
    ll tmp2 = (s2(rval) - s2(lval) + mod) % mod;
    return (tmp1 - tmp2 + mod) % mod;
}

ll get_b(ll x,ll y)
{
    ll a = get_a(x,y);
    ll ans = 1ll * n * n - (n - a * 2 + 2) * (n - a * 2 + 2);
    if(x > a) ans += n - 2 * a + 1;
    else return ans + y - a + 1;
    if(y < n - a + 1) ans += n - 2 * a + 1;
    else return ans + x - a + 1;
    if(x < n - a + 1) ans += n - 2 * a + 1;
    else return ans + n - a + 1 - y + 1;
    return ans + n - a + 1 - x + 1;
}

ll get_a(ll x,ll y)
{
    return min({x,y,n-x+1,n-y+1});
}

ll s2(ll x)
{
    return x * (x + 1) / 2 * (x * 2 + 1) / 3 % mod;
}

ll s1(ll x)
{
    return x * (x + 1) / 2 % mod;
}

}

signed main()
{
    Pozhu :: main();
    return 0;
}

B

~~谁能想到这玩意会和多项式扯上关系~~

这里有一个和之前做过的某题类似的trick ：

对于要求一段时间内不碰底线向前走最终才回来，可以钦定第一步向前，最后一步向后，中间化成一个子问题。

当时刚刚接触这个trick时不很理解，现在听到的时候忽然明白了。

然后通过这个trick可以求出某生成函数的封闭形式，然后搞一搞可以搞出答案，但是复杂度是 $O(n \log ^2 n)$ 的，瓶颈在多项式快速幂且常数项为 $0$ ,大概是过不去。

然后大概是有两种解决办法，像zyy一样玄学推式子把常数项化成 $1$ 然后使用 ln exp 的多项式快速幂，或者使用整式递推（然后两种都不会，~~No one yes man~~）

代码没来得及写并且也不会写。

C

某菱形计数从二维变成三维（这里倒是想到了）然后和高深的矩阵行列式扯上了关系（？？？？？

还有一种神仙做法是使用young表，以前从未听说过这玩意

然后不会

D

奆佬JKY给讲懂了，然后是 $O(n \sqrt{n})$ 做法（根据我俩玄学分析）但是能过。

大概就是直接枚举因子和公差，公差的上下界显然易得。

然后进行一个对每个公差的方案数统计，具体做法是先按照升序排列的排序进行一个排列，显然第一个最小值是 $1$ ，然后根据公差可以找到剩下的所有最小值，把这个数列分成几块（然后最后一块要大得多）

发现每一块内除了最小值之外的数在最终排列中只能放在这一块或者之前的块中，因为往后放的话会改变后边块的最小值。

于是从最后一块向前更新答案，每次把剩余数字的个数加上当前块内非最小值数的个数，然后从剩余的数字中减去需要的数的个数求个排列，这样做下去。

最后考虑一下最小值可以在块内的任何一个位置，所以加一个修正，具体做法就是乘上一个 $(块长) ^ {(块数)}$ 就是当前因子的当前公差的答案了。

这样做完似乎很暴力，然后把因子是 $1$ 的情况特判一下单独加上可以减小一个 $n$ 的常数，然后大致就能过，非常玄学。

思想懂了，代码没来得及写，但是实现是显然而简明的，所以就咕了。

9.26

A

讨论许久无果。

或许后来有奆佬搞出来了（？但是我不会

B

现在觉得是简单题

甚至只需要拆个因子然后我调了 一整天

虽然大概是第一次尝试自己实现这个东西。

赛时模模糊糊地大概会了正解（虽然比后来的写法麻烦的多还得用CRT合并贡献）。

事实是由于只有单点除，然后模数的质因子最多只有 $9$ 个，于是把这九个质因子分别都拆出来，然后用线段树分别维护每个质因子的幂，所以剩下的因数一定与模数互质也一定有逆元。

模数不是质数不能用费马小定理求逆元

然后因为这个调了很久，警钟长鸣。

还有就是可能会给一个巨大质数当模数这样开头分解质因数就会直接寄掉，方案是先扫到最多 $\sqrt{n}$ 然后如果没有因子就直接把 $n$ 扔进去否则继续扫，相当于在扫的同时判断了一下质数。

似乎还可以优化因为这个似乎还能被卡不过没有数据在卡所以不优化了。再优化的话应该可以考虑把它写成一个递归形式，对于 $2 \times 1e9+7$ 这样的模数应该也能挡得住。

所以放一下代码吧……但是它有400+行它真的是太臭了

#include<bits/stdc++.h>
namespace Pozhu{
using namespace std;
namespace fast_IO
{
    #define FAST_IO
    #define IOSIZE 100000
    typedef long long ll;
    typedef double db;
    typedef long double ldb;
    typedef __int128_t i128;
    char ibuf[IOSIZE], obuf[IOSIZE];
    char *p1 = ibuf, *p2 = ibuf, *p3 = obuf;
    #ifdef ONLINE_JUDGE
        #define getchar() ((p1==p2)and(p2=(p1=ibuf)+fread(ibuf,1,IOSIZE,stdin),p1==p2)?(EOF):(*p1++))
        #define putchar(x) ((p3==obuf+IOSIZE)&&(fwrite(obuf,p3-obuf,1,stdout),p3=obuf),*p3++=x)
    #endif//fread in OJ, stdio in local
 
    #define isdigit(ch) (ch>47&&ch<58)
    #define isspace(ch) (ch<33&&ch!=EOF)
     
    struct fast_IO_t {
        ~fast_IO_t() {
            fwrite(obuf, p3-obuf, 1, stdout);
        }
        bool flag = false;
        operator bool() {
            return flag;
        }
    }io;
     
    template<typename T> inline T read() {
        T s = 0; int w = 1; char ch;
        while(ch=getchar(), !isdigit(ch)&&(ch!=EOF))
            if(ch == '-') w = -1;
        if(ch == EOF) return 0;
        while(isdigit(ch))
            s = s*10+ch-48, ch=getchar();
        if(ch == '.') {
            ll flt = 0; int cnt = 0;
            while(ch=getchar(), isdigit(ch))
                if(cnt < 18) flt=flt*10+ch-48, cnt++;
            s += (db)flt/pow(10,cnt);
        }
        return s *= w;
    }
    template<typename T> inline bool read(T &s) {
        s = 0; int w = 1; char ch;
        while(ch=getchar(), !isdigit(ch)&&(ch!=EOF))
            if(ch == '-') w = -1;
        if(ch == EOF) return false;
        while(isdigit(ch))
            s = s*10+ch-48, ch=getchar();
        if(ch == '.') {
            ll flt = 0; int cnt = 0;
            while(ch=getchar(), isdigit(ch))
                if(cnt < 18) flt=flt*10+ch-48, cnt++;
            s += (db)flt/pow(10,cnt);
        }
        return s *= w, true;
    }
    inline bool read(char &s) {
        while(s = getchar(), isspace(s));
        return s != EOF;
    }
    inline bool read(char *s) {
        char ch;
        while(ch=getchar(), isspace(ch));
        if(ch == EOF) return false;
        while(!isspace(ch))
            *s++ = ch, ch=getchar();
        *s = '\000';
        return true;
    } 
    template<typename T> void print(T x) {
        static int t[20]; int top = 0;
        if(x < 0) putchar('-'), x = -x;
        do { t[++top] = x%10; x /= 10; } while(x);
        while(top) putchar(t[top--]+48);
    }
    struct empty_type{}; int pcs = 8;
    empty_type setpcs(int cnt) {
        return pcs = cnt, empty_type();
    }
    inline void print(empty_type x){}
    inline void print(double x) {
        if(x < 0) putchar('-'), x = -x;
        x += 5.0 / pow(10,pcs+1);
        print((ll)(x)); x -= (ll)(x);
        if(pcs != 0) putchar('.');
        for(int i = 1; i <= pcs; i++)
        x *= 10, putchar((int)x+'0'), x -= (int)x;
    }
    inline void print(float x) {
        if(x < 0) putchar('-'), x = -x;
        x += 5.0 / pow(10,pcs+1);
        print((ll)(x)); x -= (ll)(x);
        if(pcs != 0) putchar('.');
        for(int i = 1; i <= pcs; i++)
        x *= 10, putchar((int)x+'0'), x -= (int)x;
    }
    inline void print(long double x) {
        if(x < 0) putchar('-'), x = -x;
        x += 5.0 / pow(10,pcs+1);
        print((i128)(x)); x -= (i128)(x);
        if(pcs != 0) putchar('.');
        for(int i = 1; i <= pcs; i++)
        x *= 10, putchar((int)x+'0'), x -= (int)x;
    }
    inline void print(char x) {
        putchar(x);
    }
    inline void print(char *x) {
        for(int i = 0; x[i]; i++)
            putchar(x[i]);
    }
    inline void print(const char *x) {
        for(int i = 0; x[i]; i++)
            putchar(x[i]);
    }
    #ifdef _GLIBCXX_STRING//string
        inline bool read(std::string& s) {
            s = ""; char ch;
            while(ch=getchar(), isspace(ch));
            if(ch == EOF) return false;
            while(!isspace(ch))
                s += ch, ch = getchar();
            return true;
        }
        inline void print(std::string x) {
            for(string::iterator i = x.begin(); i != x.end(); i++)
                putchar(*i);
        }
        inline bool getline(fast_IO_t &io, string s) {
            s = ""; char ch = getchar();
            if(ch == EOF) return false;
            while(ch != '\n' and ch != EOF)
                s += ch, ch = getchar();
            return true;
        }
    #endif
    #if __cplusplus >= 201103L
        template<typename T, typename... T1>
        inline int read(T& a, T1& ...other) {
            return read(a)+read(other...);
        }
        template<typename T, typename... T1>
        inline void print(T a, T1... other) {
            print(a); print(other...);
        }
    #endif
    template<typename T>
    fast_IO_t& operator >> (fast_IO_t &io, T &b) {
        return io.flag=read(b), io;
    }
    fast_IO_t& operator >> (fast_IO_t &io, char *b) {
        return io.flag=read(b), io;
    }
    template<typename T>
    fast_IO_t& operator << (fast_IO_t &io, T b) {
        return print(b), io;
    }
    #define cout io
    #define cin io
    #define endl '\n'
}
using namespace fast_IO;
#define N 500010
#define K 10
 
int n;
int a[N];
int mod_num[K];
int power_mod[K][N * 20 + 10];
int tot_mod;
int p,q;
 
void init()
{
    for(int i = 1;i <= tot_mod;i++)
    {
        power_mod[i][0] = 1;
        for(int j = 1;j <= n * 20;j++)
        power_mod[i][j] = 1ll * power_mod[i][j-1] * mod_num[i] % p;
    }
}
 
void exgcd(ll a,ll b,ll &x,ll &y)
{
    if(!b) {
        x = 1,y = 0;
        return;
    }
    exgcd(b,a%b,y,x);
    y -= a/b * x;
}
 
struct Change
{
    ll x[K],y;
    void get_ch(ll k)
    {
        for(int num = 1;num <= tot_mod;num++)
        {
            x[num] = 0;
            while(k && k % mod_num[num] == 0)
            x[num]++,k /= mod_num[num];
        }
        y = k;
    }
}ch;
 
struct Tree
{
    int l,r;
    ll sum;
    int x[K];
    ll y;
    void get_sum()
    {
        sum = y;
        for(int num = 1;num <= tot_mod;num++)
            sum = sum * power_mod[num][x[num]] % p;
    }
}t[N << 2];
 
#define ls(x) (x << 1)
#define rs(x) (x << 1 | 1)
 
void push_up(int );
void push_down(int );
ll query(int ,int ,int );
void del(int ,int ,int );
void build(int ,int ,int );
void change(int ,int ,int );
 
void main()
{
    cin >> n >> p;
    int _p = p;
    int top = sqrt(p);
    for(int i = 2;i <= top;i++)
    {
        if(_p % i == 0)
        {
            mod_num[++tot_mod] = i;
            while(_p % i == 0) _p /= i;
        }
    }
    if(tot_mod)
    {
        for(int i = min(_p+1,top+1);i <= _p;i++)
        if(_p % i == 0)
        {
            mod_num[++tot_mod] = i;
            while(_p % i == 0) _p /= i;
        }
    }
    else mod_num[++tot_mod] = p;
    init();
    for(int i = 1;i <= n;i++) cin >> a[i];
    build(1,1,n);
    cin >> q;
    int opt,l,r,x,pos;
    for(int i = 1;i <= q;i++)
    {
        cin >> opt;
        if(opt == 1)
        {
            cin >> l >> r >> x;
            ch.get_ch(x);
            change(1,l,r);
        }
        if(opt == 2)
        {
            cin >> pos >> x;
            del(1,pos,x);
        }
        if(opt == 3)
        {
            cin >> l >> r;
            cout << query(1,l,r) << '\n';
        }
    }
    return;
}
 
ll query(int x,int l,int r)
{
    if(t[x].l >= l && t[x].r <= r) return t[x].sum;
    ll ans = 0;
    push_down(x);
    if(t[ls(x)].r >= l) ans += query(ls(x),l,r);
    if(t[rs(x)].l <= r) ans += query(rs(x),l,r);
    return ans % p;
}
 
void del(int x,int pos,int k)
{
    if(t[x].l == t[x].r)
    {
        for(int num = 1;num <= tot_mod;num++)
        {
            while(k && k % mod_num[num] == 0)
            t[x].x[num]--,k /= mod_num[num];
        }
        ll inv,asd;
        exgcd(k,p,inv,asd);
        inv = (inv % p + p) % p;
        t[x].y = t[x].y * inv % p;
        t[x].get_sum();
        return;
    }
    push_down(x);
    if(t[ls(x)].r >= pos) del(ls(x),pos,k);
    else del(rs(x),pos,k);
    push_up(x);
}
 
void change(int x,int l,int r)
{
    if(t[x].l >= l && t[x].r <= r)
    {
        if(t[x].l == t[x].r)
        {
            for(int num = 1;num <= tot_mod;num++)
            {
                t[x].x[num] += ch.x[num];
            }
            t[x].y = t[x].y * ch.y % p;
            t[x].get_sum();
        }
        else
        {
            t[x].y = t[x].y * ch.y % p;
            t[x].sum = t[x].sum * ch.y % p;
            for(int num = 1;num <= tot_mod;num++)
            {
                t[x].x[num] += ch.x[num];
                t[x].sum = t[x].sum * power_mod[num][ch.x[num]] % p;
            }
        }
        return;
    }
    push_down(x);
    if(t[ls(x)].r >= l) change(ls(x),l,r);
    if(t[rs(x)].l <= r) change(rs(x),l,r);
    push_up(x);
}
 
void build(int x,int l,int r)
{
    t[x].l = l,t[x].r = r;
    t[x].y = 1;
    if(l == r)
    {
        int _a = a[l];
        for(int num = 1;num <= tot_mod;num++)
        {
            while(_a && _a % mod_num[num] == 0)
            t[x].x[num]++,_a /= mod_num[num];
        }
        t[x].y = _a;
        t[x].get_sum();
        return;
    }
    int mid = (l + r) >> 1;
    build(ls(x),l,mid);
    build(rs(x),mid+1,r);
    push_up(x);
    return;
}
 
void push_up(int x)
{
    t[x].sum = (t[ls(x)].sum + t[rs(x)].sum) % p;
}
 
void push_down(int x)
{
    t[ls(x)].sum = t[ls(x)].sum * t[x].y % p;
    t[rs(x)].sum = t[rs(x)].sum * t[x].y % p;
    for(int num = 1;num <= tot_mod;num++)
    {
        if(t[x].x[num] == 0) continue;
        t[ls(x)].x[num] += t[x].x[num];
        t[rs(x)].x[num] += t[x].x[num];
         
        t[ls(x)].sum = t[ls(x)].sum * power_mod[num][t[x].x[num]] % p;
        t[rs(x)].sum = t[rs(x)].sum * power_mod[num][t[x].x[num]] % p;
        t[x].x[num] = 0;
    }
    if(t[x].y == 1) return;
    (t[ls(x)].y *= t[x].y) %= p;
    (t[rs(x)].y *= t[x].y) %= p;
    t[x].y = 1;
    return;
}
 
}
 
signed main()
{
    Pozhu :: main();
    return 0;
}

C

有奆佬会了，但是因为某些原因不敢问。

事发时还在卡B题的常，所以前边没问，事发后当然不敢问（

然后去luogu上看了眼，大概能明白点但是不懂。

代码似乎很好写所以贺了一半

但是是 $n^2$ 的。

优化的话得写个多项式

啊考虑啥时候补一下天坑，现在的重头暂时放在数学的其他板块，看群论看的多一些，，考虑过了这一部分之后回去补多项式。

到这里吧。

by Pozhu
2022.9.27