range_affine_range

/tmp/solutions/build/range_affine_range_sum-main.cpp:
    1|       |#include <common.h>
    2|       |#include <toy/bit.h>
    3|       |prelude;
    4|       |
    5|       |namespace {
    6|       |
    7|       |constexpr int N = 1e6;
    8|       |constexpr int P = 998244353;
    9|       |
   10|       |struct affine {
   11|       |  u32 a, b;
   12|   121M|  auto operator+(affine t) -> affine {
   13|   121M|    return {u32(u64(t.a) * a % P), u32((u64(t.a) * b + t.b) % P)};
   14|   121M|  }
   15|   121M|  auto operator+=(affine t) -> void { *this = *this + t; }
   16|       |};
   17|       |
   18|       |struct node {
   19|       |  u32 siz;
   20|       |  u32 sum;
   21|       |  affine aff;
   22|   121M|  auto operator+=(affine t) -> void {
   23|   121M|    aff += t;
   24|   121M|    sum = (u64(t.a) * sum + u64(t.b) * siz) % P;
   25|   121M|  }
   26|       |} a[N];
   27|       |
   28|  2.71M|void pushdown(int k) {
   29|  52.6M|  for (int i = log(k) - 1; i >= 0; --i) {
                                                 ^49.9M
  ------------------
  |  Branch (29:28): [True: 94.84%, False: 5.16%]
  ------------------
   30|  49.9M|    affine t = a[k >> i >> 1].aff;
   31|  49.9M|    a[k >> i >> 1].aff = affine{1, 0};
   32|  49.9M|    a[k >> i ^ 0] += t;
   33|  49.9M|    a[k >> i ^ 1] += t;
   34|  49.9M|  }
   35|  2.71M|}
   36|       |
   37|  50.9M|u32 mod(u32 x) { return x < P ? x : x - P; }
                                              ^25.4M^25.4M
  ------------------
  |  Branch (37:25): [True: 50.00%, False: 50.00%]
  ------------------
   38|       |
   39|  2.71M|void pushup(int k) {
   40|  49.9M|  for (k /= 2; k > 0; k /= 2) {
                                    ^47.2M
  ------------------
  |  Branch (40:16): [True: 94.56%, False: 5.44%]
  ------------------
   41|  47.2M|    a[k].sum = mod(a[k * 2].sum + a[k * 2 + 1].sum);
   42|  47.2M|  }
   43|  2.71M|}
   44|       |
   45|       |}; // namespace
   46|       |
   47|     19|int main() {
   48|     19|  rd rd;
   49|     19|  wt wt;
   50|     19|  int n = rd.uh();
   51|     19|  int q = rd.uh();
   52|  2.40M|  for (int i = 0; i < n; ++i) a[n + i].siz = 1, a[n + i].sum = rd.uw();
                                       ^2.40M^2.40M
  ------------------
  |  Branch (52:19): [True: 100.00%, False: 0.00%]
  ------------------
   53|  2.40M|  for (int i = n - 1; i >= 0; --i) {
                                            ^2.40M
  ------------------
  |  Branch (53:23): [True: 100.00%, False: 0.00%]
  ------------------
   54|  2.40M|    a[i].aff = {1, 0};
   55|  2.40M|    a[i].siz = a[2 * i].siz + a[2 * i + 1].siz;
   56|  2.40M|    a[i].sum = mod(a[2 * i].sum + a[2 * i + 1].sum);
   57|  2.40M|  }
   58|  2.71M|  while (q--) {
  ------------------
  |  Branch (58:10): [True: 100.00%, False: 0.00%]
  ------------------
   59|  2.71M|    let t = rd.u1();
   60|  2.71M|    if (t == 0) {
  ------------------
  |  Branch (60:9): [True: 49.99%, False: 50.01%]
  ------------------
   61|  1.35M|      int l = n + rd.uh();
   62|  1.35M|      int r = n + rd.uh() - 1;
   63|  1.35M|      pushdown(l--);
   64|  1.35M|      pushdown(r++);
   65|  1.35M|      int k = log(l ^ r);
   66|  1.35M|      affine x = {rd.uw(), rd.uw()};
   67|  1.35M|      int t, i;
   68|  12.2M|      for (t = ~l & ~(-1 << k), i = 31; t > 0; t -= 1 << i) {
                                                             ^10.9M
  ------------------
  |  Branch (68:41): [True: 88.93%, False: 11.07%]
  ------------------
   69|  10.9M|        i = log(t);
   70|  10.9M|        a[l >> i ^ 1] += x;
   71|  10.9M|      }
   72|  1.35M|      pushup(l >> i);
   73|  12.3M|      for (t = +r & ~(-1 << k), i = 31; t > 0; t -= 1 << i) {
                                                             ^10.9M
  ------------------
  |  Branch (73:41): [True: 89.00%, False: 11.00%]
  ------------------
   74|  10.9M|        i = log(t);
   75|  10.9M|        a[r >> i ^ 1] += x;
   76|  10.9M|      }
   77|  1.35M|      pushup(r >> i);
   78|  1.35M|    }
   79|  2.71M|    if (t == 1) {
  ------------------
  |  Branch (79:9): [True: 50.01%, False: 49.99%]
  ------------------
   80|  1.35M|      int l = n + rd.uh();
   81|  1.35M|      int r = n + rd.uh() - 1;
   82|  1.35M|      u64 sizL = 0, sumL = 0;
   83|  1.35M|      u64 sizR = 0, sumR = 0;
   84|  24.0M|      for (--l, ++r; l ^ r ^ 1;) {
  ------------------
  |  Branch (84:22): [True: 94.36%, False: 5.64%]
  ------------------
   85|  22.7M|        if (~l & 1) sizL += a[l ^ 1].siz, sumL += a[l ^ 1].sum;
                                  ^10.9M
  ------------------
  |  Branch (85:13): [True: 48.07%, False: 51.93%]
  ------------------
   86|  22.7M|        if (+r & 1) sizR += a[r ^ 1].siz, sumR += a[r ^ 1].sum;
                                  ^11.0M
  ------------------
  |  Branch (86:13): [True: 48.45%, False: 51.55%]
  ------------------
   87|  22.7M|        l /= 2, r /= 2;
   88|  22.7M|        sumL = (a[l].aff.a * sumL + a[l].aff.b * sizL) % P;
   89|  22.7M|        sumR = (a[r].aff.a * sumR + a[r].aff.b * sizR) % P;
   90|  22.7M|      }
   91|  1.35M|      sumL = mod(u32(sumL + sumR));
   92|  1.35M|      sizL += sizR;
   93|  3.75M|      for (l /= 2; l > 0; l /= 2) {
                                        ^2.39M
  ------------------
  |  Branch (93:20): [True: 63.80%, False: 36.20%]
  ------------------
   94|  2.39M|        sumL = (a[l].aff.a * sumL + a[l].aff.b * sizL) % P;
   95|  2.39M|      }
   96|  1.35M|      wt.uw(u32(sumL));
   97|  1.35M|    }
   98|  2.71M|  }
   99|     19|  return 0;
  100|     19|}