/tmp/solutions/build/point_set_range_composite-fast.cpp:
    1|       |#include <common.h>
    2|       |#include <toy/bit.h>
    3|       |prelude;
    4|       |
    5|       |namespace {
    6|       |
    7|       |constexpr int N = 1e6;
    8|       |constexpr int P = 998244353;
    9|       |
   10|       |struct node {
   11|       |  u32 a, b;
   12|  3.07M|  auto operator+(node t) -> node {
   13|  3.07M|    return {u32(u64(a) * t.a % P), u32((u64(a) * t.b + b) % P)};
   14|  3.07M|  }
   15|  2.56M|  auto operator+(u32 x) -> u32 { return (u64(a) * x + b) % P; }
   16|       |} a[N];
   17|       |
   18|       |} // namespace
   19|       |
   20|      1|int main() {
   21|      1|  rd rd;
   22|      1|  wt wt;
   23|      1|  int n = rd.uh();
   24|      1|  int q = rd.uh();
   25|  53.3k|  for (int i = 0; i < n; ++i) a[n + n - 1 - i] = {rd.uw(), rd.uw()};
                                       ^53.3k^53.3k
  ------------------
  |  Branch (25:19): [True: 100.00%, False: 0.00%]
  ------------------
   26|  53.3k|  for (int i = n - 1; i >= 1; --i) a[i] = a[i * 2] + a[i * 2 + 1];
                                            ^53.3k^53.3k
  ------------------
  |  Branch (26:23): [True: 100.00%, False: 0.00%]
  ------------------
   27|   382k|  while (q--) {
  ------------------
  |  Branch (27:10): [True: 100.00%, False: 0.00%]
  ------------------
   28|   382k|    let t = rd.u1();
   29|   382k|    if (t == 0) {
  ------------------
  |  Branch (29:9): [True: 50.07%, False: 49.93%]
  ------------------
   30|   191k|      int k = n + n - 1 - rd.uh();
   31|   191k|      a[k] = {rd.uw(), rd.uw()};
   32|  3.21M|      for (k /= 2; k > 0; k /= 2) a[k] = a[k * 2] + a[k * 2 + 1];
                                        ^3.01M  ^3.01M
  ------------------
  |  Branch (32:20): [True: 94.04%, False: 5.96%]
  ------------------
   33|   191k|    }
   34|   382k|    if (t == 1) {
  ------------------
  |  Branch (34:9): [True: 49.93%, False: 50.07%]
  ------------------
   35|   190k|      int r = n + n - rd.uh();
   36|   190k|      int l = n + n - 1 - rd.uh();
   37|   190k|      u32 x = rd.uw();
   38|   190k|      int k = log(l ^ r);
   39|   190k|      int R = r >> k;
   40|  1.48M|      for (r = r >> __builtin_ctz(r) ^ 1; r > R; r = r >> __builtin_ctz(r) ^ 1)
                                                               ^1.29M
  ------------------
  |  Branch (40:43): [True: 87.15%, False: 12.85%]
  ------------------
   41|  1.29M|        x = a[r] + x;
   42|  1.46M|      for (int t = ~l & ~(-1 << k), i; t > 0; t -= 1 << i) {
                                                            ^1.27M
  ------------------
  |  Branch (42:40): [True: 86.97%, False: 13.03%]
  ------------------
   43|  1.27M|        i = log(t);
   44|  1.27M|        x = a[l >> i ^ 1] + x;
   45|  1.27M|      }
   46|   190k|      wt.uw(x);
   47|   190k|    }
   48|   382k|  }
   49|      1|  return 0;
   50|      1|}