/tmp/solutions/build/point_set_range_composite-fast.cpp:
    1|       |#include <common.h>
    2|       |#include <toy/bit.h>
    3|       |prelude;
    4|       |
    5|       |namespace {
    6|       |
    7|       |constexpr int N = 1e6; // capacity of the global node array `a` declared below
    8|       |constexpr int P = 998244353; // modulus used by every operation in `node`
    9|       |
   10|       |struct node { // An affine map x -> (a * x + b) mod P.
   11|       |  u32 a, b; // multiplier and additive term of the map
   12|  5.23M|  auto operator+(node t) -> node { // Composition: the result maps x to (*this)(t(x)), i.e. t is applied first.
   13|  5.23M|    return {u32(u64(a) * t.a % P), u32((u64(a) * t.b + b) % P)}; // products go through u64 before reduction; assumes u32/u64 are 32/64-bit unsigned aliases from a header not shown -- TODO confirm
   14|  5.23M|  }
   15|  4.18M|  auto operator+(u32 x) -> u32 { return (u64(a) * x + b) % P; } // Evaluation: applies this map to the value x.
   16|       |} a[N]; // global array of N nodes; main() indexes it as an implicit binary tree (children of i are 2i and 2i+1)
   17|       |
   18|       |} // namespace
   19|       |
   20|      1|int main() { // Builds a tree of affine maps in `a`, then serves q queries: type 0 replaces one map, type 1 pushes a value through a range of maps and prints it.
   21|      1|  rd rd; // input reader; its type comes from a header not shown here
   22|      1|  wt wt; // output writer; its type comes from a header not shown here
   23|      1|  int n = rd.uh(); // number of elements
   24|      1|  int q = rd.uh(); // number of queries
   25|   500k|  for (int i = 0; i < n; ++i) a[n + n - 1 - i] = {rd.uw(), rd.uw()}; // element i goes to index 2n-1-i, so a[n..2n-1] holds the input in reverse order
                                       ^500k^500k
  ------------------
  |  Branch (25:19): [True: 100.00%, False: 0.00%]
  ------------------
   26|   500k|  for (int i = n - 1; i >= 1; --i) a[i] = a[i * 2] + a[i * 2 + 1]; // build internal nodes bottom-up: node i combines children 2i and 2i+1
                                            ^499k^499k
  ------------------
  |  Branch (26:23): [True: 100.00%, False: 0.00%]
  ------------------
   27|   500k|  while (q--) {
  ------------------
  |  Branch (27:10): [True: 100.00%, False: 0.00%]
  ------------------
   28|   500k|    let t = rd.u1(); // query type; `let` is defined outside this file (presumably a const-auto macro -- TODO confirm)
   29|   500k|    if (t == 0) { // type 0: overwrite one element and repair the tree above it
  ------------------
  |  Branch (29:9): [True: 49.92%, False: 50.08%]
  ------------------
   30|   249k|      int k = n + n - 1 - rd.uh(); // tree index of the element being replaced (same reversed mapping as the initial load)
   31|   249k|      a[k] = {rd.uw(), rd.uw()}; // braced-list elements are evaluated left to right, so the first value read becomes field a, the second field b
   32|  4.98M|      for (k /= 2; k > 0; k /= 2) a[k] = a[k * 2] + a[k * 2 + 1]; // recompute every ancestor up to the root at index 1
                                        ^4.73M  ^4.73M
  ------------------
  |  Branch (32:20): [True: 94.99%, False: 5.01%]
  ------------------
   33|   249k|    }
   34|   500k|    if (t == 1) { // type 1: apply the maps of a range to a value x and print the result
  ------------------
  |  Branch (34:9): [True: 50.08%, False: 49.92%]
  ------------------
   35|   250k|      int r = n + n - rd.uh(); // first bound read, mapped to a tree index; presumably one past the range's highest leaf (half-open input range assumed -- TODO confirm)
   36|   250k|      int l = n + n - 1 - rd.uh(); // second bound read, mapped to a tree index; presumably one below the range's lowest leaf under the same assumption
   37|   250k|      u32 x = rd.uw(); // value to push through the maps
   38|   250k|      int k = log(l ^ r); // `log` comes from a header not shown; presumably floor(log2), i.e. the highest bit where l and r differ -- TODO confirm
   39|   250k|      int R = r >> k; // r shifted up k levels; the first walk stops once r is no longer greater than this
   40|  2.34M|      for (r = r >> __builtin_ctz(r) ^ 1; r > R; r = r >> __builtin_ctz(r) ^ 1) // step is (r >> ctz(r)) ^ 1: drop r's trailing zero bits, then flip the lowest bit to move to the adjacent node
                                                               ^2.09M
  ------------------
  |  Branch (40:43): [True: 89.32%, False: 10.68%]
  ------------------
   41|  2.09M|        x = a[r] + x; // apply node r's map to x
   42|  2.34M|      for (int t = ~l & ~(-1 << k), i; t > 0; t -= 1 << i) { // this t (shadowing the query type) holds the zero bits of l below bit k, consumed highest first; NOTE(review): -1 << k shifts a negative value, which is undefined before C++20
                                                            ^2.09M
  ------------------
  |  Branch (42:40): [True: 89.30%, False: 10.70%]
  ------------------
   43|  2.09M|        i = log(t); // position of the highest bit still set in t (assuming log is floor(log2))
   44|  2.09M|        x = a[l >> i ^ 1] + x; // index is (l >> i) ^ 1: the sibling of l shifted up i levels; apply its map to x
   45|  2.09M|      }
   46|   250k|      wt.uw(x); // emit the result for this query
   47|   250k|    }
   48|   500k|  }
   49|      1|  return 0;
   50|      1|}