// RUN: triton-opt %s -test-print-alignment -split-input-file -verify-diagnostics=only-expected -o /dev/null

// Cast-like ops applied to constants: a scalar widened with arith.extsi and a
// splat tensor reinterpreted with tt.bitcast. Each checked remark for a cast
// result is identical to the remark on the constant feeding it.
tt.func @cast() {
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = 1}}
  %one = arith.constant 1 : i32
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = 1}}
  %one_i64 = arith.extsi %one : i32 to i64
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1}}
  %ones = arith.constant dense<1> : tensor<128xi32>
  // i32 -> f32 keeps the bit width; the bitcast result is checked to carry the
  // same axis info as its operand.
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1}}
  %ones_f32 = tt.bitcast %ones : tensor<128xi32> -> tensor<128xf32>
  tt.return
}

// -----

// arith.addi in two flavours: a contiguous range plus a splat constant, and
// two splat constants whose sum (1 + 127) is checked as the constant 128.
tt.func @add() {
  // expected-remark @below {{contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>}}
  %range = tt.make_range {start = 0 : i32, end = 128 : i32} : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1}}
  %ones = arith.constant dense<1> : tensor<128xi32>
  // Range shifted by one: contiguity is checked to stay 128 while divisibility drops to 1.
  // expected-remark @below {{contiguity = [128], divisibility = [1], constancy = [1], constant_value = <none>}}
  %shifted = arith.addi %range, %ones : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [128], constant_value = 127}}
  %c127 = arith.constant dense<127> : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [128], constancy = [128], constant_value = 128}}
  %c128 = arith.addi %ones, %c127 : tensor<128xi32>
  tt.return
}

// -----

// tt.addptr on pointers to i1, i8, i16, i32 and i64. Every pointer argument is
// annotated with tt.divisibility = 16. The function covers three groups:
//   1. scalar pointer + constant offset 1,
//   2. scalar pointer + constant offset 4,
//   3. splatted 128x128 pointer tensor + a row range broadcast along dim 0.
tt.func @addptr(%arg0: !tt.ptr<i1> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<i8> {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<i16> {tt.divisibility = 16 : i32}, %arg3: !tt.ptr<i32> {tt.divisibility = 16 : i32}, %arg4: !tt.ptr<i64> {tt.divisibility = 16 : i32}) {
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = 1}}
  %cst1 = arith.constant 1 : i32
  // Group 1 (offset 1): the checked divisibility is 1, 1, 2, 4, 8 for
  // i1, i8, i16, i32, i64.
  // NOTE(review): this looks like the offset scaled by the element size in
  // bytes -- confirm against the analysis implementation.
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %0 = tt.addptr %arg0, %cst1 : !tt.ptr<i1>, i32
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %1 = tt.addptr %arg1, %cst1 : !tt.ptr<i8>, i32
  // expected-remark @below {{contiguity = [1], divisibility = [2], constancy = [1], constant_value = <none>}}
  %2 = tt.addptr %arg2, %cst1 : !tt.ptr<i16>, i32
  // expected-remark @below {{contiguity = [1], divisibility = [4], constancy = [1], constant_value = <none>}}
  %3 = tt.addptr %arg3, %cst1 : !tt.ptr<i32>, i32
  // expected-remark @below {{contiguity = [1], divisibility = [8], constancy = [1], constant_value = <none>}}
  %4 = tt.addptr %arg4, %cst1 : !tt.ptr<i64>, i32
  // expected-remark @below {{contiguity = [1], divisibility = [4], constancy = [1], constant_value = 4}}
  %cst4 = arith.constant 4 : i32
  // Group 2 (offset 4): the checked divisibility is 4, 4, 8, 16, 16; it never
  // exceeds the 16 declared on the pointer arguments.
  // expected-remark @below {{contiguity = [1], divisibility = [4], constancy = [1], constant_value = <none>}}
  %5 = tt.addptr %arg0, %cst4 : !tt.ptr<i1>, i32
  // expected-remark @below {{contiguity = [1], divisibility = [4], constancy = [1], constant_value = <none>}}
  %6 = tt.addptr %arg1, %cst4 : !tt.ptr<i8>, i32
  // expected-remark @below {{contiguity = [1], divisibility = [8], constancy = [1], constant_value = <none>}}
  %7 = tt.addptr %arg2, %cst4 : !tt.ptr<i16>, i32
  // expected-remark @below {{contiguity = [1], divisibility = [16], constancy = [1], constant_value = <none>}}
  %8 = tt.addptr %arg3, %cst4 : !tt.ptr<i32>, i32
  // expected-remark @below {{contiguity = [1], divisibility = [16], constancy = [1], constant_value = <none>}}
  %9 = tt.addptr %arg4, %cst4 : !tt.ptr<i64>, i32
  // Group 3 setup: a 128-element range expanded to 1x128 and broadcast to
  // 128x128, used as the offset tensor below.
  // expected-remark @below {{contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>}}
  %10 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
  // expected-remark @below {{contiguity = [1, 128], divisibility = [1, 1073741824], constancy = [1, 1], constant_value = <none>}}
  %11 = tt.expand_dims %10 {axis = 0: i32} : tensor<128xi32> -> tensor<1x128xi32>
  // expected-remark @below {{contiguity = [1, 128], divisibility = [1, 1073741824], constancy = [128, 1], constant_value = <none>}}
  %12 = tt.broadcast %11 : tensor<1x128xi32> -> tensor<128x128xi32>
  // Each scalar pointer splatted to a 128x128 pointer tensor; all five splats
  // are checked to report the same axis info.
  // expected-remark @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 128], constant_value = <none>}}
  %13 = tt.splat %arg0 : !tt.ptr<i1> -> tensor<128x128x!tt.ptr<i1>>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 128], constant_value = <none>}}
  %14 = tt.splat %arg1 : !tt.ptr<i8> -> tensor<128x128x!tt.ptr<i8>>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 128], constant_value = <none>}}
  %15 = tt.splat %arg2 : !tt.ptr<i16> -> tensor<128x128x!tt.ptr<i16>>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 128], constant_value = <none>}}
  %16 = tt.splat %arg3 : !tt.ptr<i32> -> tensor<128x128x!tt.ptr<i32>>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 128], constant_value = <none>}}
  %17 = tt.splat %arg4 : !tt.ptr<i64> -> tensor<128x128x!tt.ptr<i64>>
  // Group 3: pointer tensor + broadcast range. Contiguity and constancy are
  // checked to match the offset tensor %12; only the dim-0 divisibility
  // differs per element type (1, 1, 2, 4, 8).
  // expected-remark @below {{contiguity = [1, 128], divisibility = [1, 16], constancy = [128, 1], constant_value = <none>}}
  %18 = tt.addptr %13, %12 : tensor<128x128x!tt.ptr<i1>>, tensor<128x128xi32>
  // expected-remark @below {{contiguity = [1, 128], divisibility = [1, 16], constancy = [128, 1], constant_value = <none>}}
  %19 = tt.addptr %14, %12 : tensor<128x128x!tt.ptr<i8>>, tensor<128x128xi32>
  // expected-remark @below {{contiguity = [1, 128], divisibility = [2, 16], constancy = [128, 1], constant_value = <none>}}
  %20 = tt.addptr %15, %12 : tensor<128x128x!tt.ptr<i16>>, tensor<128x128xi32>
  // expected-remark @below {{contiguity = [1, 128], divisibility = [4, 16], constancy = [128, 1], constant_value = <none>}}
  %21 = tt.addptr %16, %12 : tensor<128x128x!tt.ptr<i32>>, tensor<128x128xi32>
  // expected-remark @below {{contiguity = [1, 128], divisibility = [8, 16], constancy = [128, 1], constant_value = <none>}}
  %22 = tt.addptr %17, %12 : tensor<128x128x!tt.ptr<i64>>, tensor<128x128xi32>
  tt.return
}

// -----

// arith.subi with a contiguous range on either side of a splat constant, plus
// a constant-minus-constant case (129 - 1) checked as the constant 128.
tt.func @sub() {
  // expected-remark @below {{contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>}}
  %range = tt.make_range {start = 0 : i32, end = 128 : i32} : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1}}
  %ones = arith.constant dense<1> : tensor<128xi32>
  // range - constant: contiguity is checked to remain 128.
  // expected-remark @below {{contiguity = [128], divisibility = [1], constancy = [1], constant_value = <none>}}
  %range_minus_one = arith.subi %range, %ones : tensor<128xi32>
  // constant - range: nothing beyond the trivial values is checked.
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %one_minus_range = arith.subi %ones, %range : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [128], constant_value = 129}}
  %c129 = arith.constant dense<129> : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [128], constancy = [128], constant_value = 128}}
  %c128 = arith.subi %c129, %ones : tensor<128xi32>
  tt.return
}

// -----

// arith.muli: a contiguous range times 1, products of splat constants, and an
// i64 argument (tt.divisibility = 16) times the constant 4611686018427387904
// (2^62).
tt.func @mul(%arg0: i64 {tt.divisibility = 16 : i32}) {
  // expected-remark @below {{contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>}}
  %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1}}
  %1 = arith.constant dense<1> : tensor<128xi32>
  // Multiplying the range by 1: the checked axis info equals that of the range itself.
  // expected-remark @below {{contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>}}
  %2 = arith.muli %0, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [128], constancy = [128], constant_value = 128}}
  %3 = arith.constant dense<128> : tensor<128xi32>
  // Constant * constant: the products 128 * 1 and 128 * 2 are checked as the
  // constants 128 and 256.
  // expected-remark @below {{contiguity = [1], divisibility = [128], constancy = [128], constant_value = 128}}
  %4 = arith.muli %3, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [2], constancy = [128], constant_value = 2}}
  %5 = arith.constant dense<2> : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [256], constancy = [128], constant_value = 256}}
  %6 = arith.muli %4, %5 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [4611686018427387904], constancy = [1], constant_value = 4611686018427387904}}
  %7 = arith.constant 4611686018427387904: i64
  // 16 * 2^62 does not fit in 64 bits; the checked divisibility stays at 2^62.
  // NOTE(review): presumably the analysis saturates divisibility at 2^62 --
  // confirm against the implementation.
  // expected-remark @below {{contiguity = [1], divisibility = [4611686018427387904], constancy = [1], constant_value = <none>}}
  %8 = arith.muli %arg0, %7 : i64
  tt.return
}

// -----

// arith.divsi / arith.divui: ranges divided by splat constants (and the
// reverse), plus a scalar argument (tt.divisibility = 16) divided by the
// scalar constants 2, 32 and 6.
tt.func @div(%arg0: i32 {tt.divisibility = 16 : i32}) {
  // expected-remark @below {{contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>}}
  %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1}}
  %1 = arith.constant dense<1> : tensor<128xi32>
  // Dividing the range by 1: the checked axis info equals that of the range.
  // expected-remark @below {{contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>}}
  %2 = arith.divsi %0, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %3 = arith.divui %1, %0 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [64], constancy = [128], constant_value = 64}}
  %4 = arith.constant dense<64> : tensor<128xi32>
  // Range / 64: the result is checked to be constant over runs of 64 elements.
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [64], constant_value = <none>}}
  %5 = arith.divsi %0, %4 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %6 = arith.divsi %4, %0 : tensor<128xi32>
  // Constant / constant (64 / 1) is checked as the constant 64.
  // expected-remark @below {{contiguity = [1], divisibility = [64], constancy = [128], constant_value = 64}}
  %7 = arith.divsi %4, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [2], constancy = [128], constant_value = 66}}
  %8 = arith.constant dense<66> : tensor<128xi32>
  // Range / 66 (divisor is not a power of two): the checked constancy is 2,
  // matching the divisibility reported for the constant 66.
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [2], constant_value = <none>}}
  %9 = arith.divui %0, %8 : tensor<128xi32>
  // A range that starts at 8192 instead of 0.
  // expected-remark @below {{contiguity = [128], divisibility = [8192], constancy = [1], constant_value = <none>}}
  %10 = tt.make_range {end = 8320 : i32, start = 8192 : i32} : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [64], constant_value = <none>}}
  %11 = arith.divsi %10, %4 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [2], constancy = [1], constant_value = 2}}
  %12 = arith.constant 2 : i32
  // Dividing a scalar by a power of two should give predictable divisibility:
  // 16 / 2 is checked as divisibility 8.
  // expected-remark @below {{contiguity = [1], divisibility = [8], constancy = [1], constant_value = <none>}}
  %13 = arith.divsi %arg0, %12 : i32
  // expected-remark @below {{contiguity = [1], divisibility = [32], constancy = [1], constant_value = 32}}
  %14 = arith.constant 32 : i32
  // The divisor (32) exceeds the argument's declared divisibility (16); the
  // checked result divisibility is 1.
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %15 = arith.divsi %arg0, %14 : i32
  // expected-remark @below {{contiguity = [1], divisibility = [2], constancy = [1], constant_value = 6}}
  %16 = arith.constant 6 : i32
  // The divisor is not a power of two; the checked result divisibility is 1.
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %17 = arith.divsi %arg0, %16 : i32
  // expected-remark @below {{contiguity = [1], divisibility = [2], constancy = [128], constant_value = 2}}
  %18 = arith.constant dense<2> : tensor<128xi32>
  // Tensor counterpart of the power-of-two case: range / 2 is checked with
  // constancy 2 and divisibility 1.
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [2], constant_value = <none>}}
  %19 = arith.divsi %0, %18 : tensor<128xi32>
  tt.return
}


// -----

// arith.remsi / arith.remui: ranges taken modulo splat constants (1, 64, 66,
// 192), constants modulo ranges, and range-vs-range combinations using a
// second range that starts at 32.
tt.func @rem() {
  // expected-remark @below {{contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>}}
  %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1}}
  %1 = arith.constant dense<1> : tensor<128xi32>
  // Anything modulo 1 is checked as the constant 0.
  // expected-remark @below {{contiguity = [1], divisibility = [4611686018427387904], constancy = [128], constant_value = 0}}
  %2 = arith.remsi %0, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %3 = arith.remui %1, %0 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [64], constancy = [128], constant_value = 64}}
  %4 = arith.constant dense<64> : tensor<128xi32>
  // Range modulo 64: contiguity and divisibility are both checked as 64.
  // expected-remark @below {{contiguity = [64], divisibility = [64], constancy = [1], constant_value = <none>}}
  %5 = arith.remsi %0, %4 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %6 = arith.remsi %4, %0 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [2], constancy = [128], constant_value = 66}}
  %7 = arith.constant dense<66> : tensor<128xi32>
  // Range modulo 66: the checked contiguity/divisibility (2) match the
  // divisibility reported for the constant 66.
  // expected-remark @below {{contiguity = [2], divisibility = [2], constancy = [1], constant_value = <none>}}
  %8 = arith.remui %0, %7 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [64], constancy = [128], constant_value = 192}}
  %9 = arith.constant dense<192> : tensor<128xi32>
  // Range modulo 192: checked with the same result as modulo 64 (the
  // divisibility reported for 192 is 64).
  // expected-remark @below {{contiguity = [64], divisibility = [64], constancy = [1], constant_value = <none>}}
  %10 = arith.remsi %0, %9 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %11 = arith.remsi %9, %0 : tensor<128xi32>
  // A second range starting at 32, used for the range-vs-range cases below.
  // expected-remark @below {{contiguity = [128], divisibility = [32], constancy = [1], constant_value = <none>}}
  %12 = tt.make_range {end = 160 : i32, start = 32 : i32} : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %13 = arith.remsi %0, %12 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %14 = arith.remsi %12, %0 : tensor<128xi32>
  // Offset range modulo 64: the checked contiguity/divisibility (32) match the
  // divisibility of the range's start.
  // expected-remark @below {{contiguity = [32], divisibility = [32], constancy = [1], constant_value = <none>}}
  %15 = arith.remsi %12, %4 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %16 = arith.remsi %4, %12 : tensor<128xi32>
  tt.return
}

// -----

// tt.expand_dims on a non-constant tensor: a range scaled by 2 is given a
// trailing unit dimension, and the remark checks the resulting 2-D axis info.
tt.func @expanddims() {
  // expected-remark @below {{contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>}}
  %range = tt.make_range {start = 0 : i32, end = 128 : i32} : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [2], constancy = [128], constant_value = 2}}
  %twos = arith.constant dense<2> : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [2], constancy = [1], constant_value = <none>}}
  %scaled = arith.muli %range, %twos : tensor<128xi32>
  // Divisibility 2 is checked on both the original and the newly added dimension.
  // expected-remark @below {{contiguity = [1, 1], divisibility = [2, 2], constancy = [1, 1], constant_value = <none>}}
  %column = tt.expand_dims %scaled {axis = 1 : i32} : tensor<128xi32> -> tensor<128x1xi32>
  tt.return
}

// -----

// tt.broadcast of a splat constant: 128 -> 128x1 -> 128x128. The constant
// value 64 is checked to survive both steps, and constancy grows to 128 along
// the broadcast dimension.
tt.func @broadcast() {
  // expected-remark @below {{contiguity = [1], divisibility = [64], constancy = [128], constant_value = 64}}
  %c64 = arith.constant dense<64> : tensor<128xi32>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [64, 64], constancy = [128, 1], constant_value = 64}}
  %column = tt.expand_dims %c64 {axis = 1 : i32} : tensor<128xi32> -> tensor<128x1xi32>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [64, 64], constancy = [128, 128], constant_value = 64}}
  %tile = tt.broadcast %column : tensor<128x1xi32> -> tensor<128x128xi32>
  tt.return
}

// -----

// tt.splat of a pointer argument annotated with tt.divisibility = 16: the
// remark checks that divisibility on both dimensions and full constancy
// (128 x 128).
tt.func @splat(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}) {
  // expected-remark @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 128], constant_value = <none>}}
  %ptrs = tt.splat %arg0 : !tt.ptr<f32> -> tensor<128x128x!tt.ptr<f32>>
  tt.return
}

// -----

// arith.cmpi between a fully contiguous range (0..127) and splat constants.
// All six predicates used here (eq, ne, slt, sle, sge, sgt) are exercised with
// the range on the left and then on the right of the constant 0, followed by
// two comparisons against the constant 8.
tt.func @cmp_all_contiguous() {
  // expected-remark @below {{contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>}}
  %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [4611686018427387904], constancy = [128], constant_value = 0}}
  %1 = arith.constant dense<0> : tensor<128xi32>
  // Range on the left-hand side: only slt and sge are checked with constancy 128.
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %2 = arith.cmpi eq, %0, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %3 = arith.cmpi ne, %0, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [128], constant_value = <none>}}
  %4 = arith.cmpi slt, %0, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %5 = arith.cmpi sle, %0, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [128], constant_value = <none>}}
  %6 = arith.cmpi sge, %0, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %7 = arith.cmpi sgt, %0, %1 : tensor<128xi32>
  // Operands swapped (constant on the left): now sle and sgt are the
  // predicates checked with constancy 128.
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %8 = arith.cmpi eq, %1, %0 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %9 = arith.cmpi ne, %1, %0 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %10 = arith.cmpi slt, %1, %0 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [128], constant_value = <none>}}
  %11 = arith.cmpi sle, %1, %0 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %12 = arith.cmpi sge, %1, %0 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [128], constant_value = <none>}}
  %13 = arith.cmpi sgt, %1, %0 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [8], constancy = [128], constant_value = 8}}
  %14 = arith.constant dense<8> : tensor<128xi32>
  // 8 > range: the result is checked to be constant over runs of 8 elements.
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [8], constant_value = <none>}}
  %15 = arith.cmpi sgt, %14, %0 : tensor<128xi32>
  // Constant vs constant (8 > 0) is checked as the constant 1.
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1}}
  %16 = arith.cmpi sgt, %14, %1 : tensor<128xi32>
  tt.return
}

// arith.cmpi between partially contiguous tensors and splat constants.
// Two inputs are built with arith.remsi on a 0..127 range:
//   - range mod 32 (contiguity 32), compared against the constant 8;
//   - range mod 48 (contiguity 16), compared against the constant 32.
// Each input is compared with all six predicates used here (eq, ne, slt, sle,
// sge, sgt), first with the tensor on the left and then on the right.
tt.func @cmp_partial_contiguous() {
  // expected-remark @below {{contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>}}
  %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [8], constancy = [128], constant_value = 8}}
  %1 = arith.constant dense<8> : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [32], constancy = [128], constant_value = 32}}
  %3 = arith.constant dense<32> : tensor<128xi32>
  // expected-remark @below {{contiguity = [32], divisibility = [32], constancy = [1], constant_value = <none>}}
  %4 = arith.remsi %0, %3 : tensor<128xi32>
  // (range mod 32) vs 8, tensor on the left: slt and sge are checked with constancy 8.
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %5 = arith.cmpi eq, %4, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %6 = arith.cmpi ne, %4, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [8], constant_value = <none>}}
  %7 = arith.cmpi slt, %4, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %8 = arith.cmpi sle, %4, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [8], constant_value = <none>}}
  %9 = arith.cmpi sge, %4, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %10 = arith.cmpi sgt, %4, %1 : tensor<128xi32>
  // Operands swapped: sle and sgt are checked with constancy 8.
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %11 = arith.cmpi eq, %1, %4 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %12 = arith.cmpi ne, %1, %4 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %13 = arith.cmpi slt, %1, %4 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [8], constant_value = <none>}}
  %14 = arith.cmpi sle, %1, %4 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %15 = arith.cmpi sge, %1, %4 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [8], constant_value = <none>}}
  %16 = arith.cmpi sgt, %1, %4 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [16], constancy = [128], constant_value = 48}}
  %17 = arith.constant dense<48> : tensor<128xi32>
  // expected-remark @below {{contiguity = [16], divisibility = [16], constancy = [1], constant_value = <none>}}
  %18 = arith.remsi %0, %17 : tensor<128xi32>
  // (range mod 48) vs 32, tensor on the left: slt and sge are checked with constancy 16.
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %19 = arith.cmpi eq, %18, %3 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %20 = arith.cmpi ne, %18, %3 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [16], constant_value = <none>}}
  %21 = arith.cmpi slt, %18, %3 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %22 = arith.cmpi sle, %18, %3 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [16], constant_value = <none>}}
  %23 = arith.cmpi sge, %18, %3 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %24 = arith.cmpi sgt, %18, %3 : tensor<128xi32>
  // Operands swapped: sle and sgt are checked with constancy 16.
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %25 = arith.cmpi eq, %3, %18 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %26 = arith.cmpi ne, %3, %18 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %27 = arith.cmpi slt, %3, %18 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [16], constant_value = <none>}}
  %28 = arith.cmpi sle, %3, %18 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %29 = arith.cmpi sge, %3, %18 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [16], constant_value = <none>}}
  %30 = arith.cmpi sgt, %3, %18 : tensor<128xi32>
  tt.return
}

// -----

// Bitwise ops (arith.andi / arith.ori / arith.xori). First on a contiguous
// range combined with a splat constant, then on two tensors that are constant
// over runs of 64 and 8 elements respectively (built with arith.divsi).
tt.func @logic() {
  // expected-remark @below {{contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>}}
  %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [64], constancy = [128], constant_value = 64}}
  %1 = arith.constant dense<64> : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [64], constant_value = <none>}}
  %2 = arith.divsi %0, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [8], constancy = [128], constant_value = 8}}
  %3 = arith.constant dense<8> : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [8], constant_value = <none>}}
  %4 = arith.divsi %0, %3 : tensor<128xi32>
  // Range op constant: only the trivial values are checked for all three ops.
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %5 = arith.andi %0, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %6 = arith.ori %0, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %7 = arith.xori %0, %1 : tensor<128xi32>
  // Operands with constancy 64 and 8: the checked result constancy is 8 for
  // all three ops.
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [8], constant_value = <none>}}
  %8 = arith.andi %2, %4 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [8], constant_value = <none>}}
  %9 = arith.ori %2, %4 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [8], constant_value = <none>}}
  %10 = arith.xori %2, %4 : tensor<128xi32>
  tt.return
}

// -----

// arith.select with four kinds of condition: a constant scalar, a constant
// splat tensor, a non-constant scalar argument (%arg0) and a non-constant
// tensor argument (%arg1).
tt.func @select(%arg0 : i1, %arg1 : tensor<4xi1>) {
  // expected-remark @below {{contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>}}
  %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [4611686018427387904], constancy = [128], constant_value = 0}}
  %1 = arith.constant dense<0> : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %2 = arith.cmpi eq, %0, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [128], constant_value = <none>}}
  %3 = arith.cmpi slt, %0, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [4611686018427387904], constancy = [1], constant_value = 0}}
  %4 = arith.constant 0 : i1
  // expected-remark @below {{contiguity = [1], divisibility = [4611686018427387904], constancy = [128], constant_value = 0}}
  %7 = tt.splat %4 : i1 -> tensor<128xi1>
  // Constant-false scalar condition: the checked result matches the axis info
  // of the false operand (%7).
  // expected-remark @below {{contiguity = [1], divisibility = [4611686018427387904], constancy = [128], constant_value = 0}}
  %5 = arith.select %4, %3, %7 : tensor<128xi1>
  // Constant-false tensor condition: the checked result matches the axis info
  // of the false operand (%2).
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %8 = arith.select %7, %3, %2 : tensor<128xi1>, tensor<128xi1>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [1, 1], constancy = [1, 1], constant_value = <none>}}
  %9 = tt.expand_dims %2 {axis = 1 : i32} : tensor<128xi1> -> tensor<128x1xi1>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [1, 1], constancy = [128, 1], constant_value = <none>}}
  %10 = tt.expand_dims %3 {axis = 1 : i32} : tensor<128xi1> -> tensor<128x1xi1>
  // Non-constant scalar condition: the checked constancy is [1, 1] even though
  // %10 alone reports [128, 1].
  // expected-remark @below {{contiguity = [1, 1], divisibility = [1, 1], constancy = [1, 1], constant_value = <none>}}
  %11 = arith.select %arg0, %9, %10 : tensor<128x1xi1>
  // expected-remark @below {{contiguity = [1], divisibility = [4], constancy = [4], constant_value = 4}}
  %cst = arith.constant dense<4> : tensor<4xi32>
  // expected-remark @below {{contiguity = [4], divisibility = [1073741824], constancy = [1], constant_value = <none>}}
  %12 = tt.make_range {end = 4 : i32, start = 0 : i32} : tensor<4xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [4], constancy = [1], constant_value = <none>}}
  %13 = arith.muli %12, %cst : tensor<4xi32>
  // %14 is not used by any later op in this function; only its own axis info
  // is checked.
  // expected-remark @below {{contiguity = [4], divisibility = [16], constancy = [1], constant_value = <none>}}
  %14 = tt.make_range {end = 20 : i32, start = 16 : i32} : tensor<4xi32>
  // Non-constant tensor condition: only the trivial values are checked.
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %15 = arith.select %arg1, %12, %13 : tensor<4xi1>, tensor<4xi32>
  tt.return
}

// -----

// arith.shli / arith.shrsi with constant shift amounts (8 and 4) and with a
// non-constant shift amount splatted from %arg0 (tt.divisibility = 4).
tt.func @shift(%arg0: i32 {tt.divisibility = 4 : i32}) {
  // expected-remark @below {{contiguity = [1], divisibility = [4], constancy = [128], constant_value = <none>}}
  %s = tt.splat %arg0 : i32 -> tensor<128xi32>
  // expected-remark @below {{contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>}}
  %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [8], constancy = [128], constant_value = 8}}
  %1 = arith.constant dense<8> : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [4], constancy = [128], constant_value = 4}}
  %2 = arith.constant dense<4> : tensor<128xi32>
  // Range << 8: the checked divisibility is 256 (= 2^8).
  // expected-remark @below {{contiguity = [1], divisibility = [256], constancy = [1], constant_value = <none>}}
  %3 = arith.shli %0, %1 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %4 = arith.shrsi %0, %2 : tensor<128xi32>
  // Constant << constant (8 << 4) is checked as the constant 128.
  // expected-remark @below {{contiguity = [1], divisibility = [128], constancy = [128], constant_value = 128}}
  %5 = arith.shli %1, %2 : tensor<128xi32>
  // Constant << non-constant splat: the checked result keeps the shifted
  // operand's divisibility (8) and constancy (128) but has no constant value.
  // expected-remark @below {{contiguity = [1], divisibility = [8], constancy = [128], constant_value = <none>}}
  %6 = arith.shli %1, %s : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %7 = arith.shrsi %0, %s : tensor<128xi32>
  tt.return
}

// -----

// arith.maxsi / arith.minsi on two contiguous ranges (starting at 0 and 64),
// followed by arith.maxsi on two splat constants (8 and 4), whose result is
// checked to have constant value 8.
tt.func @max_min() {
  // expected-remark @below {{contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>}}
  %range0 = tt.make_range {start = 0 : i32, end = 128 : i32} : tensor<128xi32>
  // expected-remark @below {{contiguity = [128], divisibility = [64], constancy = [1], constant_value = <none>}}
  %range64 = tt.make_range {start = 64 : i32, end = 192 : i32} : tensor<128xi32>
  // Both results are checked with contiguity 128 and divisibility 64.
  // expected-remark @below {{contiguity = [128], divisibility = [64], constancy = [1], constant_value = <none>}}
  %upper = arith.maxsi %range0, %range64 : tensor<128xi32>
  // expected-remark @below {{contiguity = [128], divisibility = [64], constancy = [1], constant_value = <none>}}
  %lower = arith.minsi %range0, %range64 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [8], constancy = [128], constant_value = 8}}
  %eights = arith.constant dense<8> : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [4], constancy = [128], constant_value = 4}}
  %fours = arith.constant dense<4> : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = 8}}
  %const_max = arith.maxsi %eights, %fours : tensor<128xi32>
  tt.return
}

// -----

// A complicated example with different contiguity and divisibility in lhs and rhs.
// To simplify construction of the test, we just pass attributes from the arguments.
// Operands arrive with explicit axis-info attributes: %arg0 has contiguity 8 and
// divisibility 4, %arg1 has contiguity 2 and divisibility 8. The checks cover
// max, min and select over that pair.
tt.func @contiguity_dependent_divisibility(%arg0: tensor<8xi32> {tt.contiguity = 8 : i32, tt.divisibility = 4 : i32, tt.constancy = 1 : i32}, %arg1: tensor<8xi32> {tt.contiguity = 2 : i32, tt.divisibility = 8 : i32, tt.constancy = 1 : i32}) {
  // expected-remark @below {{contiguity = [2], divisibility = [2], constancy = [1], constant_value = <none>}}
  %larger = arith.maxsi %arg0, %arg1 : tensor<8xi32>
  // expected-remark @below {{contiguity = [2], divisibility = [2], constancy = [1], constant_value = <none>}}
  %smaller = arith.minsi %arg0, %arg1 : tensor<8xi32>
  // Scalar i1 condition feeding the select below.
  // expected-remark @below {{contiguity = [1], divisibility = [4611686018427387904], constancy = [1], constant_value = 0}}
  %cond = arith.constant 0 : i1
  // expected-remark @below {{contiguity = [2], divisibility = [2], constancy = [1], constant_value = <none>}}
  %picked = arith.select %cond, %larger, %smaller : tensor<8xi32>
  tt.return
}

// -----

// Axis info of an scf.if result whose two branches yield different constant
// tensors (64 * 1 on the then-side, 1 on the else-side).
tt.func @if(%i1 : i1) {
  // expected-remark @below {{contiguity = [1, 1], divisibility = [64, 64], constancy = [128, 32], constant_value = 64}}
  %splat64 = arith.constant dense<64> : tensor<128x32xi32>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [1, 1], constancy = [128, 32], constant_value = 1}}
  %splat1 = arith.constant dense<1> : tensor<128x32xi32>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [64, 64], constancy = [128, 32], constant_value = 64}}
  %product = arith.muli %splat64, %splat1 : tensor<128x32xi32>
  // The merged result keeps the constancy but no longer has a constant value.
  // expected-remark @below {{contiguity = [1, 1], divisibility = [1, 1], constancy = [128, 32], constant_value = <none>}}
  %merged = scf.if %i1 -> tensor<128x32xi32> {
    scf.yield %product : tensor<128x32xi32>
  } else {
    scf.yield %splat1 : tensor<128x32xi32>
  }
  tt.return
}

// -----

// Axis info around an scf.for with constant bounds (0 to 128, step 16) and
// three loop-carried tensors. The yield swaps %a and %b on every iteration and
// forwards %c unchanged.
// NOTE(review): the TODO-remark lines in the loop body do not use the
// expected-remark spelling, so presumably the diagnostic verifier skips them — confirm.
tt.func @for() {
  // expected-remark @below {{contiguity = [1, 1], divisibility = [4611686018427387904, 4611686018427387904], constancy = [128, 32], constant_value = 0}}
  %a_init = arith.constant dense<0> : tensor<128x32xi32>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [1, 1], constancy = [128, 32], constant_value = 1}}
  %b_init = arith.constant dense<1> : tensor<128x32xi32>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [4, 4], constancy = [128, 32], constant_value = 4}}
  %c_init = arith.constant dense<4> : tensor<128x32xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [128], constancy = [1], constant_value = 128}}
  %ub = arith.constant 128 : i32
  // expected-remark @below {{contiguity = [1], divisibility = [4611686018427387904], constancy = [1], constant_value = 0}}
  %lb = arith.constant 0 : i32
  // expected-remark @below {{contiguity = [1], divisibility = [16], constancy = [1], constant_value = 16}}
  %step = arith.constant 16 : i32
  // No directive is attached to the loop op itself; only the body is checked.
  %a, %b, %c = scf.for %iv = %lb to %ub step %step iter_args(%a = %a_init, %b = %b_init, %c = %c_init) -> (tensor<128x32xi32>, tensor<128x32xi32>, tensor<128x32xi32>) : i32 {
    // Adding the zero lower bound to the induction variable exposes the
    // induction variable's own axis info.
    // expected-remark @below {{contiguity = [1], divisibility = [16], constancy = [1], constant_value = <none>}}
    %t = arith.addi %iv, %lb : i32
    // TODO-remark(this remark is wrong, needs to be fixed) @below {{contiguity = [1, 1], divisibility = [1, 1], constancy = [128, 32], constant_value = <none>}}
    // TODO-remark(this remark is wrong, needs to be fixed) @below {{contiguity = [1, 1], divisibility = [1, 1], constancy = [128, 32], constant_value = <none>}}
    // TODO-remark(this remark is wrong, needs to be fixed) @below {{contiguity = [1, 1], divisibility = [4, 4], constancy = [128, 32], constant_value = 4}}
    scf.yield %b, %a, %c : tensor<128x32xi32>, tensor<128x32xi32>, tensor<128x32xi32>
  }
  tt.return
}

// -----

// scf.for with non-constant bounds: the lower bound is hinted divisible by 16
// and the step by 8; the induction variable is checked via an add with zero.
tt.func @for_dynamic(%lb: i32 {tt.divisibility = 16 : i32}, %step: i32 {tt.divisibility = 8 : i32}, %ub: i32) {
  // expected-remark @below {{contiguity = [1], divisibility = [4611686018427387904], constancy = [1], constant_value = 0}}
  %zero = arith.constant 0 : i32
  scf.for %i = %lb to %ub step %step : i32 {
    // expected-remark @below {{contiguity = [1], divisibility = [8], constancy = [1], constant_value = <none>}}
    %i_plus_zero = arith.addi %i, %zero : i32
  }
  tt.return
}

// -----

// An scf.if nested inside an scf.for over a loop-carried pointer tensor. Both
// branches of the if yield the loop-carried pointer %arg1, and the loop itself
// yields the initial splat %1 rather than the advanced pointer %4.
// NOTE(review): the TODO-remark lines inside the loop do not use the
// expected-remark spelling, so presumably they are not verified — confirm.
tt.func @for_if(%i1: i1, %arg0: !tt.ptr<f16> {tt.divisibility = 16 : i32}) {
  // expected-remark @below {{contiguity = [1], divisibility = [4611686018427387904], constancy = [1], constant_value = 0}}
  %c0_i32 = arith.constant 0 : i32
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = 1}}
  %c1_i32 = arith.constant 1 : i32
  // expected-remark @below {{contiguity = [1], divisibility = [2], constancy = [1], constant_value = 10}}
  %c10_i32 = arith.constant 10 : i32
  // expected-remark @below {{contiguity = [1, 1], divisibility = [64, 64], constancy = [128, 64], constant_value = 64}}
  %cst = arith.constant dense<64> : tensor<128x64xi32>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 64], constant_value = <none>}}
  %1 = tt.splat %arg0 : !tt.ptr<f16> -> tensor<128x64x!tt.ptr<f16>>
  %2 = scf.for %arg9 = %c0_i32 to %c10_i32 step %c1_i32 iter_args(%arg1 = %1) -> (tensor<128x64x!tt.ptr<f16>>): i32 {
    // TODO-remark(this remark is wrong, needs to be fixed) @below {{scf.if}}
    // TODO-remark(this remark is wrong, needs to be fixed) @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 64], constant_value = <none>}}
    %3 = scf.if %i1 -> (tensor<128x64x!tt.ptr<f16>>) {
      scf.yield %arg1 : tensor<128x64x!tt.ptr<f16>>
    } else {
      scf.yield %arg1 : tensor<128x64x!tt.ptr<f16>>
    }
    // TODO-remark(this remark is wrong, needs to be fixed) @below {{tt.addptr}}
    // TODO-remark(this remark is wrong, needs to be fixed) @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 64], constant_value = <none>}}
    %4 = tt.addptr %3, %cst : tensor<128x64x!tt.ptr<f16>>, tensor<128x64xi32>
    // TODO-remark(this remark is wrong, needs to be fixed) @below {{scf.for}}
    // TODO-remark(this remark is wrong, needs to be fixed) @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 64], constant_value = <none>}}
    scf.yield %1 : tensor<128x64x!tt.ptr<f16>>
  }
  tt.return
}

// -----

// Three levels of nesting: an outer scf.for carrying %arg2 (initialised from
// the 16-divisible splat %1), an scf.if, and an inner scf.for carrying %arg3
// (initialised from the 8-divisible splat %2). The then-branch yields the inner
// loop's result, the else-branch yields %arg2, and the outer loop yields %1.
// NOTE(review): every directive stacked above the outer loop is a TODO-remark,
// so presumably none of the ops from the outer loop onwards are verified — confirm.
tt.func @for_if_for(%i1: i1, %arg0: !tt.ptr<f16> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<f16> {tt.divisibility = 8 : i32}) {
  // expected-remark @below {{contiguity = [1], divisibility = [4611686018427387904], constancy = [1], constant_value = 0}}
  %c0_i32 = arith.constant 0 : i32
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = 1}}
  %c1_i32 = arith.constant 1 : i32
  // expected-remark @below {{contiguity = [1], divisibility = [2], constancy = [1], constant_value = 10}}
  %c10_i32 = arith.constant 10 : i32
  // expected-remark @below {{contiguity = [1, 1], divisibility = [64, 64], constancy = [128, 64], constant_value = 64}}
  %cst = arith.constant dense<64> : tensor<128x64xi32>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 64], constant_value = <none>}}
  %1 = tt.splat %arg0 : !tt.ptr<f16> -> tensor<128x64x!tt.ptr<f16>>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [8, 8], constancy = [128, 64], constant_value = <none>}}
  %2 = tt.splat %arg1 : !tt.ptr<f16> -> tensor<128x64x!tt.ptr<f16>>
  // TODO-remark(this remark is wrong, needs to be fixed) @below {{scf.for}}
  // TODO-remark(this remark is wrong, needs to be fixed) @below {{contiguity = [1, 1], divisibility = [8, 8], constancy = [128, 64], constant_value = <none>}}
  // TODO-remark(this remark is wrong, needs to be fixed) @below {{scf.if}}
  // TODO-remark(this remark is wrong, needs to be fixed) @below {{contiguity = [1, 1], divisibility = [8, 8], constancy = [128, 64], constant_value = <none>}}
  // TODO-remark(this remark is wrong, needs to be fixed) @below {{tt.addptr}}
  // TODO-remark(this remark is wrong, needs to be fixed) @below {{contiguity = [1, 1], divisibility = [8, 8], constancy = [128, 64], constant_value = <none>}}
  // TODO-remark(this remark is wrong, needs to be fixed) @below {{scf.for}}
  // TODO-remark(this remark is wrong, needs to be fixed) @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 64], constant_value = <none>}}
  %3 = scf.for %arg9 = %c0_i32 to %c10_i32 step %c1_i32 iter_args(%arg2 = %1) -> (tensor<128x64x!tt.ptr<f16>>) : i32 {
    %4 = scf.if %i1 -> (tensor<128x64x!tt.ptr<f16>>) {
      // Inner loop simply forwards its carried pointer.
      %5 = scf.for %arg10 = %c0_i32 to %c10_i32 step %c1_i32 iter_args(%arg3 = %2) -> (tensor<128x64x!tt.ptr<f16>>) : i32 {
        scf.yield %arg3 : tensor<128x64x!tt.ptr<f16>>
      }
      scf.yield %5 : tensor<128x64x!tt.ptr<f16>>
    } else {
      scf.yield %arg2 : tensor<128x64x!tt.ptr<f16>>
    }
    %6 = tt.addptr %4, %cst : tensor<128x64x!tt.ptr<f16>>, tensor<128x64xi32>
    scf.yield %1 : tensor<128x64x!tt.ptr<f16>>
  }
  tt.return
}

// -----

// Builds two 128x128 pointer tensors with swapped strides and checks the axis
// info at each step:
//   load address  %10 = %arg0 + row * %arg1 + col
//   store address %19 = %arg2 + row + col * %arg3
// The value loaded from %10 is then stored to %19 under an all-true mask.
tt.func @permute_2d(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg1: i32 {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg3: i32 {tt.divisibility = 16 : i32}) {
  // expected-remark @below {{contiguity = [1, 1], divisibility = [1, 1], constancy = [128, 128], constant_value = 1}}
  %cst = arith.constant dense<true> : tensor<128x128xi1>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [1, 1], constancy = [1, 1], constant_value = <none>}}
  %cst_0 = arith.constant dense<0.000000e+00> : tensor<128x128xf32>
  // expected-remark @below {{contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>}}
  %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
  // expected-remark @below {{contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>}}
  %1 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
  // Load address, row part: row index scaled by %arg1 and added to %arg0.
  // expected-remark @below {{contiguity = [128, 1], divisibility = [1073741824, 1], constancy = [1, 1], constant_value = <none>}}
  %2 = tt.expand_dims %0 {axis = 1 : i32} : tensor<128xi32> -> tensor<128x1xi32>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 1], constant_value = <none>}}
  %3 = tt.splat %arg1 : i32 -> tensor<128x1xi32>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [1, 1], constant_value = <none>}}
  %4 = arith.muli %2, %3 : tensor<128x1xi32>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 1], constant_value = <none>}}
  %5 = tt.splat %arg0 : !tt.ptr<f32> -> tensor<128x1x!tt.ptr<f32>>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [1, 1], constant_value = <none>}}
  %6 = tt.addptr %5, %4 : tensor<128x1x!tt.ptr<f32>>, tensor<128x1xi32>
  // Load address, column part: unscaled column index broadcast over rows.
  // expected-remark @below {{contiguity = [1, 128], divisibility = [1, 1073741824], constancy = [1, 1], constant_value = <none>}}
  %7 = tt.expand_dims %1 {axis = 0 : i32}: tensor<128xi32> -> tensor<1x128xi32>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [1, 128], constant_value = <none>}}
  %8 = tt.broadcast %6 : tensor<128x1x!tt.ptr<f32>> -> tensor<128x128x!tt.ptr<f32>>
  // expected-remark @below {{contiguity = [1, 128], divisibility = [1, 1073741824], constancy = [128, 1], constant_value = <none>}}
  %9 = tt.broadcast %7 : tensor<1x128xi32> -> tensor<128x128xi32>
  // expected-remark @below {{contiguity = [1, 128], divisibility = [4, 16], constancy = [1, 1], constant_value = <none>}}
  %10 = tt.addptr %8, %9 : tensor<128x128x!tt.ptr<f32>>, tensor<128x128xi32>
  // Store address, row part: unscaled row index added to %arg2.
  // expected-remark @below {{contiguity = [128, 1], divisibility = [1073741824, 1], constancy = [1, 1], constant_value = <none>}}
  %11 = tt.expand_dims %0 {axis = 1 : i32}: tensor<128xi32> -> tensor<128x1xi32>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 1], constant_value = <none>}}
  %12 = tt.splat %arg2 : !tt.ptr<f32> -> tensor<128x1x!tt.ptr<f32>>
  // expected-remark @below {{contiguity = [128, 1], divisibility = [16, 4], constancy = [1, 1], constant_value = <none>}}
  %13 = tt.addptr %12, %11 : tensor<128x1x!tt.ptr<f32>>, tensor<128x1xi32>
  // Store address, column part: column index scaled by %arg3.
  // expected-remark @below {{contiguity = [1, 128], divisibility = [1, 1073741824], constancy = [1, 1], constant_value = <none>}}
  %14 = tt.expand_dims %1 {axis = 0 : i32} : tensor<128xi32> -> tensor<1x128xi32>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [1, 128], constant_value = <none>}}
  %15 = tt.splat %arg3 : i32 -> tensor<1x128xi32>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [1, 1], constant_value = <none>}}
  %16 = arith.muli %14, %15 : tensor<1x128xi32>
  // expected-remark @below {{contiguity = [128, 1], divisibility = [16, 4], constancy = [1, 128], constant_value = <none>}}
  %17 = tt.broadcast %13 : tensor<128x1x!tt.ptr<f32>> -> tensor<128x128x!tt.ptr<f32>>
  // expected-remark @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 1], constant_value = <none>}}
  %18 = tt.broadcast %16 : tensor<1x128xi32> -> tensor<128x128xi32>
  // expected-remark @below {{contiguity = [128, 1], divisibility = [16, 4], constancy = [1, 1], constant_value = <none>}}
  %19 = tt.addptr %17, %18 : tensor<128x128x!tt.ptr<f32>>, tensor<128x128xi32>
  // Masked load with %cst_0 as the fallback value, followed by the masked store.
  // expected-remark @below {{contiguity = [1, 1], divisibility = [1, 1], constancy = [1, 1], constant_value = <none>}}
  %20 = tt.load %10, %cst, %cst_0 : tensor<128x128x!tt.ptr<f32>>
  tt.store %19, %20, %cst : tensor<128x128x!tt.ptr<f32>>
  tt.return
}

// -----

// Constancy of tt.load results: an unmasked load follows the constancy of its
// address (16), a masked load is limited by the constancy of its mask (8).
// The remarks here match only a fragment of each diagnostic.
tt.func @load_constancy(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg1: i32 {tt.divisibility = 1 : i32}) {
  // expected-remark @below {{divisibility = [16]}}
  %splat16 = arith.constant dense<16> : tensor<1024xi32>
  // expected-remark @below {{divisibility = [8]}}
  %splat8 = arith.constant dense<8> : tensor<1024xi32>
  // expected-remark @below {{contiguity = [1024], divisibility = [1073741824], constancy = [1]}}
  %iota = tt.make_range {end = 1024 : i32, start = 0 : i32} : tensor<1024xi32>
  // Index divided by 16: runs of 16 equal values.
  // expected-remark @below {{constancy = [16]}}
  %idx_div16 = arith.divsi %iota, %splat16 : tensor<1024xi32>
  // expected-remark @below {{constancy = [1024]}}
  %base_ptrs = tt.splat %arg0 : !tt.ptr<f32> -> tensor<1024x!tt.ptr<f32>>
  // expected-remark @below {{constancy = [1024]}}
  %limit = tt.splat %arg1 : i32 -> tensor<1024xi32>
  // Index divided by 8, compared against the splat limit to form the mask.
  // expected-remark @below {{constancy = [8]}}
  %idx_div8 = arith.divsi %iota, %splat8 : tensor<1024xi32>
  // expected-remark @below {{constancy = [8]}}
  %in_bounds = arith.cmpi slt, %idx_div8, %limit : tensor<1024xi32>
  // expected-remark @below {{constancy = [16]}}
  %addrs = tt.addptr %base_ptrs, %idx_div16 : tensor<1024x!tt.ptr<f32>>, tensor<1024xi32>
  // expected-remark @below {{constancy = [16]}}
  %unmasked = tt.load %addrs : tensor<1024x!tt.ptr<f32>>
  // expected-remark @below {{constancy = [8]}}
  %masked = tt.load %addrs, %in_bounds : tensor<1024x!tt.ptr<f32>>
  tt.return
}

// -----

// This is a tiny test for verifying StoreOp-related alignment. It simply stores a constant to a buffer.
// Computes per-program offsets (program id * 128 + range), the matching
// pointer tensor, and a bounds mask against %n, checking axis info on each
// value before issuing a masked store of a float constant.
tt.func @store_constant_align(%addr: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %n: i32 {tt.divisibility = 16 : i32}) {
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %pid = tt.get_program_id x : i32
  // expected-remark @below {{contiguity = [1], divisibility = [128], constancy = [1], constant_value = 128}}
  %c128_i32 = arith.constant 128 : i32
  // expected-remark @below {{contiguity = [1], divisibility = [128], constancy = [1], constant_value = <none>}}
  %1 = arith.muli %pid, %c128_i32 : i32
  // expected-remark @below {{contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>}}
  %2 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
 // expected-remark @below {{contiguity = [1], divisibility = [128], constancy = [128], constant_value = <none>}}
  %3 = tt.splat %1 : i32 -> tensor<128xi32>
 // expected-remark @below {{contiguity = [128], divisibility = [128], constancy = [1], constant_value = <none>}}
  %4 = arith.addi %3, %2 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [16], constancy = [128], constant_value = <none>}}
  %5 = tt.splat %addr : !tt.ptr<f32> -> tensor<128x!tt.ptr<f32>>
  // expected-remark @below {{contiguity = [128], divisibility = [16], constancy = [1], constant_value = <none>}}
  %6 = tt.addptr %5, %4 : tensor<128x!tt.ptr<f32>>, tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [16], constancy = [128], constant_value = <none>}}
  %9 = tt.splat %n : i32 -> tensor<128xi32>
  // The mask compares the offsets against the splat of %n (hinted divisible by 16).
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [16], constant_value = <none>}}
  %mask = arith.cmpi slt, %4, %9 : tensor<128xi32>
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %cst = arith.constant dense<0.0> : tensor<128xf32>
  // NOTE(review): the store targets the splat base pointer %5, while the offset
  // pointer %6 is otherwise unused — possibly %6 was intended; confirm.
  tt.store %5, %cst, %mask : tensor<128x!tt.ptr<f32>>
  tt.return
}

// -----

// This IR is dumped from vecadd test.
// Note, the hint {tt.divisibility = 16 : i32} for %n_elements affects the alignment of mask.
// Vector add over 64-element blocks: loads from %arg0 and %arg1 under a bounds
// mask against %n_elements, adds, and stores to %arg2 under the same mask.
// NOTE(review): the only directives in this function are TODO-remark lines (on
// the mask and on the store address), so presumably nothing here is currently
// verified — confirm.
tt.func @vecadd_mask_align_16(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %n_elements: i32 {tt.divisibility = 16 : i32}) {
  %c64_i32 = arith.constant 64 : i32
  %0 = tt.get_program_id x : i32
  %1 = arith.muli %0, %c64_i32 : i32
  %2 = tt.make_range {end = 64 : i32, start = 0 : i32} : tensor<64xi32>
  %3 = tt.splat %1 : i32 -> tensor<64xi32>
  // %4 holds the per-element offsets: program id * 64 + [0, 64).
  %4 = arith.addi %3, %2 : tensor<64xi32>
  %5 = tt.splat %arg0 : !tt.ptr<f32> -> tensor<64x!tt.ptr<f32>>
  %6 = tt.addptr %5, %4 : tensor<64x!tt.ptr<f32>>, tensor<64xi32>
  %7 = tt.splat %arg1 : !tt.ptr<f32> -> tensor<64x!tt.ptr<f32>>
  %8 = tt.addptr %7, %4 : tensor<64x!tt.ptr<f32>>, tensor<64xi32>
  %9 = tt.splat %n_elements : i32 -> tensor<64xi32>
  // TODO-remark(this remark is wrong, needs to be fixed) @below {{arith.cmpi slt, %{{.*}} => contiguity = [1], divisibility = [1], constancy = [16], constant_value = <none>}}
  %mask = arith.cmpi slt, %4, %9 : tensor<64xi32>
  %11 = tt.load %6, %mask : tensor<64x!tt.ptr<f32>>
  %12 = tt.load %8, %mask : tensor<64x!tt.ptr<f32>>
  %13 = arith.addf %11, %12 : tensor<64xf32>
  %14 = tt.splat %arg2 : !tt.ptr<f32> -> tensor<64x!tt.ptr<f32>>
  // TODO-remark(this remark is wrong, needs to be fixed) @below {{tt.addptr %{{.*}} => contiguity = [64], divisibility = [16], constancy = [1], constant_value = <none>}}
  %15 = tt.addptr %14, %4 : tensor<64x!tt.ptr<f32>>, tensor<64xi32>
  tt.store %15, %13, %mask : tensor<64x!tt.ptr<f32>>
  tt.return
}

// -----

// This IR is dumped from vecadd test.
// Note: there is no divisibility hint for %n_elements, so Triton should assume its divisibility to be 1 by default.
// Same vector-add structure as a masked 64-element block add, but %n_elements
// carries no tt.divisibility attribute.
// NOTE(review): the single directive on the mask is a TODO-remark, so
// presumably nothing in this function is currently verified — confirm.
tt.func @vecadd_mask_align_1(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %n_elements: i32) {
  %c64_i32 = arith.constant 64 : i32
  %0 = tt.get_program_id x : i32
  %1 = arith.muli %0, %c64_i32 : i32
  %2 = tt.make_range {end = 64 : i32, start = 0 : i32} : tensor<64xi32>
  %3 = tt.splat %1 : i32 -> tensor<64xi32>
  // %4 holds the per-element offsets: program id * 64 + [0, 64).
  %4 = arith.addi %3, %2 : tensor<64xi32>
  %5 = tt.splat %arg0 : !tt.ptr<f32> -> tensor<64x!tt.ptr<f32>>
  %6 = tt.addptr %5, %4 : tensor<64x!tt.ptr<f32>>, tensor<64xi32>
  %7 = tt.splat %arg1 : !tt.ptr<f32> -> tensor<64x!tt.ptr<f32>>
  %8 = tt.addptr %7, %4 : tensor<64x!tt.ptr<f32>>, tensor<64xi32>
  %9 = tt.splat %n_elements : i32 -> tensor<64xi32>
  // TODO-remark(this remark is wrong, needs to be fixed) @below {{arith.cmpi slt, %{{.*}} => contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
  %10 = arith.cmpi slt, %4, %9 : tensor<64xi32>
  %11 = tt.load %6, %10 : tensor<64x!tt.ptr<f32>>
  %12 = tt.load %8, %10 : tensor<64x!tt.ptr<f32>>
  %13 = arith.addf %11, %12 : tensor<64xf32>
  %14 = tt.splat %arg2 : !tt.ptr<f32> -> tensor<64x!tt.ptr<f32>>
  %15 = tt.addptr %14, %4 : tensor<64x!tt.ptr<f32>>, tensor<64xi32>
  tt.store %15, %13, %10 : tensor<64x!tt.ptr<f32>>
  tt.return
}

// -----

module {

// Inter-procedural propagation of pointer divisibility hints. @addptr_hints is
// called from three kernels whose pointer arguments are hinted with
// divisibility 16, 8 and 4 respectively.
// We don't use function cloning here, so the alignment info is the gcd of all call sites.
tt.func @addptr_hints(%arg0: !tt.ptr<i32>) {
  // Offset of 1 element.
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = 1}}
  %cst1 = arith.constant 1 : i32
  // expected-remark @below {{contiguity = [1], divisibility = [4], constancy = [1], constant_value = <none>}}
  %1 = tt.addptr %arg0, %cst1 : !tt.ptr<i32>, i32
  // Offset of 4 elements.
  // expected-remark @below {{contiguity = [1], divisibility = [4], constancy = [1], constant_value = 4}}
  %cst4 = arith.constant 4 : i32
  // expected-remark @below {{contiguity = [1], divisibility = [4], constancy = [1], constant_value = <none>}}
  %2 = tt.addptr %arg0, %cst4 : !tt.ptr<i32>, i32
  // Offset of 16 elements. This case previously reused %cst4, which left
  // %cst16 unused and duplicated the case above. The result is still bounded
  // by the pointer's own divisibility (gcd of 16, 8 and 4), so the expected
  // divisibility stays at 4.
  // expected-remark @below {{contiguity = [1], divisibility = [16], constancy = [1], constant_value = 16}}
  %cst16 = arith.constant 16 : i32
  // expected-remark @below {{contiguity = [1], divisibility = [4], constancy = [1], constant_value = <none>}}
  %3 = tt.addptr %arg0, %cst16 : !tt.ptr<i32>, i32
  tt.return
}

// Call site with a 16-divisible pointer.
tt.func @kernel_div16(%arg0: !tt.ptr<i32> {tt.divisibility = 16 : i32}) {
  tt.call @addptr_hints(%arg0) : (!tt.ptr<i32>) -> ()
  tt.return
}

// Call site with an 8-divisible pointer.
tt.func @kernel_div8(%arg0: !tt.ptr<i32> {tt.divisibility = 8 : i32}) {
  tt.call @addptr_hints(%arg0) : (!tt.ptr<i32>) -> ()
  tt.return
}

// Call site with a 4-divisible pointer.
tt.func @kernel_div4(%arg0: !tt.ptr<i32> {tt.divisibility = 4 : i32}) {
  tt.call @addptr_hints(%arg0) : (!tt.ptr<i32>) -> ()
  tt.return
}

}

// -----

module {

// Propagation of divisibility through a call graph: @call_graph feeds
// arg * 12 into @foo and arg * 8 into @bar, and both forward to @mul.
// Functions are not cloned per call site, so @mul's argument gets the gcd of
// the information from every caller.
tt.func @mul(%arg0: i32) {
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = 1}}
  %one = arith.constant 1 : i32
  // expected-remark @below {{contiguity = [1], divisibility = [4], constancy = [1], constant_value = <none>}}
  %scaled = arith.muli %arg0, %one : i32
  tt.return
}

// Pass-through caller reached with the multiple-of-8 value.
tt.func @bar(%arg0: i32) {
  tt.call @mul(%arg0) : (i32) -> ()
  tt.return
}

// Pass-through caller reached with the multiple-of-12 value.
tt.func @foo(%arg0: i32) {
  tt.call @mul(%arg0) : (i32) -> ()
  tt.return
}

tt.func @call_graph(%arg0: i32) {
  // expected-remark @below {{contiguity = [1], divisibility = [4], constancy = [1], constant_value = 12}}
  %twelve = arith.constant 12 : i32
  // expected-remark @below {{contiguity = [1], divisibility = [4], constancy = [1], constant_value = <none>}}
  %times12 = arith.muli %arg0, %twelve : i32
  tt.call @foo(%times12) : (i32) -> ()
  // expected-remark @below {{contiguity = [1], divisibility = [8], constancy = [1], constant_value = 8}}
  %eight = arith.constant 8 : i32
  // expected-remark @below {{contiguity = [1], divisibility = [8], constancy = [1], constant_value = <none>}}
  %times8 = arith.muli %arg0, %eight : i32
  tt.call @bar(%times8) : (i32) -> ()
  tt.return
}

}

// -----

// Loading through a pointer-to-tensor argument that carries no hints: the
// result is expected to report the most conservative axis info in both
// dimensions.
tt.func @tensor_ptr(%arg0: !tt.ptr<tensor<64x16xi32>, 1>) {
  // expected-remark @below {{contiguity = [1, 1], divisibility = [1, 1], constancy = [1, 1], constant_value = <none>}}
  %tile = tt.load %arg0 : !tt.ptr<tensor<64x16xi32>, 1>
  tt.return
}


// -----

// Two scf.for loops in sequence: the first advances the 16-divisible pointer
// tensor %8 by a splat of 64 each iteration, and its result %9 seeds the second
// loop, which stores through the carried pointer and advances it the same way.
// NOTE(review): the directives above the second loop are TODO-remark lines, so
// presumably only the first loop's result is verified — confirm.
tt.func public @chained_for(%8: tensor<128x64x!tt.ptr<bf16>> {tt.divisibility = 16 : i32}) {
  // expected-remark @below {{contiguity = [1, 1], divisibility = [1, 1], constancy = [1, 1], constant_value = <none>}}
  %cst = arith.constant dense<0.000000e+00> : tensor<128x64xbf16>
  // expected-remark @below {{contiguity = [1], divisibility = [16], constancy = [1], constant_value = 16}}
  %c16_i32 = arith.constant 16 : i32
  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = 1}}
  %c1_i32 = arith.constant 1 : i32
  // expected-remark @below {{contiguity = [1], divisibility = [4611686018427387904], constancy = [1], constant_value = 0}}
  %c0_i32 = arith.constant 0 : i32
  // expected-remark @below {{contiguity = [1, 1], divisibility = [64, 64], constancy = [128, 64], constant_value = 64}}
  %cst_0 = arith.constant dense<64> : tensor<128x64xi32>
  // First loop: its result is expected to keep the argument's divisibility of 16.
  // expected-remark @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [1, 1], constant_value = <none>}}
  %9 = scf.for %arg7 = %c0_i32 to %c16_i32 step %c1_i32 iter_args(%arg8 = %8) -> (tensor<128x64x!tt.ptr<bf16>>)  : i32 {
    %11 = tt.addptr %arg8, %cst_0 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32>
    scf.yield %11 : tensor<128x64x!tt.ptr<bf16>>
  }
  // TODO-remark(this remark is wrong, needs to be fixed) @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [1, 1], constant_value = <none>}}
  // TODO-remark(this remark is wrong, needs to be fixed) @below {{contiguity = [1, 1], divisibility = [16, 16], constancy = [1, 1], constant_value = <none>}}
  %10 = scf.for %arg7 = %c0_i32 to %c16_i32 step %c1_i32 iter_args(%arg8 = %9) -> (tensor<128x64x!tt.ptr<bf16>>)  : i32 {
    tt.store %arg8, %cst : tensor<128x64x!tt.ptr<bf16>>
    %11 = tt.addptr %arg8, %cst_0 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32>
    scf.yield %11 : tensor<128x64x!tt.ptr<bf16>>
  }
  tt.return
}

// -----

module {
  // Regression check: the analysis must handle the most negative i64 constant
  // and report a finite power-of-two divisibility for it.
  tt.func @int_min_does_not_underflow_in_analysis() -> i64 {
    // expected-remark @below {{divisibility = [4611686018427387904]}}
    %i64_min = arith.constant -9223372036854775808 : i64
    tt.return %i64_min : i64
  }
}

// -----

// Checks that a tt.addptr on the 16-divisible pointer and offset reports
// divisibility 16 in the default region of ttg.warp_specialize and in both
// partition regions.
// NOTE(review): the partition block arguments %arg2/%arg3 presumably receive
// the captured operands %arg0/%arg1 listed on the op — confirm.
tt.func @test_warp_specialize_propagation(%arg0: !tt.ptr<f16> {tt.divisibility = 16 : i32}, %arg1: i32 {tt.divisibility = 16 : i32}) {
  ttg.warp_specialize(%arg0, %arg1)
  default {
    // The default region uses the function arguments directly.
    // expected-remark @below {{contiguity = [1], divisibility = [16], constancy = [1], constant_value = <none>}}
    tt.addptr %arg0, %arg1 : !tt.ptr<f16>, i32
    ttg.warp_yield
  }
  partition0(%arg2: !tt.ptr<f16>, %arg3: i32) num_warps(1) {
    // expected-remark @below {{contiguity = [1], divisibility = [16], constancy = [1], constant_value = <none>}}
    tt.addptr %arg2, %arg3 : !tt.ptr<f16>, i32
    ttg.warp_return
  }
  partition1(%arg2: !tt.ptr<f16>, %arg3: i32) num_warps(1) {
    // expected-remark @below {{contiguity = [1], divisibility = [16], constancy = [1], constant_value = <none>}}
    tt.addptr %arg2, %arg3 : !tt.ptr<f16>, i32
    ttg.warp_return
  } : (!tt.ptr<f16>, i32) -> ()
  tt.return
}