-
River Riddle authored
Reland [GreedPatternRewriter] Preprocess constants while building worklist when not processing top down Reland Note: Adds a fix to properly mark a commutative operation as folded if we change the order of its operands. This was uncovered by the fact that we no longer re-process constants. This avoids accidentally reversing the order of constants during successive application, e.g. when running the canonicalizer. This helps reduce the number of iterations, and also avoids unnecessary changes to input IR. Fixes #51892 Differential Revision: https://reviews.llvm.org/D122692
River Riddle authoredReland [GreedPatternRewriter] Preprocess constants while building worklist when not processing top down Reland Note: Adds a fix to properly mark a commutative operation as folded if we change the order of its operands. This was uncovered by the fact that we no longer re-process constants. This avoids accidentally reversing the order of constants during successive application, e.g. when running the canonicalizer. This helps reduce the number of iterations, and also avoids unnecessary changes to input IR. Fixes #51892 Differential Revision: https://reviews.llvm.org/D122692
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
dense.mlir 10.69 KiB
// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
// RUN: mlir-opt %s -sparsification | FileCheck %s
// Test to demonstrate the difference between non-annotated dense tensors
// and all-dense-annotated "sparse" tensors. The former class remains as
// two-dimensional tensors that are bufferized by subsequent passes. The
// latter class is linearized into one-dimensional buffers that are backed
// by the runtime support library.
#DenseMatrix = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense" ] }>
#trait_2d = {
indexing_maps = [
affine_map<(i,j) -> (i,j)>, // A
affine_map<(i,j) -> (i,j)> // X (out)
],
iterator_types = ["parallel", "parallel"],
doc = "X(i,j) = A(i,j) + 1"
}
#trait_3d = {
indexing_maps = [
affine_map<(i,j,k) -> (i,j,k)>, // A
affine_map<(i,j,k) -> (i,j)> // X (out)
],
iterator_types = ["parallel", "parallel", "reduction"],
doc = "X(i,j) += A(i,j,k)"
}
//
// Test with an all-dense-annotated "sparse" matrix as input and
// a non-annotated dense matrix as output that is not inplacable.
// This results in an explicit allocation to facilitate output.
//
// CHECK-LABEL: func @dense1(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32> {linalg.inplaceable = false}) -> tensor<32x16xf32> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1.000000e+00 : f32
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
// CHECK-DAG: %[[VAL_9:.*]] = memref.alloc() : memref<32x16xf32>
// CHECK: memref.copy %[[VAL_8]], %[[VAL_9]] : memref<32x16xf32> to memref<32x16xf32>
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_10]], %[[VAL_4]] : index
// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_12]], %[[VAL_11]] : index
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref<?xf32>
// CHECK: %[[VAL_15:.*]] = arith.addf %[[VAL_14]], %[[VAL_2]] : f32
// CHECK: memref.store %[[VAL_15]], %[[VAL_9]]{{\[}}%[[VAL_10]], %[[VAL_11]]] : memref<32x16xf32>
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_16:.*]] = bufferization.to_tensor %[[VAL_9]] : memref<32x16xf32>
// CHECK: return %[[VAL_16]] : tensor<32x16xf32>
// CHECK: }
func @dense1(%arga: tensor<32x16xf32, #DenseMatrix>,
%argx: tensor<32x16xf32> {linalg.inplaceable = false})
-> tensor<32x16xf32> {
%c = arith.constant 1.0 : f32
%0 = linalg.generic #trait_2d
ins(%arga: tensor<32x16xf32, #DenseMatrix>)
outs(%argx: tensor<32x16xf32>) {
^bb(%a: f32, %x: f32):
%1 = arith.addf %a, %c : f32
linalg.yield %1 : f32
} -> tensor<32x16xf32>
return %0 : tensor<32x16xf32>
}
//
// Test with an all-dense-annotated "sparse" matrix as input and
// a non-annotated dense matrix as output that is inplacable.
// This allows updating the dense output in place.
//
// CHECK-LABEL: func @dense2(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32> {linalg.inplaceable = true}) -> tensor<32x16xf32> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1.000000e+00 : f32
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
// CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK: %[[VAL_11:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index
// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_10]] : index
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK: %[[VAL_14:.*]] = arith.addf %[[VAL_13]], %[[VAL_2]] : f32
// CHECK: memref.store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_9]], %[[VAL_10]]] : memref<32x16xf32>
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_15:.*]] = bufferization.to_tensor %[[VAL_8]] : memref<32x16xf32>
// CHECK: return %[[VAL_15]] : tensor<32x16xf32>
// CHECK: }
func @dense2(%arga: tensor<32x16xf32, #DenseMatrix>,
%argx: tensor<32x16xf32> {linalg.inplaceable = true})
-> tensor<32x16xf32> {
%c = arith.constant 1.0 : f32
%0 = linalg.generic #trait_2d
ins(%arga: tensor<32x16xf32, #DenseMatrix>)
outs(%argx: tensor<32x16xf32>) {
^bb(%a: f32, %x: f32):
%1 = arith.addf %a, %c : f32
linalg.yield %1 : f32
} -> tensor<32x16xf32>
return %0 : tensor<32x16xf32>
}
//
// Test with a non-annotated dense matrix as input and
// an all-dense annotated "sparse" matrix as output.
// The rewriting would fail if argx was not in-placeable.
//
// CHECK-LABEL: func @dense3(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> {linalg.inplaceable = true}) -> tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1.000000e+00 : f32
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32x16xf32>
// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
// CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK: %[[VAL_11:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index
// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_10]] : index
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_9]], %[[VAL_10]]] : memref<32x16xf32>
// CHECK: %[[VAL_14:.*]] = arith.addf %[[VAL_13]], %[[VAL_2]] : f32
// CHECK: memref.store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_15:.*]] = sparse_tensor.load %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK: return %[[VAL_15]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK: }
func @dense3(%arga: tensor<32x16xf32>,
%argx: tensor<32x16xf32, #DenseMatrix> {linalg.inplaceable = true})
-> tensor<32x16xf32, #DenseMatrix> {
%c = arith.constant 1.0 : f32
%0 = linalg.generic #trait_2d
ins(%arga: tensor<32x16xf32>)
outs(%argx: tensor<32x16xf32, #DenseMatrix>) {
^bb(%a: f32, %x: f32):
%1 = arith.addf %a, %c : f32
linalg.yield %1 : f32
} -> tensor<32x16xf32, #DenseMatrix>
return %0 : tensor<32x16xf32, #DenseMatrix>
}
//
// Test with a non-annotated dense matrix as input and
// an all-dense annotated "sparse" matrix as output.
// The rewriting would fail if argx was not in-placeable.
// The missing innermost "k" index (due to a reduction) is accounted
// for by scalarizing the reduction operation for the output tensor.
//
// CHECK-LABEL: func @dense4(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> {linalg.inplaceable = true}) -> tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> {
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32x16x8xf32>
// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}}>> to memref<?xf32>
// CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK: %[[VAL_11:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index
// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_10]] : index
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK: %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_2]] step %[[VAL_6]] iter_args(%[[VAL_16:.*]] = %[[VAL_13]]) -> (f32) {
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_9]], %[[VAL_10]], %[[VAL_15]]] : memref<32x16x8xf32>
// CHECK: %[[VAL_18:.*]] = arith.addf %[[VAL_16]], %[[VAL_17]] : f32
// CHECK: scf.yield %[[VAL_18]] : f32
// CHECK: }
// CHECK: memref.store %[[VAL_19:.*]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_20:.*]] = sparse_tensor.load %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK: return %[[VAL_20]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK: }
func @dense4(%arga: tensor<32x16x8xf32>,
%argx: tensor<32x16xf32, #DenseMatrix> {linalg.inplaceable = true})
-> tensor<32x16xf32, #DenseMatrix> {
%0 = linalg.generic #trait_3d
ins(%arga: tensor<32x16x8xf32>)
outs(%argx: tensor<32x16xf32, #DenseMatrix>) {
^bb(%a: f32, %x: f32):
%1 = arith.addf %x, %a : f32
linalg.yield %1 : f32
} -> tensor<32x16xf32, #DenseMatrix>
return %0 : tensor<32x16xf32, #DenseMatrix>
}