Handle matrix times matrix = vector case (#227)

dkarrasch · devmotion · web-flow · commit d70ba91c423e · 2023-05-12T22:45:07.000+02:00
* Handle matrix times matrix = vector case

* add test

* actually cover it

* use LinearAlgebra

* Apply suggestions from code review

Co-authored-by: David Widmann &lt;devmotion@users.noreply.github.com&gt;

* avoid ambiguity, add tests

* Update test/api/GradientTests.jl

Co-authored-by: David Widmann &lt;devmotion@users.noreply.github.com&gt;

* fix

---------

Co-authored-by: David Widmann &lt;devmotion@users.noreply.github.com&gt;
diff --git a/Project.toml b/Project.toml
@@ -1,6 +1,6 @@
 name = "ReverseDiff"
 uuid = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
-version = "1.14.5"
+version = "1.14.6"
 
 [deps]
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
diff --git a/src/derivatives/linalg/arithmetic.jl b/src/derivatives/linalg/arithmetic.jl
@@ -270,7 +270,17 @@ end
 # a * b
 
 function reverse_mul!(output, output_deriv, a, b, a_tmp, b_tmp)
-    istracked(a) && increment_deriv!(a, mul!(a_tmp, output_deriv, transpose(value(b))))
+    if istracked(a)
+        if a_tmp isa AbstractVector && b isa AbstractMatrix
+            # this branch is required for scalar-valued functions that
+            # involve outer-products of vectors, for such functions, the target
+            # a_temp is a vector, but when b is a matrix, we cannot multiply into a vector,
+            # so need to reshape memory to look like matrix (see PositiveFactorizations.jl)
+            increment_deriv!(a, mul!(reshape(a_tmp, :, 1), output_deriv, transpose(value(b))))
+        else
+            increment_deriv!(a, mul!(a_tmp, output_deriv, transpose(value(b))))
+        end
+    end
     istracked(b) && increment_deriv!(b, mul!(b_tmp, transpose(value(a)), output_deriv))
 end
 
@@ -279,8 +289,14 @@ for (f, F) in ((:transpose, :Transpose), (:adjoint, :Adjoint))
         # a * f(b)
         function reverse_mul!(output, output_deriv, a, b::$F, a_tmp, b_tmp)
             _b = ($f)(b)
-            istracked(a) && increment_deriv!(a, mul!(a_tmp, output_deriv, mulargvalue(b)))
-            istracked(_b) && increment_deriv!(_b, ($f)(mul!(b_tmp, ($f)(output_deriv), value(a))))
+            if istracked(a)
+                if a_tmp isa AbstractVector
+                    increment_deriv!(a, mul!(reshape(a_tmp, :, 1), output_deriv, mulargvalue(_b)))
+                else
+                    increment_deriv!(a, mul!(a_tmp, output_deriv, mulargvalue(b)))
+                end
+            end
+            istracked(_b) && increment_deriv!(_b, ($f)(mul!(($f)(b_tmp), ($f)(output_deriv), value(a))))
         end
            # f(a) * b
         function reverse_mul!(output, output_deriv, a::$F, b, a_tmp, b_tmp)
diff --git a/test/api/GradientTests.jl b/test/api/GradientTests.jl
@@ -1,6 +1,6 @@
 module GradientTests
 
-using DiffTests, ForwardDiff, ReverseDiff, Test
+using DiffTests, ForwardDiff, ReverseDiff, Test, LinearAlgebra
 
 include(joinpath(dirname(@__FILE__), "../utils.jl"))
 
@@ -187,6 +187,20 @@ for f in DiffTests.VECTOR_TO_NUMBER_FUNCS
     test_unary_gradient(f, rand(5))
 end
 
+# PR #227
+norm_hermitian1(v) = (A = I - 2 * v * v'; norm(A' * A))
+norm_hermitian2(v) = (A = I - 2 * v * transpose(v); norm(transpose(A) * A))
+norm_hermitian3(v) = (A = I - 2 * v * collect(v'); norm(collect(A') * A))
+norm_hermitian4(v) = (A = I - 2 * v * v'; norm(transpose(A) * A))
+norm_hermitian5(v) = (A = I - 2 * v * transpose(v); norm(A' * A))
+norm_hermitian6(v) = (A = (v'v)*I - 2 * v * v'; norm(A' * A))
+
+for f in (norm_hermitian1, norm_hermitian2, norm_hermitian3,
+            norm_hermitian4, norm_hermitian5, norm_hermitian6)
+    test_println("VECTOR_TO_NUMBER_FUNCS", f)
+    test_unary_gradient(f, rand(5))
+end
+
 for f in DiffTests.TERNARY_MATRIX_TO_NUMBER_FUNCS
     test_println("TERNARY_MATRIX_TO_NUMBER_FUNCS", f)
     test_ternary_gradient(f, rand(5, 5), rand(5, 5), rand(5, 5))
diff --git a/test/derivatives/LinAlgTests.jl b/test/derivatives/LinAlgTests.jl
@@ -223,8 +223,15 @@ for f in (
     test_arr2num(f, x, tp)
 end
 
+# PR #227
+function norm_hermitian(v)
+    A = I - 2 * v * v'
+    return norm(A' * A)
+end
+
 for f in (
     y -> vec(y)' * Matrix{Float64}(I, length(y), length(y)) * vec(y),
+    norm_hermitian,
 )
     test_println("Array -> Number functions", f)
     test_arr2num(f, x, tp, ignore_tape_length=true)