
Derivative of a Function That Includes @diff Macro #670

@BariscanBozkurt

Description

Hello.

I am currently using Julia version 1.6.3 on the following platform: "OS: Linux (x86_64-pc-linux-gnu), CPU: Intel(R) Xeon(R) Gold 6248 CPU @ 2.50GHz, GPU: CuDevice(0): Tesla T4". I am trying to implement a variational autoencoder called Gradient Origin Networks (GONs). GONs were introduced as generative models that do not require encoders or hypernetworks. Assume a variational GON model called F. First, a zero vector z_0 is passed through the model F, and the latent vector is then initialized as the negative gradient of the loss with respect to this zero vector; the latent space is therefore determined by a single gradient step. Let us call this latent vector z. The network parameters are then optimized using the loss computed on the reconstruction F(z).
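In equations (as I understand the GON paper), with loss(theta, x, z) denoting the loss defined further below:

z = -\nabla_{z_0} \, \mathrm{loss}(\theta, x, z_0)\,\big|_{z_0 = 0}

\theta^{*} = \arg\min_{\theta} \, \mathrm{loss}(\theta, x, z)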

I am currently performing my experiments on the MNIST dataset, where I linearly interpolate the images to a size of 32x32 (a rough sketch of this preprocessing is given right below). The decoding and reparametrization functions follow after that; theta is a vector of model weights.
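For completeness, the preprocessing is roughly the following. This is only a sketch of what the linked notebook does: the Knet MNIST data script and ImageTransformations.imresize are my shorthand here, and Atype is the array-type alias (presumably KnetArray{Float32}) used throughout the notebook.

using Knet, ImageTransformations
include(Knet.dir("data", "mnist.jl"))
xtrn, ytrn, xtst, ytst = mnist()                  # MNIST digits as Float32 arrays
xtrn = reshape(xtrn, 28, 28, :)                   # one 28x28 image per slice
x32 = Array{Float32}(undef, 32, 32, 1, size(xtrn, 3))
for i in 1:size(xtrn, 3)
    x32[:, :, 1, i] = imresize(xtrn[:, :, i], (32, 32))   # linear interpolation by default
end
x = convert(Atype, x32)                           # move to GPU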

function reparametrize(mu, logvar)
    std = exp.(0.5 .* logvar)                      # standard deviation from log-variance
    epsilon = convert(Atype, randn(F, size(mu)))   # noise sample, same shape as mu
    z = mu .+ epsilon .* std
    return z
end
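This is the standard reparameterization trick:

z = \mu + \sigma \odot \epsilon, \qquad \sigma = \exp\!\big(\tfrac{1}{2}\log\sigma^{2}\big), \qquad \epsilon \sim \mathcal{N}(0, I)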

function decode(theta, z; batch_size = 64, training = true)
    # linear layers producing the parameters of the latent Gaussian
    mu = theta[1] * z .+ theta[2]
    logvar = theta[3] * z .+ theta[4]

    z = reparametrize(mu, logvar)
    z = reshape(z, (1, 1, nz, batch_size))

    # transposed-convolution blocks with batchnorm and elu
    z = deconv4(theta[5], z, mode = 1) .+ theta[6]
    z = batchnorm(z, bnmoments(), theta[7]; training = training)
    z = Knet.elu.(z)

    z = deconv4(theta[8], z, stride = 2, padding = 1, mode = 1) .+ theta[9]
    z = batchnorm(z, bnmoments(), theta[10]; training = training)
    z = Knet.elu.(z)

    z = deconv4(theta[11], z, stride = 2, padding = 1, mode = 1) .+ theta[12]
    z = batchnorm(z, bnmoments(), theta[13]; training = training)
    z = Knet.elu.(z)

    # final transposed convolution + sigmoid gives the reconstruction in [0, 1]
    z = deconv4(theta[14], z, stride = 2, padding = 1, mode = 1) .+ theta[15]
    x_hat = Knet.sigm.(z)

    return x_hat, mu, logvar
end
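For reference, a plain (untracked) forward pass looks like this; nz, F and Atype are the globals from the notebook, and the expected shapes follow from the reshape above and the 32x32 MNIST setup:

z0 = convert(Atype, zeros(F, nz, 64))
x_hat, mu, logvar = decode(theta, z0; batch_size = 64, training = false)
# size(mu) == size(logvar) == (nz, 64); x_hat should be 32x32x1x64 for the grayscale setup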

The loss is a combination of binary cross-entropy and KL divergence. The code is given below.

function BCE(x_tensor, x_hat_tensor)
    # per-pixel binary cross-entropy, summed over pixels and averaged over the batch
    x = mat(x_tensor)
    x_hat = mat(x_hat_tensor)
    return -mean(sum((x .* log.(x_hat .+ F(1e-10)) + (1 .- x) .* log.(1 .- x_hat .+ F(1e-10))), dims = 1))
end

function KLD(mu, logvar)
    # closed-form KL divergence between N(mu, sigma^2) and N(0, I)
    var = exp.(logvar)
    KL = -0.5 * mean(sum(1 .+ logvar .- (mu .* mu) .- var, dims = 1))
    return KL
end

function loss(theta, x, z)
    x_hat, mu, logvar = decode(theta, z)
    L = BCE(x, x_hat) + KLD(mu, logvar)
    return L
end
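In other words, the objective is the usual negative ELBO with a per-pixel Bernoulli likelihood and the closed-form KL to a standard normal, averaged over the batch of size B (the F(1e-10) terms above are only for numerical stability):

L(\theta) = -\frac{1}{B}\sum_{b=1}^{B}\sum_{i}\Big[x_{bi}\log\hat{x}_{bi} + (1 - x_{bi})\log(1 - \hat{x}_{bi})\Big] \;-\; \frac{1}{2B}\sum_{b=1}^{B}\sum_{j}\Big[1 + \log\sigma_{bj}^{2} - \mu_{bj}^{2} - \sigma_{bj}^{2}\Big]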

Since GON has two steps, (1) use the gradient with respect to the origin to determine the latent vector z, and (2) use that latent vector for the reconstruction, I need to track all gradients with respect to the model weights through both steps. Therefore, I wrote the following decoding and loss functions for training.

function decode_train(theta, x; batch_size = 64, training = true)
    # step (1): gradient of the loss w.r.t. a zero latent vector (the "origin")
    origin = param(Atype(zeros(nz, batch_size)))
    derivative_origin = @diff loss(value.(theta), x, origin)
    dz = grad(derivative_origin, origin)
    z = -value(dz)                     # latent vector = negative gradient at the origin

    # step (2): reconstruct from the gradient-initialized latent vector
    x_hat, mu, logvar = decode(theta, z)
    return x_hat, mu, logvar
end

function loss_train(theta, x)
    x_hat, mu, logvar = decode_train(theta, x)
    L = BCE(x, x_hat) + KLD(mu, logvar)
    return L
end

However, I am not able to take the gradient of the loss_train(theta, x) function. I get the error below when I use the @diff macro of the AutoGrad package. How can I train this model, which requires a second-order derivative (I need the derivative of decode_train)?
To reproduce this result, you can run the following notebook:
https://github.com/BariscanBozkurt/Gradient-Origin-Networks/blob/main/GON_Implementation_Issue.ipynb
My code:
@diff loss_train(theta, x)
The error is:

Stacktrace:
[1] copyto!(a::KnetArray{Float32, 4}, b::Base.Broadcast.Broadcasted{Base.Broadcast.Style{AutoGrad.Value}, NTuple{4, Base.OneTo{Int64}}, typeof(identity), Tuple{AutoGrad.Result{KnetArray{Float32, 4}}}})
@ Knet.KnetArrays ~/.julia/packages/Knet/RCkV0/src/knetarrays/broadcast.jl:35
[2] copyto!(x::AutoGrad.Result{KnetArray{Float32, 4}}, y::Base.Broadcast.Broadcasted{Base.Broadcast.Style{AutoGrad.Value}, NTuple{4, Base.OneTo{Int64}}, typeof(identity), Tuple{AutoGrad.Result{KnetArray{Float32, 4}}}})
@ AutoGrad ~/.julia/packages/AutoGrad/TTpeo/src/core.jl:55
[3] materialize!
@ ./broadcast.jl:894 [inlined]
[4] materialize!
@ ./broadcast.jl:891 [inlined]
[5] materialize!(dest::AutoGrad.Result{KnetArray{Float32, 4}}, x::AutoGrad.Result{KnetArray{Float32, 4}})
@ Base.Broadcast ./broadcast.jl:887
[6] batchnorm4_back(g::KnetArray{Float32, 4}, x::AutoGrad.Result{KnetArray{Float32, 4}}, dy::AutoGrad.Result{KnetArray{Float32, 4}}; eps::Float64, training::Bool, cache::Knet.Ops20.BNCache, moments::Knet.Ops20.BNMoments, o::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Knet.Ops20 ~/.julia/packages/Knet/RCkV0/src/ops20/batchnorm.jl:262
[7] #batchnorm4x#191
@ ~/.julia/packages/Knet/RCkV0/src/ops20/batchnorm.jl:317 [inlined]
[8] #back#210
@ ./none:0 [inlined]
[9] differentiate(::Function; o::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ AutoGrad ~/.julia/packages/AutoGrad/TTpeo/src/core.jl:165
[10] differentiate
@ ~/.julia/packages/AutoGrad/TTpeo/src/core.jl:135 [inlined]
[11] decode_train(theta::Vector{Any}, x::KnetArray{Float32, 4}; batch_size::Int64, training::Bool)
@ Main ./In[14]:4
[12] decode_train
@ ./In[14]:2 [inlined]
[13] loss_train(theta::Vector{Any}, x::KnetArray{Float32, 4})
@ Main ./In[16]:2
[14] (::var"#16#17")()
@ Main ~/.julia/packages/AutoGrad/TTpeo/src/core.jl:205
[15] differentiate(::Function; o::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ AutoGrad ~/.julia/packages/AutoGrad/TTpeo/src/core.jl:144
[16] differentiate(::Function)
@ AutoGrad ~/.julia/packages/AutoGrad/TTpeo/src/core.jl:135
[17] top-level scope
@ In[18]:1
[18] eval
@ ./boot.jl:360 [inlined]
[19] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String)
@ Base ./loading.jl:1116
[20] softscope_include_string(m::Module, code::String, filename::String)
@ SoftGlobalScope ~/.julia/packages/SoftGlobalScope/u4UzH/src/SoftGlobalScope.jl:65
[21] execute_request(socket::ZMQ.Socket, msg::IJulia.Msg)
@ IJulia ~/.julia/packages/IJulia/e8kqU/src/execute_request.jl:67
[22] #invokelatest#2
@ ./essentials.jl:708 [inlined]
[23] invokelatest
@ ./essentials.jl:706 [inlined]
[24] eventloop(socket::ZMQ.Socket)
@ IJulia ~/.julia/packages/IJulia/e8kqU/src/eventloop.jl:8
[25] (::IJulia.var"#15#18")()
@ IJulia ./task.jl:411
MethodError: no method matching copyto!(::KnetArray{Float32, 4}, ::AutoGrad.Result{KnetArray{Float32, 4}})
Closest candidates are:
copyto!(::KnetArray{T, N} where N, ::Array{T, N} where N) where T at /kuacc/users/bbozkurt15/.julia/packages/Knet/RCkV0/src/knetarrays/copy.jl:10
copyto!(::KnetArray{T, N} where N, ::Array{S, N} where N) where {T, S} at /kuacc/users/bbozkurt15/.julia/packages/Knet/RCkV0/src/knetarrays/copy.jl:18
copyto!(::KnetArray{T, N} where N, ::KnetArray{T, N} where N) where T at /kuacc/users/bbozkurt15/.julia/packages/Knet/RCkV0/src/knetarrays/copy.jl:9
...

Stacktrace:
[1] differentiate(::Function; o::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ AutoGrad ~/.julia/packages/AutoGrad/TTpeo/src/core.jl:148
[2] differentiate(::Function)
@ AutoGrad ~/.julia/packages/AutoGrad/TTpeo/src/core.jl:135
[3] top-level scope
@ In[18]:1
[4] eval
@ ./boot.jl:360 [inlined]
[5] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String)
@ Base ./loading.jl:1116
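
For what it is worth, here is the nesting pattern I believe I am relying on, written with a plain scalar function instead of the Knet ops; if this is not the intended way to express a second-order derivative with AutoGrad, please let me know:

using AutoGrad

x = Param(1.0)

outer = @diff begin
    inner = @diff sin(x)       # inner tape
    g = grad(inner, x)         # first derivative, cos(x); I expect this to stay tracked on the outer tape
    g * g                      # some scalar function of the first derivative
end

grad(outer, x)                 # I would expect d/dx cos(x)^2 = -sin(2x) here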
