BackwardDot #

Conv2D backward dot-level correctness (bridge lemma).

The runtime autograd engine (NN.Runtime.Autograd.Engine) computes Conv2D gradients via the handwritten spec:

Spec.conv2d_backward_spec (kernel/bias/input)

For the analytic spec-level theorem over ℝ, Conv2D is already covered via fderiv/adjoints in:

NN.Proofs.Autograd.Tape.Ops.Conv.FDeriv

That file provides a proof-only Conv2D node whose VJP is (fderiv forward)†, so any DAG using it is covered by the global theorem Graph.backpropVec_eq_adjoint_fderiv.

What remains here is the dot/adjointness bridge:

dot (JVP_conv2d …) δ = dot dKernel gK + dot dBias gB + dot dInput gX

where (gK, gB, gX) = Spec.conv2d_backward_spec … δ.

The padding-related rewrites needed for the input-gradient proof are factored into:

NN/Spec/Layers/Utils.lean (get_at_or_zero_pad_multi_channel* lemmas)

source

theorem Proofs.Autograd.Conv2D.dot_add_right {s : Spec.Shape} (a b c : Spec.Tensor ℝ s) :

Spec.dot a (b.addSpec c) = Spec.dot a b + Spec.dot a c

source

theorem Proofs.Autograd.Conv2D.dot_dim {n : ℕ} {s : Spec.Shape} (a b : Spec.Tensor ℝ (Spec.Shape.dim n s)) :

Spec.dot a b = ∑ i : Fin n, Spec.dot (Spec.get a i) (Spec.get b i)

source

theorem Proofs.Autograd.Conv2D.dot_scalar (x y : ℝ) :

Spec.dot (Spec.Tensor.scalar x) (Spec.Tensor.scalar y) = x * y

source

theorem Proofs.Autograd.Conv2D.dot_vec_eq_sum_get {n : ℕ} (a b : Spec.Tensor ℝ (Spec.Shape.dim n Spec.Shape.scalar)) :

Spec.dot a b = ∑ i : Fin n, Spec.getAtOrZero a [↑i] * Spec.getAtOrZero b [↑i]

source

@[simp]

theorem Proofs.Autograd.Conv2D.get_at_or_zero_tensor_cast {s t : Spec.Shape} (h : s = t) (x : Spec.Tensor ℝ s) (idx : List ℕ) :

Spec.getAtOrZero (x.castShape h) idx = Spec.getAtOrZero x idx

source

theorem Proofs.Autograd.Conv2D.get2_eq_get_at_or_zero {m n : ℕ} (A : Spec.Tensor ℝ (Spec.Shape.dim m (Spec.Shape.dim n Spec.Shape.scalar))) (i : Fin m) (j : Fin n) :

Spec.get2 A i j = Spec.getAtOrZero A [↑i, ↑j]

source

theorem Proofs.Autograd.Conv2D.get_at_or_zero_get_channel {C H W : ℕ} (t : Spec.Tensor ℝ (Spec.Shape.dim C (Spec.Shape.dim H (Spec.Shape.dim W Spec.Shape.scalar)))) (c : Fin C) (i : Fin H) (j : Fin W) :

Spec.getAtOrZero (Spec.get t c) [↑i, ↑j] = Spec.getAtOrZero t [↑c, ↑i, ↑j]

source

def Proofs.Autograd.Conv2D.biasBroadcast {outC outH outW : ℕ} (db : Spec.Tensor ℝ (Spec.Shape.dim outC Spec.Shape.scalar)) :

Spec.Tensor ℝ (Spec.Shape.dim outC (Spec.Shape.dim outH (Spec.Shape.dim outW Spec.Shape.scalar)))

Broadcast a bias gradient outC across spatial axes into an outC×outH×outW tensor.

Instances For

source

theorem Proofs.Autograd.Conv2D.conv2d_bias_deriv_get {inC outC kH kW stride padding inH inW : ℕ} {h1 : inC ≠ 0} {h2 : kH ≠ 0} {h3 : kW ≠ 0} (layer : Spec.Conv2DSpec inC outC kH kW stride padding ℝ h1 h2 h3) (input : Spec.MultiChannelImage inC inH inW ℝ) (δ : Spec.MultiChannelImage outC ((inH + 2 * padding - kH) / stride + 1) ((inW + 2 * padding - kW) / stride + 1) ℝ) (oc : Fin outC) :

Spec.getAtOrZero (Spec.conv2dBiasDerivSpec layer input δ) [↑oc] = ∑ i : Fin ((inH + 2 * padding - kH) / stride + 1), ∑ j : Fin ((inW + 2 * padding - kW) / stride + 1), Spec.getAtOrZero δ [↑oc, ↑i, ↑j]

source

theorem Proofs.Autograd.Conv2D.dot_biasBroadcast_eq_dot_bias_deriv {inC outC kH kW stride padding inH inW : ℕ} {h1 : inC ≠ 0} {h2 : kH ≠ 0} {h3 : kW ≠ 0} (layer : Spec.Conv2DSpec inC outC kH kW stride padding ℝ h1 h2 h3) (input : Spec.MultiChannelImage inC inH inW ℝ) (db : Spec.Tensor ℝ (Spec.Shape.dim outC Spec.Shape.scalar)) (δ : Spec.MultiChannelImage outC ((inH + 2 * padding - kH) / stride + 1) ((inW + 2 * padding - kW) / stride + 1) ℝ) :

Spec.dot (biasBroadcast db) δ = Spec.dot db (Spec.conv2dBiasDerivSpec layer input δ)

source

theorem Proofs.Autograd.Conv2D.dot3_eq_sum {C H W : ℕ} (a b : Spec.Tensor ℝ (Spec.Shape.dim C (Spec.Shape.dim H (Spec.Shape.dim W Spec.Shape.scalar)))) :

Spec.dot a b = ∑ c : Fin C, ∑ i : Fin H, ∑ j : Fin W, Spec.getAtOrZero a [↑c, ↑i, ↑j] * Spec.getAtOrZero b [↑c, ↑i, ↑j]

source

theorem Proofs.Autograd.Conv2D.get_at_or_zero_get_outer3 {OC IC KH KW : ℕ} (k : Spec.Tensor ℝ (Spec.Shape.dim OC (Spec.Shape.dim IC (Spec.Shape.dim KH (Spec.Shape.dim KW Spec.Shape.scalar))))) (oc : Fin OC) (ic : Fin IC) (di : Fin KH) (dj : Fin KW) :

Spec.getAtOrZero (Spec.get k oc) [↑ic, ↑di, ↑dj] = Spec.getAtOrZero k [↑oc, ↑ic, ↑di, ↑dj]

source

theorem Proofs.Autograd.Conv2D.dot4_eq_sum {OC IC KH KW : ℕ} (a b : Spec.Tensor ℝ (Spec.Shape.dim OC (Spec.Shape.dim IC (Spec.Shape.dim KH (Spec.Shape.dim KW Spec.Shape.scalar))))) :

Spec.dot a b = ∑ oc : Fin OC, ∑ ic : Fin IC, ∑ di : Fin KH, ∑ dj : Fin KW, Spec.getAtOrZero a [↑oc, ↑ic, ↑di, ↑dj] * Spec.getAtOrZero b [↑oc, ↑ic, ↑di, ↑dj]

source

theorem Proofs.Autograd.Conv2D.sum_mul {ι : Type} [Fintype ι] (f : ι → ℝ) (a : ℝ) :

(∑ i : ι, f i) * a = ∑ i : ι, f i * a

source

theorem Proofs.Autograd.Conv2D.mul_sum {ι : Type} [Fintype ι] (a : ℝ) (f : ι → ℝ) :

a * ∑ i : ι, f i = ∑ i : ι, a * f i

source

theorem Proofs.Autograd.Conv2D.sum_comm {α β : Type} [Fintype α] [Fintype β] (f : α → β → ℝ) :

∑ a : α, ∑ b : β, f a b = ∑ b : β, ∑ a : α, f a b

paddedInput matches the forward spec’s padding convention:

if padding = 0, it is just a shape cast; and
otherwise it is pad_multi_channel.

PyTorch analogue: torch.nn.functional.pad (or implicit padding inside conv2d). https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html https://pytorch.org/docs/stable/generated/torch.nn.functional.conv2d.html

source

noncomputable def Proofs.Autograd.Conv2D.paddedInput {inC inH inW padding : ℕ} (input : Spec.MultiChannelImage inC inH inW ℝ) :

Spec.MultiChannelImage inC (inH + 2 * padding) (inW + 2 * padding) ℝ

Instances For

source

theorem Proofs.Autograd.Conv2D.get_at_or_zero_paddedInput {inC inH inW padding : ℕ} (img : Spec.MultiChannelImage inC inH inW ℝ) (c : Fin inC) (p q : ℕ) :

Spec.getAtOrZero (paddedInput img) [↑c, p, q] = if _h : p < padding ∨ q < padding then 0 else Spec.getAtOrZero img [↑c, p - padding, q - padding]

source

theorem Proofs.Autograd.Conv2D.mkInputIdx_match_eq_paddedInput {inC inH inW stride padding : ℕ} (img : Spec.MultiChannelImage inC inH inW ℝ) (c : Fin inC) (oi di oj dj : ℕ) :

(match Spec.Private.mkInputIdx? [oi, oj] [di, dj] [stride, stride] [padding, padding] with | none => 0 | some inIdx => Spec.getAtOrZero img (↑c :: inIdx)) = Spec.getAtOrZero (paddedInput img) [↑c, oi * stride + di, oj * stride + dj]

source

theorem Proofs.Autograd.Conv2D.sum_shift_eq_paddedInput {inC inH inW padding : ℕ} (x : Spec.MultiChannelImage inC inH inW ℝ) (ic : Fin inC) (p q : ℕ) :

(∑ i : Fin inH, ∑ j : Fin inW, if p = ↑i + padding ∧ q = ↑j + padding then Spec.getAtOrZero x [↑ic, ↑i, ↑j] else 0) = Spec.getAtOrZero (paddedInput x) [↑ic, p, q]

Output shape helpers (no dilation): these are the standard “convolution arithmetic” formulas. They are kept as definitions so later statements can share the same expression.

source

def Proofs.Autograd.Conv2D.outH (inH kH stride padding : ℕ) :

ℕ

Instances For

source

def Proofs.Autograd.Conv2D.outW (inW kW stride padding : ℕ) :

ℕ

Output width helper (no dilation).

Instances For

source

theorem Proofs.Autograd.Conv2D.conv2d_spec_noBias_get {inC outC kH kW stride padding inH inW : ℕ} {h1 : inC ≠ 0} {h2 : kH ≠ 0} {h3 : kW ≠ 0} (dKernel : Spec.Tensor ℝ (Spec.Shape.dim outC (Spec.Shape.dim inC (Spec.Shape.dim kH (Spec.Shape.dim kW Spec.Shape.scalar))))) (input : Spec.MultiChannelImage inC inH inW ℝ) (oc : Fin outC) (i : Fin (outH inH kH stride padding)) (j : Fin (outW inW kW stride padding)) :

have layerK := { kernel := dKernel, bias := Spec.fill 0 (Spec.Shape.dim outC Spec.Shape.scalar) }; Spec.getAtOrZero (Spec.conv2dSpec layerK input) [↑oc, ↑i, ↑j] = ∑ ic : Fin inC, ∑ di : Fin kH, ∑ dj : Fin kW, Spec.getAtOrZero dKernel [↑oc, ↑ic, ↑di, ↑dj] * Spec.getAtOrZero (paddedInput input) [↑ic, ↑i * stride + ↑di, ↑j * stride + ↑dj]

source

theorem Proofs.Autograd.Conv2D.conv2d_kernel_deriv_get {inC outC kH kW stride padding inH inW : ℕ} {h1 : inC ≠ 0} {h2 : kH ≠ 0} {h3 : kW ≠ 0} (layer : Spec.Conv2DSpec inC outC kH kW stride padding ℝ h1 h2 h3) (input : Spec.MultiChannelImage inC inH inW ℝ) (δ : Spec.MultiChannelImage outC (outH inH kH stride padding) (outW inW kW stride padding) ℝ) (oc : Fin outC) (ic : Fin inC) (di : Fin kH) (dj : Fin kW) :

Spec.getAtOrZero (Spec.conv2dKernelDerivSpec layer input δ) [↑oc, ↑ic, ↑di, ↑dj] = ∑ i : Fin (outH inH kH stride padding), ∑ j : Fin (outW inW kW stride padding), Spec.getAtOrZero (paddedInput input) [↑ic, ↑i * stride + ↑di, ↑j * stride + ↑dj] * Spec.getAtOrZero δ [↑oc, ↑i, ↑j]

source

theorem Proofs.Autograd.Conv2D.conv2d_input_deriv_get {inC outC kH kW stride padding inH inW : ℕ} {h1 : inC ≠ 0} {h2 : kH ≠ 0} {h3 : kW ≠ 0} (layer : Spec.Conv2DSpec inC outC kH kW stride padding ℝ h1 h2 h3) (input : Spec.MultiChannelImage inC inH inW ℝ) (δ : Spec.MultiChannelImage outC (outH inH kH stride padding) (outW inW kW stride padding) ℝ) (ic : Fin inC) (i : Fin inH) (j : Fin inW) :

Spec.getAtOrZero (Spec.conv2dInputDerivSpec layer input δ) [↑ic, ↑i, ↑j] = ∑ oc : Fin outC, ∑ oi : Fin (outH inH kH stride padding), ∑ oj : Fin (outW inW kW stride padding), ∑ di : Fin kH, ∑ dj : Fin kW, if _h : ↑oi * stride + ↑di = ↑i + padding ∧ ↑oj * stride + ↑dj = ↑j + padding then Spec.getAtOrZero δ [↑oc, ↑oi, ↑oj] * Spec.getAtOrZero layer.kernel [↑oc, ↑ic, ↑di, ↑dj] else 0

TorchLean API

NN.Proofs.Autograd.Tape.Ops.Conv.BackwardDot.Common

BackwardDot #