import numpy as np
from .optimizer import Optimizer
class SGD(Optimizer):
"""
Implementation of Stochastic Gradient Descent (optionally with
momentum).
.. math::
v_{t+1} = \mu \cdot v_t + g_{t+1}
.. math::
\\theta_{t+1} = \\theta_t - \\text{lr} \cdot v_{t+1}
where :math:`\\theta`, :math:`g`, :math:`v` and :math:`\mu` denote the
parameters, gradient, velocity, and momentum respectively.
Parameters
----------
params : iterable
An iterable of Tensor
lr : float, optional, default=0.01
Learning rate
momentum : float, optional, default=0.
Momentum factor
nesterov : bool, optional, default=False
Enable Nesterov momentum or not
weight_decay : float, optional, default=0
Weight decay (L2 penalty)
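
    Examples
    --------
    A minimal usage sketch; it assumes ``Tensor``-like parameters exposing
    ``data``, ``grad`` and ``requires_grad``, and that gradients have already
    been populated by a backward pass (``model`` and ``criterion`` below are
    hypothetical)::

        optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)
        loss = criterion(model(x), y)
        loss.backward()   # fills p.grad for every parameter
        optimizer.step()  # applies the update rule above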
"""
    def __init__(
        self,
        params=None,
        lr: float = 0.01,
        momentum: float = 0.,
        nesterov: bool = False,
        weight_decay: float = 0.
    ):
        super(SGD, self).__init__(params, lr, weight_decay)
        self.momentum = momentum
        self.nesterov = nesterov
        # one velocity buffer per parameter, initialized to zero
        self.v = [np.zeros_like(p.data) for p in self.params]
    def step(self):
        for i, (v, p) in enumerate(zip(self.v, self.params)):
            if p.requires_grad:
                # L2 penalty folded into the gradient
                p_grad = p.grad + self.weight_decay * p.data
                # heavy ball / Polyak's momentum: v <- mu * v + g
                v = self.momentum * v + p_grad
                self.v[i] = v
                # Nesterov momentum: step along the look-ahead direction
                # g + mu * v instead of v itself; the stored velocity above
                # is left unchanged
                if self.nesterov:
                    v = self.momentum * v + p_grad
                # update parameters
                p.data -= self.lr * v
        super(SGD, self).step()
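

# A hypothetical, self-contained sanity check (not part of the library API):
# it assumes the base ``Optimizer`` simply stores ``params``, ``lr`` and
# ``weight_decay`` as used in ``step`` above, and uses a bare-bones stand-in
# for the project's ``Tensor`` class. Because of the relative import at the
# top, this module would have to be run as part of its package (python -m ...).
if __name__ == "__main__":
    class _FakeTensor:
        """Minimal stand-in exposing the attributes SGD.step relies on."""
        def __init__(self, data):
            self.data = np.asarray(data, dtype=float)
            self.grad = np.zeros_like(self.data)
            self.requires_grad = True

    p = _FakeTensor([1.0, -2.0])
    p.grad = np.array([0.5, 0.5])

    opt = SGD([p], lr=0.1, momentum=0.0, weight_decay=0.01)
    # plain SGD (no momentum): theta <- theta - lr * (g + weight_decay * theta)
    expected = p.data - 0.1 * (p.grad + 0.01 * p.data)
    opt.step()
    assert np.allclose(p.data, expected), "SGD step should match the update rule"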