Source code for flint.optim.sgd

import numpy as np
from .optimizer import Optimizer

class SGD(Optimizer):
    r"""
    Implementation of stochastic gradient descent (optionally with momentum).

    .. math::
        v_{t+1} = \mu \cdot v_t + g_{t+1}

    .. math::
        \theta_{t+1} = \theta_t - \text{lr} \cdot v_{t+1}

    where :math:`\theta`, :math:`g`, :math:`v` and :math:`\mu` denote the
    parameters, gradient, velocity, and momentum respectively.

    Parameters
    ----------
    params : iterable
        An iterable of Tensor
    lr : float, optional, default=0.01
        Learning rate
    momentum : float, optional, default=0.
        Momentum factor
    nesterov : bool, optional, default=False
        Enable Nesterov momentum or not
    weight_decay : float, optional, default=0.
        Weight decay (L2 penalty)
    """

    def __init__(
        self,
        params=None,
        lr: float = 0.01,
        momentum: float = 0.,
        nesterov: bool = False,
        weight_decay: float = 0.
    ):
        super(SGD, self).__init__(params, lr, weight_decay)

        self.momentum = momentum
        self.nesterov = nesterov
        # one velocity buffer per parameter, initialized to zero
        self.v = [np.zeros_like(p.data) for p in self.params]
    def step(self):
        for i, (v, p) in enumerate(zip(self.v, self.params)):
            if p.requires_grad:
                # L2 penalty (weight decay) folded into the gradient
                p_grad = p.grad + self.weight_decay * p.data

                # heavy ball / Polyak's momentum: v <- mu * v + g
                v = self.momentum * v + p_grad
                self.v[i] = v

                # Nesterov's momentum: re-apply the velocity to look ahead
                if self.nesterov:
                    v = self.momentum * v + p_grad

                # update parameters
                p.data -= self.lr * v

        super(SGD, self).step()
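
# ---------------------------------------------------------------------------
# Standalone sketch of the update rule above, added for illustration; it is
# not part of the flint API. It mirrors step() with plain numpy so it runs
# without Tensor objects: weight decay is folded into the gradient, the
# velocity follows v <- mu * v + g, and Nesterov re-applies the momentum
# before the parameter update. The objective f(theta) = ||theta||^2 is an
# arbitrary example chosen only to produce gradients.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    rng = np.random.default_rng(0)

    theta = rng.normal(size=3)                   # parameters
    v = np.zeros_like(theta)                     # velocity buffer
    lr, mu, weight_decay, nesterov = 0.01, 0.9, 1e-4, True

    for _ in range(5):
        grad = 2.0 * theta                       # gradient of ||theta||^2
        grad = grad + weight_decay * theta       # L2 penalty
        v = mu * v + grad                        # heavy-ball momentum
        step_dir = mu * v + grad if nesterov else v
        theta = theta - lr * step_dir            # parameter update
        print(theta)                             # values shrink toward zero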