Source code for flint.optim.adagrad

import numpy as np
from .optimizer import Optimizer

class Adagrad(Optimizer):
    """
    Implementation of the Adagrad algorithm proposed in [1].

    .. math::
        h_t = h_{t-1} + g_t^2

    .. math::
        \\theta_{t+1} = \\theta_t - \\frac{\\text{lr}}{\\sqrt{h_t + \\epsilon}} \\cdot g_t

    Parameters
    ----------
    params : iterable
        An iterable of Tensor
    lr : float, optional, default=0.01
        Learning rate
    eps : float, optional, default=1e-10
        Term added to the denominator to improve numerical stability
    weight_decay : float, optional, default=0.
        Weight decay (L2 penalty)

    References
    ----------
    1. "`Adaptive Subgradient Methods for Online Learning and Stochastic
       Optimization <https://jmlr.org/papers/volume12/duchi11a/duchi11a.pdf>`_"
       John Duchi, et al. JMLR 2011.
    """

    def __init__(
        self,
        params = None,
        lr: float = 0.01,
        eps: float = 1e-10,
        weight_decay: float = 0.
    ):
        super(Adagrad, self).__init__(params, lr, weight_decay)
        self.eps = eps
        # per-parameter accumulators of squared gradients (h in the equations above)
        self.h = [np.zeros_like(p.data) for p in self.params]
    def step(self):
        for i, (h, p) in enumerate(zip(self.h, self.params)):
            if p.requires_grad:
                # gradient with L2 penalty (weight decay)
                p_grad = p.grad + self.weight_decay * p.data
                # accumulate squared gradients
                h += p_grad ** 2
                self.h[i] = h
                # update parameters
                p.data -= self.lr * p_grad / np.sqrt(h + self.eps)
        super(Adagrad, self).step()
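
The following is a minimal standalone sketch of the same accumulate-and-scale update applied to a toy quadratic objective. It uses plain NumPy arrays rather than flint Tensors, and the values lr=0.5 and weight_decay=0.0 are arbitrary illustrative choices (not the optimizer's defaults), picked so the toy example converges quickly.

import numpy as np

# Toy objective: f(theta) = 0.5 * ||theta||^2, so the gradient is theta itself.
theta = np.array([1.0, -2.0, 3.0])
lr, eps, weight_decay = 0.5, 1e-10, 0.0   # illustrative values, not defaults

h = np.zeros_like(theta)                  # accumulated squared gradients (h_t)
for _ in range(100):
    grad = theta + weight_decay * theta   # gradient plus L2 penalty
    h += grad ** 2                        # h_t = h_{t-1} + g_t^2
    theta -= lr * grad / np.sqrt(h + eps) # theta_{t+1} = theta_t - lr * g_t / sqrt(h_t + eps)

print(theta)  # close to [0, 0, 0], the minimizer of the toy objective

Each coordinate accumulates its own h, so coordinates with larger past gradients take proportionally smaller steps, which is the per-coordinate scaling the equations in the docstring describe.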