KAUST in the summers of 2019 and 2020, where I worked on distributed and stochastic optimization. Prior to that, I did research on accelerating the training of neural networks with Prof. Amir Atiya.
Understanding Outer Optimizers in Local SGD: Learning Rates, Momentum, and Acceleration
arXiv preprint, with Satyen Kale, Arthur Douillard, Chi Jin, Rob Fergus, and Manzil Zaheer.
[bibtex]@article{khaled25_und_outer,
author={Ahmed Khaled and Satyen Kale and Arthur Douillard and Chi Jin and Rob Fergus and Manzil Zaheer},
year=2025,
title={Understanding Outer Optimizers in Local SGD: Learning Rates, Momentum, and Acceleration},
journal={arXiv preprint arXiv:2509.10439},
volume={abs/2509.10439},
url={https://arxiv.org/abs/2509.10439},
}
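To give a flavor of the setup this paper studies: in Local SGD with an outer optimizer, each worker runs a few SGD steps from the current server parameters, and the server treats the averaged parameter change as a pseudo-gradient for an outer step (here with heavy-ball momentum). The sketch below is an illustrative toy, not the algorithm or hyperparameters from the paper; the worker_grads interface, the step sizes, and the use of plain NumPy are my own assumptions.

import numpy as np

def local_sgd_round(x, worker_grads, inner_lr=0.01, local_steps=10,
                    outer_lr=1.0, momentum=0.9, buf=None):
    """One communication round of Local SGD with an outer momentum step (toy sketch).

    x            : current server parameters (np.ndarray)
    worker_grads : list of callables; worker_grads[i](w) returns a stochastic
                   gradient of worker i's loss at w (hypothetical interface)
    buf          : outer momentum buffer carried across rounds
    """
    deltas = []
    for grad_fn in worker_grads:
        w = x.copy()
        for _ in range(local_steps):        # inner SGD steps on each worker
            w -= inner_lr * grad_fn(w)
        deltas.append(x - w)                # parameter change, signed so it acts like a gradient
    pseudo_grad = np.mean(deltas, axis=0)   # average of the worker updates
    buf = pseudo_grad if buf is None else momentum * buf + pseudo_grad
    x = x - outer_lr * buf                  # outer (server) momentum step
    return x, buf

With outer_lr=1 and momentum=0 this reduces to plain parameter averaging (FedAvg-style); the outer learning rate and momentum are exactly the knobs the paper analyzes.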
Directional Smoothness and Gradient Methods: Convergence and Adaptivity
NeurIPS 2024, with Aaron Mishkin, Yuanhao Wang, Aaron Defazio, and Robert M. Gower.
[bibtex]@article{mishkin24_dir_smoothness,
author = {Mishkin, Aaron and Khaled, Ahmed and Wang, Yuanhao and Defazio, Aaron and Gower, Robert M.},
url = {https://arxiv.org/abs/2403.04081},
year = {2024},
journal = {arXiv preprint arXiv:2403.04081},
title = {Directional Smoothness and Gradient Methods: Convergence and Adaptivity},
volume = {abs/2403.04081},
}
The Road Less Scheduled
NeurIPS 2024 Oral, with Aaron Defazio, Xingyu (Alice) Yang, Harsh Mehta, Konstantin Mishchenko, and Ashok Cutkosky.
[bibtex]@article{defazio2024road,
title={The Road Less Scheduled},
author={Defazio, Aaron and Yang, Xingyu Alice and Mehta, Harsh and Mishchenko, Konstantin and Khaled, Ahmed and Cutkosky, Ashok},
journal={arXiv preprint arXiv:2405.15682},
year={2024}
}
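For readers unfamiliar with the schedule-free idea: instead of decaying the learning rate on a horizon-dependent schedule, the method keeps a base iterate and a running average and evaluates gradients at an interpolation of the two. The sketch below is my from-memory reconstruction of schedule-free SGD and may differ from the paper and the official implementation in constants and details; grad_fn, beta, and the 1/t averaging weights are assumptions here.

import numpy as np

def schedule_free_sgd(grad_fn, x0, lr=0.1, beta=0.9, steps=1000):
    """Sketch of schedule-free SGD: no learning-rate schedule; gradients are
    evaluated at an interpolation y of the running average x and the base
    iterate z. Constants and details are illustrative assumptions."""
    z = x0.copy()   # base (SGD-like) iterate
    x = x0.copy()   # running weighted average, returned at the end
    for t in range(1, steps + 1):
        y = (1 - beta) * z + beta * x   # point where the gradient is taken
        z = z - lr * grad_fn(y)         # SGD step on the base iterate
        c = 1.0 / t                     # averaging weight
        x = (1 - c) * x + c * z         # update the running average
    return x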
Federated Optimization Algorithms with Random Reshuffling and Gradient Compression
NeurIPS 2024, with Abdurakhmon Sadiev, Grigory Malinovsky, Eduard Gorbunov, Igor Sokolov, Konstantin Burlachenko, and Peter Richtárik.
[bibtex]@article{sadiev22_feder_optim_algor_with_random,
author = {Sadiev, Abdurakhmon and Malinovsky, Grigory and Gorbunov, Eduard and Sokolov, Igor and Khaled, Ahmed and Burlachenko, Konstantin and Richtárik, Peter},
url = {https://arXiv.org/abs/2206.07021},
year = {2022},
journal = {arXiv preprint arXiv:2206.07021},
title = {Federated Optimization Algorithms With Random Reshuffling and Gradient Compression},
volume = {abs/2206.07021},
}
Tuning-Free Stochastic Optimization
ICML 2024 Spotlight, with Chi Jin.
[bibtex]@article{khaled24_tuning_free,
author = {Khaled, Ahmed and Jin, Chi},
url = {https://arxiv.org/abs/2402.07793},
year = {2024},
journal = {arXiv preprint arXiv:2402.07793},
title = {Tuning-Free Stochastic Optimization},
volume = {abs/2402.07793},
}
DoWG Unleashed: An Efficient Universal Parameter-Free Gradient Descent Method
Advances in Neural Information Processing Systems 36 (NeurIPS 2023), with Chi Jin and Konstantin Mishchenko.
[bibtex]@article{khaled23_dowg,
author = {Khaled, Ahmed and Mishchenko, Konstantin and Jin, Chi},
url = {https://arXiv.org/abs/2305.16284},
year = {2023},
journal = {arXiv preprint arXiv:2305.16284},
title = {{DoWG} Unleashed: An Efficient Universal Parameter-Free Gradient Descent Method},
volume = {abs/2305.16284},
}
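DoWG ("distance over weighted gradients") chooses its step size from the running distance to the initial point and a distance-weighted sum of squared gradient norms, with no learning-rate tuning. The following is my rough reconstruction of that update rather than the published pseudocode; the initial estimate r_eps, the fixed iteration count, and the small denominator guard are assumptions.

import numpy as np

def dowg(grad_fn, x0, steps=1000, r_eps=1e-4):
    """Sketch of a DoWG-style parameter-free gradient method.

    r tracks the largest distance travelled from x0, v accumulates
    distance-weighted squared gradient norms, and the step size is r**2 / sqrt(v).
    """
    x = x0.copy()
    r = r_eps          # running distance estimate (small positive initialization)
    v = 0.0            # weighted sum of squared gradient norms
    for _ in range(steps):
        g = grad_fn(x)
        r = max(r, np.linalg.norm(x - x0))
        v += r ** 2 * np.linalg.norm(g) ** 2
        # small constant guards against division by zero if the first gradient is exactly zero
        x = x - (r ** 2 / (np.sqrt(v) + 1e-16)) * g
    return x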
Faster federated optimization under second-order similarity
The 11th International Conference on Learning Representations (ICLR 2023), with Chi Jin.
[bibtex]@Article{kj22_faster_federated,
author = {Ahmed Khaled and Chi Jin},
title = {{Faster federated optimization under second-order similarity}},
journal = {arXiv preprint arXiv:2209.02257},
year = {2022}
}
Better Theory for SGD in the Nonconvex World
Transactions on Machine Learning Research (TMLR), 2023, with Peter Richtárik. The original preprint, arXiv:2002.03329, has been on arXiv since 2020.
[bibtex]@Article{KR2020sgdnonconvex,
author = {Ahmed Khaled and Peter Richt{\'a}rik},
title = {{Better Theory for SGD in the Nonconvex World}},
journal = {arXiv preprint arXiv:2002.03329},
year = {2020}
}
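As I recall it, the analysis in this paper replaces bounded-variance assumptions with an expected smoothness ("ABC") bound on the second moment of the stochastic gradient. The display below is my paraphrase of that condition, not a quotation; the constants A, B, C >= 0 and the lower bound f^inf follow my own notation.

\[
  \mathbb{E}_{\xi}\left[\|\nabla f_{\xi}(x)\|^{2}\right]
  \;\le\; 2A\left(f(x) - f^{\inf}\right) + B\,\|\nabla f(x)\|^{2} + C
  \qquad \text{for all } x \in \mathbb{R}^{d}.
\]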
Proximal and Federated Random Reshuffling
The 39th International Conference on Machine Learning (ICML 2022), with Konstantin Mishchenko and Peter Richtárik.
[bibtex]@Article{MKR2020randomreshuffling,
author = {Konstantin Mishchenko and Ahmed Khaled and Peter Richt{\'a}rik},
title = {{Proximal and Federated Random Reshuffling}},
journal = {arXiv preprint arXiv:2102.06704},
year = {2021},
}
FLIX: A Simple and Communication-Efficient Alternative to Local Methods in Federated Learning
The 25th International Conference on Artificial Intelligence and Statistics (AISTATS 2022), with Elnur Gasanov, Samuel Horváth, and Peter Richtárik.
[bibtex]@InProceedings{GKHR2022flix,
title = { {FLIX}: A Simple and Communication-Efficient Alternative to Local Methods in Federated Learning },
author = {Gasanov, Elnur and Khaled, Ahmed and Horv\'ath, Samuel and Richt{\'a}rik, Peter},
booktitle = {Proceedings of The 25th International Conference on Artificial Intelligence and Statistics},
pages = {11374--11421},
year = {2022},
editor = {Camps-Valls, Gustau and Ruiz, Francisco J. R. and Valera, Isabel},
volume = {151},
series = {Proceedings of Machine Learning Research},
month = {28--30 Mar},
publisher = {PMLR},
pdf = {https://proceedings.mlr.press/v151/gasanov22a/gasanov22a.pdf},
url = {https://proceedings.mlr.press/v151/gasanov22a.html},
abstract = { Federated Learning (FL) is an increasingly popular machine learning paradigm in which multiple nodes try to collaboratively learn under privacy, communication and multiple heterogeneity constraints. A persistent problem in federated learning is that it is not clear what the optimization objective should be: the standard average risk minimization of supervised learning is inadequate in handling several major constraints specific to federated learning, such as communication adaptivity and personalization control. We identify several key desiderata in frameworks for federated learning and introduce a new framework, FLIX, that takes into account the unique challenges brought by federated learning. FLIX has a standard finite-sum form, which enables practitioners to tap into the immense wealth of existing (potentially non-local) methods for distributed optimization. Through a smart initialization that does not require any communication, FLIX does not require the use of local steps but is still provably capable of performing dissimilarity regularization on par with local methods. We give several algorithms for solving the FLIX formulation efficiently under communication constraints. Finally, we corroborate our theoretical results with extensive experimentation. }
}
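To make the abstract above slightly more concrete: my reading is that FLIX fixes, for each device i, a local solution x_i^* computed before any communication and a personalization weight alpha_i, and then minimizes an ordinary finite sum over the blended points. The display below is a paraphrase of that formulation under those assumptions, not a quotation from the paper.

\[
  \min_{x \in \mathbb{R}^{d}} \; \frac{1}{n} \sum_{i=1}^{n}
  f_{i}\!\left(\alpha_{i} x + (1 - \alpha_{i})\, x_{i}^{*}\right),
  \qquad x_{i}^{*} \in \arg\min_{z} f_{i}(z), \quad \alpha_{i} \in [0, 1].
\]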
Random Reshuffling: Simple Analysis with Vast Improvements
Advances in Neural Information Processing Systems 33 (NeurIPS 2020), with Konstantin Mishchenko and Peter Richtárik.
[bibtex]@Article{MKR2020randomreshuffling,
author = {Konstantin Mishchenko and Ahmed Khaled and Peter Richt{\'a}rik},
title = {{Random Reshuffling: Simple Analysis with Vast Improvements}},
journal = {arXiv preprint arXiv:2006.05988},
year = {2020},
}
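For context, random reshuffling is the without-replacement variant of SGD that most practitioners actually run: draw a fresh permutation of the data each epoch and take one pass through it. The sketch below only illustrates that sampling scheme, not the paper's analysis or rates; the grad_fn(x, i) interface and the constant step size are assumptions.

import numpy as np

def random_reshuffling_sgd(grad_fn, x0, n, epochs=10, lr=0.01, seed=0):
    """SGD with random reshuffling: one pass over a fresh permutation per epoch.

    grad_fn(x, i) is assumed to return the gradient of the i-th component
    function f_i at x; n is the number of component functions.
    """
    rng = np.random.default_rng(seed)
    x = x0.copy()
    for _ in range(epochs):
        perm = rng.permutation(n)      # new permutation each epoch (reshuffling)
        for i in perm:                 # one pass without replacement
            x = x - lr * grad_fn(x, i)
    return x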
Tighter Theory for Local SGD on Identical and Heterogeneous Data
The 23rd International Conference on Artificial Intelligence and Statistics (AISTATS 2020), with Konstantin Mishchenko and Peter Richtárik. Extends the workshop papers (a, b).
[bibtex]@InProceedings{KMR2020localsgd,
title = {Tighter Theory for Local SGD on Identical and Heterogeneous Data},
author = {Khaled, Ahmed and Mishchenko, Konstantin and Richt{\'a}rik, Peter},
booktitle = {Proceedings of the Twenty Third International Conference on Artificial Intelligence and Statistics},
pages = {4519--4529},
year = {2020},
editor = {Chiappa, Silvia and Calandra, Roberto},
volume = {108},
series = {Proceedings of Machine Learning Research},
address = {Online},
month = {26--28 Aug},
publisher = {PMLR}
}
Unified Analysis of Stochastic Gradient Methods for Composite Convex and Smooth Optimization
Journal version to appear in JOTA (2023), original preprint from 2020, with Othmane Sebbouh, Nicolas Loizou, Robert M. Gower, and Peter Richtárik.
[bibtex]@Article{KSLGR2020unifiedsgm,
author = {Ahmed Khaled and Othmane Sebbouh and Nicolas Loizou and Robert M. Gower and Peter Richt{\'a}rik},
title = {{Unified Analysis of Stochastic Gradient Methods for Composite Convex and Smooth Optimization}},
journal = {arXiv preprint arXiv:2006.11573},
year = {2020},
}
Distributed Fixed Point Methods with Compressed Iterates
Preprint (2019), with Sélim Chraibi, Dmitry Kovalev, Peter Richtárik, Adil Salim, and Martin Takáč.
[bibtex]@Article{CKKRST2019distributed,
title={{Distributed Fixed Point Methods with Compressed Iterates}},
author={S\'{e}lim Chraibi and Ahmed Khaled and Dmitry Kovalev and Peter Richt{\'a}rik and Adil Salim and Martin Tak\'{a}\v{c}},
journal={arXiv preprint arXiv:1912.09925},
year={2019}
}
Applying Fast Matrix Multiplication to Neural Networks
The 35th ACM/SIGAPP Symposium On Applied Computing (ACM SAC) 2020, with Amir F. Atiya and Ahmed H. Abdel-Gawad.
[bibtex]@inproceedings{KAA2020Applyingfmm,
author = {Khaled, Ahmed and Atiya, Amir F. and Abdel-Gawad, Ahmed H.},
title = {Applying Fast Matrix Multiplication to Neural Networks},
year = {2020},
isbn = {9781450368667},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3341105.3373852},
doi = {10.1145/3341105.3373852},
booktitle = {Proceedings of the 35th Annual ACM Symposium on Applied Computing},
pages = {1034--1037},
numpages = {4},
keywords = {neural networks, fast matrix multiplication, GPU matrix multiplication, Strassen's algorithm, Winograd's algorithm},
location = {Brno, Czech Republic},
series = {SAC '20}
}
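The keywords above mention Strassen's algorithm; for readers who have not seen it, here is the standard textbook recursion that multiplies two square matrices using seven recursive products instead of eight. This is general knowledge, not code from the paper; the power-of-two size restriction and the leaf cutoff are simplifying assumptions.

import numpy as np

def strassen(A, B, leaf=64):
    """Strassen's matrix multiplication for square matrices whose side length
    is a power of two; falls back to NumPy's matmul below the leaf size."""
    n = A.shape[0]
    if n <= leaf:
        return A @ B
    h = n // 2
    A11, A12, A21, A22 = A[:h, :h], A[:h, h:], A[h:, :h], A[h:, h:]
    B11, B12, B21, B22 = B[:h, :h], B[:h, h:], B[h:, :h], B[h:, h:]
    M1 = strassen(A11 + A22, B11 + B22, leaf)
    M2 = strassen(A21 + A22, B11, leaf)
    M3 = strassen(A11, B12 - B22, leaf)
    M4 = strassen(A22, B21 - B11, leaf)
    M5 = strassen(A11 + A12, B22, leaf)
    M6 = strassen(A21 - A11, B11 + B12, leaf)
    M7 = strassen(A12 - A22, B21 + B22, leaf)
    C = np.empty_like(A)
    C[:h, :h] = M1 + M4 - M5 + M7
    C[:h, h:] = M3 + M5
    C[h:, :h] = M2 + M4
    C[h:, h:] = M1 - M2 + M3 + M6
    return C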