% Perspective article in Biochemistry (2022). The summary text lives in `abstract`
% rather than `note`, so styles do not print it in the reference list.
@article{oai:oist.repo.nii.ac.jp:00002724,
  author   = {Clifton, Ben E. and Kozome, Dan and Laurino, Paola},
  title    = {Efficient Exploration of Sequence Space by Sequence-Guided Protein Engineering and Design},
  journal  = {Biochemistry},
  year     = {2022},
  month    = mar,
  abstract = {The rapid growth of sequence databases over the past two decades means that protein engineers faced with optimizing a protein for any given task will often have immediate access to a vast number of related protein sequences. These sequences encode information about the evolutionary history of the protein and the underlying sequence requirements to produce folded, stable, and functional protein variants. Methods that can take advantage of this information are an increasingly important part of the protein engineering tool kit. In this Perspective, we discuss the utility of sequence data in protein engineering and design, focusing on recent advances in three main areas: the use of ancestral sequence reconstruction as an engineering tool to generate thermostable and multifunctional proteins, the use of sequence data to guide engineering of multipoint mutants by structure-based computational protein design, and the use of unlabeled sequence data for unsupervised and semisupervised machine learning, allowing the generation of diverse and functional protein sequences in unexplored regions of sequence space. Altogether, these methods enable the rapid exploration of sequence space within regions enriched with functional proteins and therefore have great potential for accelerating the engineering of stable, functional, and diverse proteins for industrial and biomedical applications.},
}