Files
clearlinux.github.io/node/670.html
2023-09-20 09:02:05 -07:00

563 lines
33 KiB
HTML
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!-- THEME DEBUG -->
<!-- THEME HOOK: 'html' -->
<!-- FILE NAME SUGGESTIONS:
* html--node--670.html.twig
* html--node--%.html.twig
* html--node.html.twig
x html.html.twig
-->
<!-- BEGIN OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/layout/html.html.twig' -->
<!DOCTYPE html>
<html lang="en" dir="ltr" prefix="content: http://purl.org/rss/1.0/modules/content/ dc: http://purl.org/dc/terms/ foaf: http://xmlns.com/foaf/0.1/ og: http://ogp.me/ns# rdfs: http://www.w3.org/2000/01/rdf-schema# schema: http://schema.org/ sioc: http://rdfs.org/sioc/ns# sioct: http://rdfs.org/sioc/types# skos: http://www.w3.org/2004/02/skos/core# xsd: http://www.w3.org/2001/XMLSchema# ">
<head>
<meta charset="utf-8" />
<meta name="description" content="A typical Linux* OS offers many options for reducing the storage space of data. The typical list of compression options includes things like zlib, xz, bzip2 as well as lz4 and Snappy. Several of these compression algorithms provide a tunable, called &quot;level&quot;, a number from 0 to 9 that changes the behavior of the algorithm. The result is many choices and this blog post tries to show the differences between these choices.  About data compression  " />
<meta property="og:site_name" content="Clear Linux* Project" />
<meta property="og:type" content="Blog" />
<meta property="og:url" content="https://clearlinux.org/news-blogs/linux-os-data-compression-options-comparing-behavior" />
<meta property="og:title" content="Linux OS data compression options: Comparing behavior" />
<meta property="og:description" content="A typical Linux* OS offers many options for reducing the storage space of data. The typical list of compression options includes things like zlib, xz, bzip2 as well as lz4 and Snappy. Several of these compression algorithms provide a tunable, called &quot;level&quot;, a number from 0 to 9 that changes the behavior of the algorithm. The result is many choices and this blog post tries to show the differences between these choices.  About data compression  " />
<meta name="Generator" content="Drupal 9 (https://www.drupal.org)" />
<meta name="MobileOptimized" content="width" />
<meta name="HandheldFriendly" content="true" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<style>div#sliding-popup, div#sliding-popup .eu-cookie-withdraw-banner, .eu-cookie-withdraw-tab {background: #0779BF} div#sliding-popup.eu-cookie-withdraw-wrapper { background: transparent; } #sliding-popup h1, #sliding-popup h2, #sliding-popup h3, #sliding-popup p, #sliding-popup label, #sliding-popup div, .eu-cookie-compliance-more-button, .eu-cookie-compliance-secondary-button, .eu-cookie-withdraw-tab { color: #ffffff;} .eu-cookie-withdraw-tab { border-color: #ffffff;}</style>
<link rel="icon" href="https://clearlinux.org/modules/custom/clearlinux.org/themes/clearlinux_theme/favicon.ico" type="image/vnd.microsoft.icon" />
<link rel="canonical" href="670.html" />
<link rel="shortlink" href="670.html" />
<script src="https://clearlinux.org/sites/default/files/eu_cookie_compliance/eu_cookie_compliance.script.js" defer></script>
<title>Linux OS data compression options: Comparing behavior | Clear Linux* Project</title>
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/ajax-progress.module.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/align.module.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/autocomplete-loading.module.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/fieldgroup.module.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/container-inline.module.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/clearfix.module.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/details.module.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/hidden.module.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/item-list.module.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/js.module.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/nowrap.module.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/position-container.module.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/progress.module.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/reset-appearance.module.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/resize.module.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/sticky-header.module.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/system-status-counter.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/system-status-report-counters.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/system-status-report-general-info.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/tabledrag.module.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/tablesort.module.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/tree-child.module.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/modules/contrib/eu_cookie_compliance/css/eu_cookie_compliance.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/modules/contrib/extlink/extlink.css" />
<link rel="stylesheet" media="all" href="https://use.fontawesome.com/releases/v6.1.0/css/all.css" />
<link rel="stylesheet" media="all" href="https://use.fontawesome.com/releases/v6.1.0/css/v4-shims.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/libraries/codesnippet/lib/highlight/styles/monokai_sublime.css" />
<link rel="stylesheet" media="all" href="https://clearlinux.org/modules/custom/clearlinux.org/themes/clearlinux_theme/css/styles.css" />
<link rel="stylesheet" media="all" href="https://cdnjs.cloudflare.com/ajax/libs/OwlCarousel2/2.2.1/assets/owl.carousel.min.css" integrity="sha256-AWqwvQ3kg5aA5KcXpX25sYKowsX97sTCTbeo33Yfyk0=" crossorigin="anonymous" />
<script src="https://clearlinux.org/core/assets/vendor/modernizr/modernizr.min.js?v=3.11.7"></script>
<script src="https://clearlinux.org/core/misc/modernizr-additional-tests.js?v=3.11.7"></script>
</head>
<body class="alias--news-blogs-linux-os-data-compression-options-comparing-behavior nodetype--blog logged-out">
<div id="skip">
<a class="visually-hidden focusable skip-link" href="670.html#main-menu">
Skip to main navigation
</a>
</div>
<!-- THEME DEBUG -->
<!-- THEME HOOK: 'off_canvas_page_wrapper' -->
<!-- BEGIN OUTPUT from 'core/themes/stable/templates/content/off-canvas-page-wrapper.html.twig' -->
<div class="dialog-off-canvas-main-canvas" data-off-canvas-main-canvas>
<!-- THEME DEBUG -->
<!-- THEME HOOK: 'page' -->
<!-- FILE NAME SUGGESTIONS:
* page--node--blog.html.twig
* page--node--670.html.twig
* page--node--%.html.twig
* page--node.html.twig
x page.html.twig
-->
<!-- BEGIN OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/layout/page.html.twig' -->
<!-- ______________________ HEADER _______________________ -->
<header id="header">
<div class="container padding-md--left-right">
<div class="header__menu_mobile">
<i class="fa fa-bars header__menu_mobile__control" aria-hidden="true"></i>
</div>
<div id="header__site_info">
<div class="header__site_img_wrapper">
<a href ="https://clearlinux.org/">
<img class="header__site_img_object" src="https://clearlinux.org/modules/custom/clearlinux.org/themes/clearlinux_theme/clear_linux_logo.svg" alt="Logo Clear Linux* Project"/>
<img class="header__site_txt_object" src="https://clearlinux.org/modules/custom/clearlinux.org/themes/clearlinux_theme/sass/components/layout/header/assets/clear-linux-text.svg" alt="" />
</a>
</div>
</div>
<nav class="header__menu">
<ul class="header__menu_list">
<li class="header__menu_list_item ">
<a tabindex='1' href="31099.html">About</a>
</li>
<li class="header__menu_list_item ">
<a tabindex='1' href="31103.html">Developer</a>
</li>
<li class="header__menu_list_item ">
<a tabindex='1' href="https://clearlinux.org/software/software.html">Software</a>
</li>
</ul>
</nav>
<div class="header__search">
<div class="header__search_form__wrapper">
</div>
</div>
</div>
</header>
<!-- /header -->
<div class="header__menu-submenu green">
<div class="toolbar__container">
<div class="container padding-md--left-right">
<ul class='Header__main'>
</ul>
</div>
</div>
</div>
<div class="wrapper banner blog" >
<div class="banner__gradient "></div>
<div class="container banner__container ">
<div class="banner__content">
<h1 class="banner__title">Blogs &amp; News</h1>
</div>
</div>
</div>
<!-- Page Header -->
<div class="page_header">
<div class="page_header__main">
<!-- tabs -->
</div>
</div>
<!-- End Page Header -->
<!-- ______________________ MAIN _______________________ -->
<main class="page-standard padding-md--top padding-lg--bottom padding-md--left-right container-xl">
<!-- THEME DEBUG -->
<!-- THEME HOOK: 'region' -->
<!-- FILE NAME SUGGESTIONS:
x region--content.html.twig
* region.html.twig
-->
<!-- BEGIN OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/layout/region--content.html.twig' -->
<!-- THEME DEBUG -->
<!-- THEME HOOK: 'block' -->
<!-- FILE NAME SUGGESTIONS:
* block--clearlinux-theme-messages.html.twig
x block--system-messages-block.html.twig
* block--system.html.twig
* block.html.twig
-->
<!-- BEGIN OUTPUT from 'core/themes/stable/templates/block/block--system-messages-block.html.twig' -->
<div data-drupal-messages-fallback class="hidden"></div>
<!-- END OUTPUT from 'core/themes/stable/templates/block/block--system-messages-block.html.twig' -->
<!-- THEME DEBUG -->
<!-- THEME HOOK: 'block' -->
<!-- FILE NAME SUGGESTIONS:
x block--sharethis.html.twig
* block--sharethis-block.html.twig
x block--sharethis.html.twig
* block.html.twig
-->
<!-- BEGIN OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/block/block--sharethis.html.twig' -->
<div id="block-sharethis" data-block-plugin-id="sharethis_block" class="block block-sharethis block-sharethis-block social_share">
<div class="sharethis-wrapper">
<a target="_blank" href="https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fclearlinux.org%2Fnews-blogs%2Flinux-os-data-compression-options-comparing-behavior&amp;src=sdkpreparse" class="st_facebook_custom"></a>
<a target="_blank" href="https://twitter.com/intent/tweet?text=Clear%20Linux*%20Project&amp;url=https%3A%2F%2Fclearlinux.org%2Fnews-blogs%2Flinux-os-data-compression-options-comparing-behavior" class="st_twitter_custom"></a>
<a target="_blank" href="https://www.linkedin.com/shareArticle?mini=true&amp;url=https%3A%2F%2Fclearlinux.org%2Fnews-blogs%2Flinux-os-data-compression-options-comparing-behavior&amp;title=Clear%20Linux*%20Project" class="st_linkedin_custom"></a>
</div>
</div>
<!-- END OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/block/block--sharethis.html.twig' -->
<!-- THEME DEBUG -->
<!-- THEME HOOK: 'block' -->
<!-- FILE NAME SUGGESTIONS:
x block--clearlinux-theme-content.html.twig
* block--system-main-block.html.twig
* block--system.html.twig
* block.html.twig
-->
<!-- BEGIN OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/block/block--clearlinux-theme-content.html.twig' -->
<!-- THEME DEBUG -->
<!-- THEME HOOK: 'node' -->
<!-- FILE NAME SUGGESTIONS:
* node--670--full.html.twig
* node--670.html.twig
x node--blog--full.html.twig
* node--blog.html.twig
* node--full.html.twig
* node.html.twig
-->
<!-- BEGIN OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/content/node--blog--full.html.twig' -->
<div class="blog_detail">
<div class="blog_detail__categories">
<a tabindex='2' href='../blogs_category_2.html' title='Power and Performance'>Power and Performance</a>
</div>
<h1 class="blog_detail__title">
<!-- THEME DEBUG -->
<!-- THEME HOOK: 'field' -->
<!-- FILE NAME SUGGESTIONS:
* field--node--title--blog.html.twig
x field--node--title.html.twig
* field--node--blog.html.twig
* field--title.html.twig
* field--string.html.twig
* field.html.twig
-->
<!-- BEGIN OUTPUT from 'core/themes/stable/templates/field/field--node--title.html.twig' -->
<span>Linux OS data compression options: Comparing behavior</span>
<!-- END OUTPUT from 'core/themes/stable/templates/field/field--node--title.html.twig' -->
</h1>
<p class="blog_detail__date">03 Jan, 2017</p>
<!-- THEME DEBUG -->
<!-- THEME HOOK: 'links__node' -->
<!-- FILE NAME SUGGESTIONS:
* links--node.html.twig
x links.html.twig
-->
<!-- BEGIN OUTPUT from 'themes/contrib/cog/templates/navigation/links.html.twig' -->
<!-- END OUTPUT from 'themes/contrib/cog/templates/navigation/links.html.twig' -->
<!-- THEME DEBUG -->
<!-- THEME HOOK: 'field' -->
<!-- FILE NAME SUGGESTIONS:
* field--node--body--blog.html.twig
x field--node--body.html.twig
* field--node--blog.html.twig
* field--body.html.twig
* field--text-with-summary.html.twig
* field.html.twig
-->
<!-- BEGIN OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/field/field--node--body.html.twig' -->
<div class="Text__description">
<p><span>A typical Linux* OS offers many options for reducing the storage space of data. The typical list of compression options includes things like zlib, xz, bzip2 as well as lz4 and Snappy. Several of these compression algorithms provide a tunable, called "level", a number from 0 to 9 that changes the behavior of the algorithm. The result is many choices and this blog post tries to show the differences between these choices.<span> </span></span></p>
<h3><span>About data compression</span></h3>
<p> </p>
<p><span><span> </span></span><span>First, let us have a quick word about data compression algorithms. Data compression falls in the realm of the </span><span>information theory</span><span> field of science and has been thoroughly studied since </span><a href="https://en.wikipedia.org/wiki/Claude_Shannon"><span>Claude Shannon</span></a><span> started research in this area in 1948. I am going to spare you the hard sciency bits for the rest of the article but a quick high-level overview will be useful.</span></p>
<p><span><span> </span></span><span>Data compression algorithms take advantage of the </span><span>redundancy</span><span> in the files they are compressing to reduce the size of the files and thus the storage/transmission requirements. Generally, compression algorithms work in two steps: </span></p>
<ol><li>
<p><span>The algorithm uses information from earlier in the file to find repeating patterns.</span></p>
</li>
<li>
<p><span>The pattern is turned into an efficient stream of bits and bytes - the so-called </span><span>entropy</span><span> </span><span>coding </span><span>- on the disk.</span></p>
</li>
</ol><p><span><span> </span></span><span>Various scientists have proven that an </span><span>optimal</span><span> compression algorithm exists. For example, the </span><a href="https://en.wikipedia.org/wiki/Context_tree_weighting"><span>Context Tree Weighting</span></a><span> was the first algorithm proven to be optimal for very large data streams with a fixed-size startup overhead. I've had the pleasure to be in a classroom watching professor Ziv prove the LZ77 algorithm is likewise theoretically optimal. The LZ77 family of algorithms forms the basis of most data compression implementations used in this blog post.</span></p>
<p><span><span> </span>If multiple algorithms are proven to be theoretically optimal, why does this blog post exist? The catch is: this optimality only happens for very large data sets and, generally, without regard for the computation time and memory consumption this optimal algorithm would take to implement. The key is the first step as described before: finding the repeating patterns. The various implementations I will compare in this blog post differ primarily in how far back they search and how efficient their search algorithms are.</span></p>
<h3><span>Starting with zlib</span></h3>
<p> </p>
<p><span><span> </span></span><span>No blog post about data compression implementations can exist without talking about the </span><a href="http://zlib.net/"><span>zlib </span></a><span>algorithm, called Deflate, and its implementation. The zlib algorithm and file format are standardized in internet standards (RFC 1950, 1951 and 1952) and have been extremely widely used since 1995, partly because they avoided many legal pitfalls which hit other algorithms in the litigious era of the 1990s.</span></p>
<p><span><span> </span></span><span>zlib provides 9 levels of compression and a "level 0" which just does a 1:1 uncompressed store of the data. The amount of compute time spent on searching for repeating patterns varies from level to level. The format on the disk for all levels is the same, shielding a decompressor from the level differences.</span></p>
<p><span><span> </span></span><span>To show the differences between these levels, I have measured both the compression ratio (how much smaller the data got) and the time it took to compress the reference file I used. The results are plotted in figure 1. The reference file I used is the source code tar file of the Linux 4.9 kernel which is 664 megabytes in size.</span></p>
<p><img alt="Figure 1: zlib compression curve" data-entity-type="" data-entity-uuid="" src="https://clearlinux.org/sites/default/files/1zlib-curve.png" /></p>
<p><span>Figure 1: zlib compression curve</span></p>
<p><span>As you can see in figure 1, as the compression level goes up, so does the time to compress, and, at the higher levels (7-9), the time goes up a lot for a modest gain in extra compression. Utilities like "gzip" generally use level 6 as default level, avoiding the steep end of the curve.</span></p>
<p><span><span> </span></span><span>Now, regarding the performance of zlib, details matter and various Linux distributions end up having different performances and, specifically, the Clear Linux* OS distribution (a project I work on) ships with an optimized implementation of zlib. Figure 2 shows this difference.</span></p>
<p><img alt="Figure 2: zlib compression curve" data-entity-type="" data-entity-uuid="" src="https://clearlinux.org/sites/default/files/2zlib-curve-distro.png" /></p>
<p><span>Figure 2: zlib compression curve</span></p>
<p><span><span> </span></span><span>More information about these optimizations to zlib can be found in the </span><a href="http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/zlib-compression-whitepaper-copy.pdf"><span>whitepaper</span></a><span> that Intel published about these optimizations.</span></p>
<h3><span>Adding XZ (LZMA*)</span></h3>
<p> </p>
<p><span><span> </span></span><span>As much as zlib is the default standard algorithm, you likely have seen a lot of files with the </span><span>xz</span><span> extension. The </span><span>xz </span><span>implementation is based on the earlier </span><span>lzma</span><span> implementation and in many ways modernizes the zlib algorithm. Where the zlib algorithm and file format are limited to a history or "window" of 32 kilobytes, showing its 1995 age, the xz implementation uses a much larger history size. This means the algorithm is going to find more repeating patterns than zlib does and can gain a higher compression ratio than zlib. At the same time, this ability to look further back also consumes more CPU cycles and memory. The xz implementation has 10 levels (0 - 9) of compression and the compression ratio vs. time tradeoff for the levels is shown in figure 3.</span></p>
<p><img alt="Figure 3: XZ/LZMA compression curve" data-entity-type="" data-entity-uuid="" src="https://clearlinux.org/sites/default/files/3xz-curve-distro.png" /></p>
<p><span>Figure 3: XZ/LZMA compression curve</span></p>
<p><span><span> </span></span><span>Figure 3 shows both the Clear Linux* OS and Ubuntu (16.10) implementation to highlight how compiler versions and compiler options (O2 vs. O3) can make a real difference in performance.</span></p>
<p><span><span> </span></span><span>Just as with zlib, higher compression ratios come at a higher CPU time cost and the 7.5x compression ratio (L9) takes more than 10x longer to compute than the 5.1x compression ratio (L0).</span></p>
<p><span>One thing to note is how the low end (L0) of the xz implementation starts at a compression ratio of 5.2, whereas zlib's best ratio was 4.9. In other words, the longer history and improved search algorithms of xz result in a better compression ratio compared to zlib across the board.</span></p>
<h3><span>Adding bzip2 and putting it all together</span></h3>
<p> </p>
<p><span>In the past, the </span><span>bzip2</span><span> algorithm was popular but bzip2 is now mostly replaced by the xz implementation. To show why and how zlib, xz, and bzip2 compare, I've put all three measurements in figure 4.</span></p>
<p><span><span><img alt="Figure 4: zlib, XZ, and BZIP2 combined compression curve" data-entity-type="" data-entity-uuid="" src="https://clearlinux.org/sites/default/files/4bzip2-xz-zlib-curve.png" /> </span></span></p>
<p><span>Figure 4: zlib, XZ, and BZIP2 combined compression curve</span></p>
<p><span>Note a few things about figure 4:</span></p>
<p><span>The high zlib compression levels (8 and 9) are both slower and achieve lower compression than the lowest xz compression levels, see figure 5 for a closeup. Unless you care about the compatibility of the file format, it makes no sense to pick these high zlib levels. You can pick level 0 or 1 of xz instead and get better compression at less CPU time.</span></p>
<p><img alt="Figure 5: High level zlib vs. low level xz" data-entity-type="" data-entity-uuid="" src="https://clearlinux.org/sites/default/files/5xz-zlib-coomed.png" /></p>
<p><span>Figure 5: High level zlib vs. low level xz</span></p>
<p><span>Secondly, the bzip2 algorithm is always slower than the xz algorithm, except for an area around xz level 3 and bzip2 levels 6 to 9; see figure 6. xz just about always being better explains why use of bzip2 has gone out of favor for general purpose compression. Bzip2 is still used in some special places because there are some common types of data, not the Linux kernel source code, such as large sparse matrices, where bzip2 ends up doing surprisingly well. </span></p>
<p><span><span> <img alt="Figure 6: BZIP2 vs. XZ" data-entity-type="" data-entity-uuid="" src="https://clearlinux.org/sites/default/files/6bzip2-xz-zoomed.png" /></span></span></p>
<p><span>Figure 6: BZIP2 vs. XZ<span> </span></span></p>
<h3><span>The other end of the spectrum</span></h3>
<p> </p>
<p><span><span> </span></span><span>For some usages, for example, the live streaming of data over the internet, limiting compute time is more important than compression ratio, and I've compared two common algorithms designed for this sweet spot: LZ4 and Snappy.</span></p>
<p><span><span><img alt="Figure 7: zlib, Snappy, and LZ4 combined compression curve" data-entity-type="" data-entity-uuid="" src="https://clearlinux.org/sites/default/files/7pasted%20image%20.png" /> </span></span></p>
<p> </p>
<p><span>Figure 7: zlib, Snappy, and LZ4 combined compression curve</span></p>
<p><span><span> </span></span></p>
<p><span>As you can see in figure 7, LZ4 and Snappy are similar in compression ratio on the chosen data file at approximately 3x compression as well as being similar in performance. Compared to zlib level 1, both algorithms are roughly 4x faster while sacrificing compression down from a 4x ratio to a 3x compression ratio. </span></p>
<h3><span>Summary</span></h3>
<p> </p>
<p><span><span> </span>zlib is a reasonable "middle of the road" algorithm or implementation that reaches reasonable compression at reasonable CPU utilization, with the ubiquity and internet standardization as added benefits. When needing maximum compression ratios, xz is the clear choice at the expense of longer compression times, while both LZ4 and Snappy provide interesting design points at the high performance and throughput end of the spectrum. The bzip2 algorithm is likely only interesting in more niche usages where the data is known to have bzip2-friendly patterns.<span> </span></span></p>
<h3>Update 1</h3>
<p> </p>
<p>I have gotten several requests to include "zstd" in the graphs; this is done below. Zstd clearly outperforms zlib across the board, and outperforms the lower levels of XZ as well, although at the higher ratio end of the range, XZ is faster and can compress a bit more.</p>
<p> </p>
<p><img alt="Compression curves with zstd added to the comparison" data-entity-type="" data-entity-uuid="" src="https://clearlinux.org/sites/default/files/8with-zstd.png" /></p>
<p><span><span> </span></span></p>
<h3><span>Links</span></h3>
<p> </p>
<p><a href="https://en.wikipedia.org/wiki/Claude_Shannon"><span>https://en.wikipedia.org/wiki/Claude_Shannon</span></a></p>
<p><a href="https://en.wikipedia.org/wiki/Context_tree_weighting"><span>https://en.wikipedia.org/wiki/Context_tree_weighting</span></a></p>
<p><a href="http://zlib.net/"><span>http://zlib.net/</span></a></p>
<p><a href="http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/zlib-compression-whitepaper-copy.pdf"><span>http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/zlib-compression-whitepaper-copy.pdf</span></a></p>
<h3><span>Versions</span></h3>
<p> </p>
<p><span>Unless otherwise stated, the comparisons were done on the Clear Linux* OS, version 12420.</span></p>
<p><span>This means zlib version 1.2.8-jtk4, xz version 5.2.3, bzip2 version 1.0.6, snappy version 1.1.3 and lz4 version 1.7.4.2.</span></p>
<h3><span>Disclaimers</span></h3>
<p> </p>
<p><span>The data in this blog post is not intended to compare, sell or claim performance of any piece of hardware, and is provided for ease of relative comparison between different software algorithms.</span></p>
<p><span>Tests document performance of components on a particular test, in specific systems. Differences in hardware, software, or configuration will affect actual performance. Consult other sources of information to evaluate performance as you consider your purchase.  </span></p>
<p><span>*Other names and brands may be claimed as the property of others. </span></p>
</div>
<!-- END OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/field/field--node--body.html.twig' -->
</div>
<a class="back_to_top" href="670.html#">
<i class="fa fa-angle-up"> </i>
</a>
<!-- END OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/content/node--blog--full.html.twig' -->
<!-- END OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/block/block--clearlinux-theme-content.html.twig' -->
<!-- END OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/layout/region--content.html.twig' -->
</main>
<!-- /main -->
<footer class="footer">
<div class="container padding-md--top-bottom padding-md--left-right">
<div class="footer__logo">
<div class="footer__logo__wrapper">
<img class="footer__site_img_object" src="https://clearlinux.org/modules/custom/clearlinux.org/themes/clearlinux_theme/clear_linux_logo.svg" alt="Logo Clear Linux* Project"/>
<img class="footer__site_txt_object" src="https://clearlinux.org/modules/custom/clearlinux.org/themes/clearlinux_theme/sass/components/layout/footer/assets/clear-linux-text-white.svg" alt="" />
</div>
</div>
<div class="footer__details">
<div class="footer__top">
<div class="footer__social_media">
<ul class="footer__social_media__list">
<li class="footer__social_media__list_item">
<a target="_blank" tabindex='1' href="https://github.com/clearlinux" title="Github"><i class="fa "></i></a>
</li>
<li class="footer__social_media__list_item">
<a target="_blank" tabindex='1' href="https://www.youtube.com/channel/UChpmukwyvvdSmTA9gxKL_Fg" title="YouTube"><i class="fa "></i></a>
</li>
<li class="footer__social_media__list_item">
<a target="_blank" tabindex='1' href="http://twitter.com/clearlinux" title="Twitter"><i class="fa "></i></a>
</li>
<li class="footer__social_media__list_item">
<a target="_blank" tabindex='1' href="https://community.clearlinux.org/" title="Discourse"><i class="fa "></i></a>
</li>
</ul>
</div>
<hr>
<div class="footer__menu">
<ul class="footer__menu__list">
<li class="footer__menu__list_item">
<a tabindex='1' href="http://www.intel.com/content/www/us/en/legal/trademarks.html">*Trademarks</a>
</li>
<li class="footer__menu__list_item">
<a tabindex='1' href="http://www.intel.com/content/www/us/en/privacy/intel-cookie-notice.html">Cookies</a>
</li>
<li class="footer__menu__list_item">
<a tabindex='1' href="https://www.intel.com/content/www/us/en/privacy/intel-privacy-notice.html">Privacy terms</a>
</li>
</ul>
</div>
</div>
<div class="footer__bottom">
<p class="footer__copyright">© 2022 Intel Corporation. All Rights Reserved.<br>*Other names and brands may be claimed as the property of others.</p>
</div>
</div>
</div>
<div class="footer_bottom">
<div class="container padding-md--left-right">
<div class="footer_bottom__copyright">
<i class="fa fa-copyright"></i> &nbsp; This project belongs to 01.org, Intel's open source platform. </div>
</div>
</div>
</footer>
<!-- END OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/layout/page.html.twig' -->
</div>
<!-- END OUTPUT from 'core/themes/stable/templates/content/off-canvas-page-wrapper.html.twig' -->
<script src="https://clearlinux.org/core/assets/vendor/jquery/jquery.min.js?v=3.6.0"></script>
<script src="https://clearlinux.org/core/misc/polyfills/element.matches.js?v=9.4.8"></script>
<script src="https://clearlinux.org/core/assets/vendor/once/once.min.js?v=1.0.1"></script>
<script src="https://clearlinux.org/modules/contrib/extlink/extlink.js?v=9.4.8"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/OwlCarousel2/2.2.1/owl.carousel.min.js" integrity="sha256-s5TTOyp+xlSmsDfr/aZhg0Gz+JejYr5iTJI8JxG1SkM=" crossorigin="anonymous"></script>
<script src="https://clearlinux.org/modules/custom/clearlinux.org/themes/clearlinux_theme/js/src/jquery.colorbox.min.js?v=9.4.8"></script>
<script src="https://clearlinux.org/modules/custom/clearlinux.org/themes/clearlinux_theme/js/src/clearlinux_theme.js?v=9.4.8"></script>
<script src="https://clearlinux.org/modules/custom/clearlinux.org/themes/clearlinux_theme/bower_components/clipboard/dist/clipboard.min.js?v=9.4.8"></script>
<script src="https://clearlinux.org/core/assets/vendor/js-cookie/js.cookie.min.js?v=3.0.1"></script>
<script src="https://clearlinux.org/modules/contrib/eu_cookie_compliance/js/eu_cookie_compliance.min.js?v=9.4.8" defer></script>
<script src="https://clearlinux.org/modules/custom/clearlinux.org/themes/clearlinux_theme/js/dist/layout/header/header.js"></script>
<script src="https://clearlinux.org/libraries/codesnippet/lib/highlight/highlight.pack.js?v=9.4.8"></script>
<script src="https://clearlinux.org/modules/contrib/codesnippet/js/codesnippet.js?v=9.4.8"></script>
</body>
</html>
<!-- END OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/layout/html.html.twig' -->