mirror of
https://github.com/clearlinux/clearlinux.github.io.git
synced 2026-04-28 11:13:42 +00:00
1064 lines
74 KiB
HTML
1064 lines
74 KiB
HTML
|
||
|
||
<!-- THEME DEBUG -->
|
||
<!-- THEME HOOK: 'html' -->
|
||
<!-- FILE NAME SUGGESTIONS:
|
||
* html--node--31814.html.twig
|
||
* html--node--%.html.twig
|
||
* html--node.html.twig
|
||
x html.html.twig
|
||
-->
|
||
<!-- BEGIN OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/layout/html.html.twig' -->
|
||
<!DOCTYPE html>
|
||
<html lang="en" dir="ltr" prefix="content: http://purl.org/rss/1.0/modules/content/ dc: http://purl.org/dc/terms/ foaf: http://xmlns.com/foaf/0.1/ og: http://ogp.me/ns# rdfs: http://www.w3.org/2000/01/rdf-schema# schema: http://schema.org/ sioc: http://rdfs.org/sioc/ns# sioct: http://rdfs.org/sioc/types# skos: http://www.w3.org/2004/02/skos/core# xsd: http://www.w3.org/2001/XMLSchema# ">
|
||
<head>
|
||
<meta charset="utf-8" />
|
||
|
||
|
||
<meta name="description" content="Performant Containerized Go* Applications with Intel® Advanced Vector Extensions 512 on Clear Linux* OS Author: Jing Wang Intel Corporation " />
|
||
<meta property="og:site_name" content="Clear Linux* Project" />
|
||
<meta property="og:type" content="Blog" />
|
||
<meta property="og:url" content="https://clearlinux.org/blogs-news/performant-containerized-go-applications-intel-advanced-vector-extensions-512-clear" />
|
||
<meta property="og:title" content="Performant Containerized Go* Applications with Intel® Advanced Vector Extensions 512 on Clear Linux* OS" />
|
||
<meta property="og:description" content="Performant Containerized Go* Applications with Intel® Advanced Vector Extensions 512 on Clear Linux* OS Author: Jing Wang Intel Corporation " />
|
||
<meta name="Generator" content="Drupal 9 (https://www.drupal.org)" />
|
||
<meta name="MobileOptimized" content="width" />
|
||
<meta name="HandheldFriendly" content="true" />
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||
<style>div#sliding-popup, div#sliding-popup .eu-cookie-withdraw-banner, .eu-cookie-withdraw-tab {background: #0779BF} div#sliding-popup.eu-cookie-withdraw-wrapper { background: transparent; } #sliding-popup h1, #sliding-popup h2, #sliding-popup h3, #sliding-popup p, #sliding-popup label, #sliding-popup div, .eu-cookie-compliance-more-button, .eu-cookie-compliance-secondary-button, .eu-cookie-withdraw-tab { color: #ffffff;} .eu-cookie-withdraw-tab { border-color: #ffffff;}</style>
|
||
<link rel="icon" href="https://clearlinux.org/modules/custom/clearlinux.org/themes/clearlinux_theme/favicon.ico" type="image/vnd.microsoft.icon" />
|
||
<link rel="canonical" href="31814.html" />
|
||
<link rel="shortlink" href="31814.html" />
|
||
<script src="https://clearlinux.org/sites/default/files/eu_cookie_compliance/eu_cookie_compliance.script.js" defer></script>
|
||
|
||
<title>Performant Containerized Go* Applications with Intel® Advanced Vector Extensions 512 on Clear Linux* OS | Clear Linux* Project</title>
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/ajax-progress.module.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/align.module.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/autocomplete-loading.module.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/fieldgroup.module.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/container-inline.module.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/clearfix.module.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/details.module.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/hidden.module.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/item-list.module.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/js.module.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/nowrap.module.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/position-container.module.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/progress.module.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/reset-appearance.module.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/resize.module.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/sticky-header.module.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/system-status-counter.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/system-status-report-counters.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/system-status-report-general-info.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/tabledrag.module.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/tablesort.module.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/core/themes/stable/css/system/components/tree-child.module.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/modules/contrib/eu_cookie_compliance/css/eu_cookie_compliance.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/modules/contrib/extlink/extlink.css" />
|
||
<link rel="stylesheet" media="all" href="https://use.fontawesome.com/releases/v6.1.0/css/all.css" />
|
||
<link rel="stylesheet" media="all" href="https://use.fontawesome.com/releases/v6.1.0/css/v4-shims.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/libraries/codesnippet/lib/highlight/styles/monokai_sublime.css" />
|
||
<link rel="stylesheet" media="all" href="https://clearlinux.org/modules/custom/clearlinux.org/themes/clearlinux_theme/css/styles.css" />
|
||
<link rel="stylesheet" media="all" href="https://cdnjs.cloudflare.com/ajax/libs/OwlCarousel2/2.2.1/assets/owl.carousel.min.css" integrity="sha256-AWqwvQ3kg5aA5KcXpX25sYKowsX97sTCTbeo33Yfyk0=" crossorigin="anonymous" />
|
||
|
||
<script src="https://clearlinux.org/core/assets/vendor/modernizr/modernizr.min.js?v=3.11.7"></script>
|
||
<script src="https://clearlinux.org/core/misc/modernizr-additional-tests.js?v=3.11.7"></script>
|
||
|
||
</head>
|
||
<body class="alias--blogs-news-performant-containerized-go-applications-intel-advanced-vector-extensions-512-clear nodetype--blog logged-out">
|
||
<div id="skip">
|
||
<a class="visually-hidden focusable skip-link" href="31814.html#main-menu">
|
||
Skip to main navigation
|
||
</a>
|
||
</div>
|
||
|
||
|
||
|
||
<!-- THEME DEBUG -->
|
||
<!-- THEME HOOK: 'off_canvas_page_wrapper' -->
|
||
<!-- BEGIN OUTPUT from 'core/themes/stable/templates/content/off-canvas-page-wrapper.html.twig' -->
|
||
<div class="dialog-off-canvas-main-canvas" data-off-canvas-main-canvas>
|
||
|
||
|
||
<!-- THEME DEBUG -->
|
||
<!-- THEME HOOK: 'page' -->
|
||
<!-- FILE NAME SUGGESTIONS:
|
||
* page--node--blog.html.twig
|
||
* page--node--31814.html.twig
|
||
* page--node--%.html.twig
|
||
* page--node.html.twig
|
||
x page.html.twig
|
||
-->
|
||
<!-- BEGIN OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/layout/page.html.twig' -->
|
||
<!-- ______________________ HEADER _______________________ -->
|
||
|
||
|
||
|
||
<header id="header">
|
||
<div class="container padding-md--left-right">
|
||
<div class="header__menu_mobile">
|
||
<i class="fa fa-bars header__menu_mobile__control" aria-hidden="true"></i>
|
||
</div>
|
||
|
||
<div id="header__site_info">
|
||
<div class="header__site_img_wrapper">
|
||
<a href ="https://clearlinux.org/">
|
||
<img class="header__site_img_object" src="https://clearlinux.org/modules/custom/clearlinux.org/themes/clearlinux_theme/clear_linux_logo.svg" alt="Logo Clear Linux* Project"/>
|
||
<img class="header__site_txt_object" src="https://clearlinux.org/modules/custom/clearlinux.org/themes/clearlinux_theme/sass/components/layout/header/assets/clear-linux-text.svg" alt="" />
|
||
</a>
|
||
</div>
|
||
</div>
|
||
|
||
<nav class="header__menu">
|
||
<ul class="header__menu_list">
|
||
<li class="header__menu_list_item ">
|
||
<a tabindex='1' href="31099.html">About</a>
|
||
</li>
|
||
<li class="header__menu_list_item ">
|
||
<a tabindex='1' href="31103.html">Developer</a>
|
||
</li>
|
||
<li class="header__menu_list_item ">
|
||
<a tabindex='1' href="https://clearlinux.org/software/software.html">Software</a>
|
||
</li>
|
||
</ul>
|
||
</nav>
|
||
|
||
<div class="header__search">
|
||
|
||
|
||
<div class="header__search_form__wrapper">
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
</div>
|
||
</header>
|
||
<!-- /header -->
|
||
<div class="header__menu-submenu green">
|
||
<div class="toolbar__container">
|
||
<div class="container padding-md--left-right">
|
||
<ul class='Header__main'>
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div class="wrapper banner blog" >
|
||
<div class="banner__gradient "></div>
|
||
<div class="container banner__container ">
|
||
<div class="banner__content">
|
||
<h1 class="banner__title">Blogs &amp; News</h1>
|
||
|
||
|
||
|
||
</div>
|
||
|
||
</div>
|
||
</div>
|
||
|
||
<!-- Page Header -->
|
||
<div class="page_header">
|
||
<div class="page_header__main">
|
||
|
||
<!-- tabs -->
|
||
|
||
</div>
|
||
</div>
|
||
<!-- End Page Header -->
|
||
|
||
<!-- ______________________ MAIN _______________________ -->
|
||
<main class="page-standard padding-md--top padding-lg--bottom padding-md--left-right container-xl">
|
||
|
||
|
||
|
||
|
||
<!-- THEME DEBUG -->
|
||
<!-- THEME HOOK: 'region' -->
|
||
<!-- FILE NAME SUGGESTIONS:
|
||
x region--content.html.twig
|
||
* region.html.twig
|
||
-->
|
||
<!-- BEGIN OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/layout/region--content.html.twig' -->
|
||
|
||
|
||
<!-- THEME DEBUG -->
|
||
<!-- THEME HOOK: 'block' -->
|
||
<!-- FILE NAME SUGGESTIONS:
|
||
* block--clearlinux-theme-messages.html.twig
|
||
x block--system-messages-block.html.twig
|
||
* block--system.html.twig
|
||
* block.html.twig
|
||
-->
|
||
<!-- BEGIN OUTPUT from 'core/themes/stable/templates/block/block--system-messages-block.html.twig' -->
|
||
<div data-drupal-messages-fallback class="hidden"></div>
|
||
|
||
<!-- END OUTPUT from 'core/themes/stable/templates/block/block--system-messages-block.html.twig' -->
|
||
|
||
|
||
|
||
<!-- THEME DEBUG -->
|
||
<!-- THEME HOOK: 'block' -->
|
||
<!-- FILE NAME SUGGESTIONS:
|
||
x block--sharethis.html.twig
|
||
* block--sharethis-block.html.twig
|
||
x block--sharethis.html.twig
|
||
* block.html.twig
|
||
-->
|
||
<!-- BEGIN OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/block/block--sharethis.html.twig' -->
|
||
<div id="block-sharethis" data-block-plugin-id="sharethis_block" class="block block-sharethis block-sharethis-block social_share">
|
||
<div class="sharethis-wrapper">
|
||
<a target="_blank" href="https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fclearlinux.org%2Fnews-blogs%2Fwhere-etcfstab-clear-linux&amp;src=sdkpreparse" class="st_facebook_custom"></a>
|
||
<a target="_blank" href="https://twitter.com/intent/tweet?text=Clear%20Linux*%20Project&url=https%3A%2F%2Fclearlinux.org%2Fnews-blogs%2Fwhere-etcfstab-clear-linux" class="st_twitter_custom"></a>
|
||
<a target="_blank" href="https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fclearlinux.org%2Fnews-blogs%2Fwhere-etcfstab-clear-linux&title=Clear%20Linux*%20Project" class="st_linkedin_custom"></a>
|
||
</div>
|
||
</div>
|
||
<!-- END OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/block/block--sharethis.html.twig' -->
|
||
|
||
|
||
|
||
<!-- THEME DEBUG -->
|
||
<!-- THEME HOOK: 'block' -->
|
||
<!-- FILE NAME SUGGESTIONS:
|
||
x block--clearlinux-theme-content.html.twig
|
||
* block--system-main-block.html.twig
|
||
* block--system.html.twig
|
||
* block.html.twig
|
||
-->
|
||
<!-- BEGIN OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/block/block--clearlinux-theme-content.html.twig' -->
|
||
|
||
|
||
<!-- THEME DEBUG -->
|
||
<!-- THEME HOOK: 'node' -->
|
||
<!-- FILE NAME SUGGESTIONS:
|
||
* node--31814--full.html.twig
|
||
* node--31814.html.twig
|
||
x node--blog--full.html.twig
|
||
* node--blog.html.twig
|
||
* node--full.html.twig
|
||
* node.html.twig
|
||
-->
|
||
<!-- BEGIN OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/content/node--blog--full.html.twig' -->
|
||
<div class="blog_detail">
|
||
|
||
<div class="blog_detail__categories">
|
||
<a tabindex='2' href='../blogs_category_5.html' title='Maintenance'>Maintenance</a>
|
||
</div>
|
||
|
||
<h1 class="blog_detail__title">
|
||
|
||
<!-- THEME DEBUG -->
|
||
<!-- THEME HOOK: 'field' -->
|
||
<!-- FILE NAME SUGGESTIONS:
|
||
* field--node--title--blog.html.twig
|
||
x field--node--title.html.twig
|
||
* field--node--blog.html.twig
|
||
* field--title.html.twig
|
||
* field--string.html.twig
|
||
* field.html.twig
|
||
-->
|
||
<!-- BEGIN OUTPUT from 'core/themes/stable/templates/field/field--node--title.html.twig' -->
|
||
<span>Performant Containerized Go* Applications with Intel® Advanced Vector Extensions 512 on Clear Linux* OS</span>
|
||
|
||
<!-- END OUTPUT from 'core/themes/stable/templates/field/field--node--title.html.twig' -->
|
||
|
||
</h1>
|
||
|
||
<p class="blog_detail__author">
|
||
Puneet Sethi
|
||
</p>
|
||
|
||
<p class="blog_detail__date">26 May, 2020</p>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<!-- THEME DEBUG -->
|
||
<!-- THEME HOOK: 'links__node' -->
|
||
<!-- FILE NAME SUGGESTIONS:
|
||
* links--node.html.twig
|
||
x links.html.twig
|
||
-->
|
||
<!-- BEGIN OUTPUT from 'themes/contrib/cog/templates/navigation/links.html.twig' -->
|
||
|
||
<!-- END OUTPUT from 'themes/contrib/cog/templates/navigation/links.html.twig' -->
|
||
|
||
|
||
|
||
<!-- THEME DEBUG -->
|
||
<!-- THEME HOOK: 'field' -->
|
||
<!-- FILE NAME SUGGESTIONS:
|
||
* field--node--body--blog.html.twig
|
||
x field--node--body.html.twig
|
||
* field--node--blog.html.twig
|
||
* field--body.html.twig
|
||
* field--text-with-summary.html.twig
|
||
* field.html.twig
|
||
-->
|
||
<!-- BEGIN OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/field/field--node--body.html.twig' -->
|
||
|
||
|
||
<div class="Text__description">
|
||
<h1>Performant Containerized Go* Applications with Intel® Advanced Vector Extensions 512 on Clear Linux* OS</h1>
|
||
|
||
<p class="text-align-center"><em>Author: Jing Wang<br />
|
||
Intel Corporation</em></p>
|
||
|
||
<p> </p>
|
||
|
||
<p><span><span><span><span><span><span><span>Major cloud software such as </span></span></span></span></span></span></span><a href="https://www.docker.com/"><span><span><span><span><span><span><span><span><span>Docker*</span></span></span></span></span></span></span></span></span></a><span><span><span><span><span><span><span>, </span></span></span></span></span></span></span><a href="https://etcd.io/"><span><span><span><span><span><span><span><span><span>etcd*</span></span></span></span></span></span></span></span></span></a><span><span><span><span><span><span><span>, </span></span></span></span></span></span></span><a href="https://istio.io/"><span><span><span><span><span><span><span><span><span>Istio*</span></span></span></span></span></span></span></span></span></a><span><span><span><span><span><span><span>, </span></span></span></span></span></span></span><a href="https://kubernetes.io/"><span><span><span><span><span><span><span><span><span>Kubernetes*</span></span></span></span></span></span></span></span></span></a><span><span><span><span><span><span><span>, </span></span></span></span></span></span></span><a href="https://prometheus.io/"><span><span><span><span><span><span><span><span><span>Prometheus*</span></span></span></span></span></span></span></span></span></a><span><span><span><span><span><span><span>, and </span></span></span></span></span></span></span><a href="https://www.terraform.io/docs/cloud/index.html"><span><span><span><span><span><span><span><span><span>Terraform*</span></span></span></span></span></span></span></span></span></a><span><span><span><span><span><span><span> use the Go* programming language for core cloud infrastructure components.</span></span></span></span></span></span></span><span><span><span><span><span><span> </span></span></span></span></span></span><span><span><span><span><span><span><span>Why are they using Go? 
</span></span></span></span></span></span></span><a href="https://benchmarksgame-team.pages.debian.net/benchmarksgame/fastest/go-python3.html"><span><span><span><span><span><span><span><span><span>Compared with</span></span></span></span></span></span></span></span></span><span><span><span><span><span><span><span><span> many </span></span></span></span></span></span></span></span><span><span><span><span><span><span><span><span><span>other scripting languages, </span></span></span></span></span></span></span></span></span><span><span><span><span><span><span><span><span>Go is fast!</span></span></span></span></span></span></span></span></a><span><span><span><span><span><span> </span></span></span></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span><span>This article shows how to develop performant Go applications</span></span></span></span></span></span></span><span><span><span><span><span><span> that leverage </span></span></span></span></span></span><a href="https://www.intel.com/content/www/us/en/architecture-and-technology/avx-512-overview.html"><span><span><span><span><span><span><span><span>Intel® Advanced Vector Extensions 512</span></span></span></span></span></span></span></span></a><span><span><span><span><span><span> (Intel® AVX-512) and a </span></span></span></span></span></span><a href="https://hub.docker.com/r/clearlinux/golang"><span><span><span><span><span><span><span><span><span>Go container based on Clear Linux* OS</span></span></span></span></span></span></span></span></span></a><span><span><span><span><span><span><span><span> </span></span></span></span></span></span></span></span><span><span><span><span><span><span>to improve the performance potential of Go. </span></span></span></span></span></span></p>
|
||
|
||
<p> </p>
|
||
|
||
<h2><span><span><span><span><span><span><span>Background</span></span></span></span></span></span></span></h2>
|
||
|
||
<p><span><span><span><span><span><span><span>Intel AVX-512 is a set of</span></span></span></span></span></span></span><span><span><span><span><span><span> SIMD i</span></span></span></span></span></span><span><span><span><span><span><span><span>nstructions that can accelerate performance for workloads with large and precise datasets. With Intel AVX-512, applications can pack 32 double-precision and 64 single-precision floating point operations per clock cycle within the 512-bit vectors, as well as eight 64-bit and sixteen 32-bit integers.</span></span></span></span></span></span></span></p>
|
||
|
||
<p><a href="https://www.intel.ai/intel-deep-learning-boost/"><span><span><span><span><span><span><span><span>Intel® Deep Learning Boost</span></span></span></span></span></span></span></span></a><span><span><span><span><span><span> (Intel® DL Boost) is a set of technologies introduced with 2nd generation Intel® Xeon® processors to accelerate CPU performance in AI applications. It includes Intel® DL Boost Vector Neural Network Instructions (VNNI) which extends Intel AVX-512 features by introducing four new instructions for accelerating inner convolutional neural network loops.</span></span></span></span></span></span></p>
|
||
|
||
<p><a href="https://golang.org/doc/"><span><span><span><span><span><span><span><span>Go</span></span></span></span></span></span></span></span></a><span><span><span><span><span><span> is an open source programming language with concurrency mechanisms that help developers make full use of multicore and networked machines. It is expressive, modular, and efficient. </span></span></span></span></span></span><span><span><span><span><span><span><span>Go based data science and analytic applications typically leverage </span></span></span></span></span></span></span><a href="https://www.gonum.org/"><span><span><span><span><span><span><span><span><span>gonum</span></span></span></span></span></span></span></span></span></a><span><span><span><span><span><span><span>, </span></span></span></span></span></span></span><span><span><span><span><span><span>a set of libraries for matrices, statistics, and optimization</span></span></span></span></span></span><span><span><span><span><span><span><span>. Libraries like gonum build on top of a lower-level BLAS (Basic Linear Algebra Subroutines) layer.</span></span></span></span></span></span></span></p>
|
||
|
||
<p><a href="https://www.gonum.org/"><span><span><span><span><span><span><span><span>Gonum</span></span></span></span></span></span></span></span></a><span><span><span><span><span><span> / </span></span></span></span></span></span><a href="https://www.netlib.org/"><span><span><span><span><span><span><span><span>netlib</span></span></span></span></span></span></span></span></a><span><span><span><span><span><span> creates wrapper packages that provide an interface to Netlib </span></span></span></span></span></span><a href="http://www.netlib.org/blas/"><span><span><span><span><span><span><span><span>CBLAS</span></span></span></span></span></span></span></span></a><span><span><span><span><span><span> implementations. Because netlib uses C and CBLAS, using gonum/netlib provides indirect use of an Intel processor’s Intel AVX-512 capability, if available on the running system. </span></span></span></span></span></span><span><span><span><span><span><span>The </span></span></span></span></span></span><a href="https://github.com/gonum/netlib"><span><span><span><span><span><span><span><span>gonum/netlib recommended BLAS layer for performance on Linux is OpenBLAS</span></span></span></span></span></span></span></span></a><span><span><span><span><span><span>. </span></span></span></span></span></span></p>
|
||
|
||
<p><a href="https://www.openblas.net/"><span><span><span><span><span><span><span><span><span>OpenBLAS</span></span></span></span></span></span></span></span></span></a><span><span><span><span><span><span><span> is an optimized open source BLAS library based on GotoBLAS2 1.13 BSD version, implemented in C. It provides a BLAS layer implementation with Intel AVX-512 acceleration that is adaptable to Intel</span></span></span></span></span></span></span><span><span><span><span><span><span>®</span></span></span></span></span></span><span><span><span><span><span><span> Advanced Vector Extensions 2 (Intel</span></span></span></span></span></span><span><span><span><span><span><span>®</span></span></span></span></span></span><span><span><span><span><span><span><span> AVX2) or</span></span></span></span></span></span></span><span><span><span><span><strong><span><span> </span></span></strong></span></span></span></span><span><span><span><span><span><span>Intel® Streaming SIMD Extensions (Intel® SSE)</span></span></span></span></span></span><span><span><span><strong><span><span> </span></span></strong></span></span></span><span><span><span><span><span><span>only platforms. </span></span></span></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span><span>OpenBLAS uses </span></span></span></span></span></span></span><a href="https://www.openmp.org/specifications/"><span><span><span><span><span><span><span><span><span>OpenMP</span></span></span></span></span></span></span></span></span></a><span><span><span><span><span><span><span>* as the mechanism for parallelism. </span></span></span></span></span></span></span><span><span><span><span><span><span>The </span></span></span></span></span></span><span><span><span><span><span><span>OpenMP </span></span></span></span></span></span><span><span><span><span><span><span>API provides support for parallel programming with shared-memory processors across multiple platforms. </span></span></span></span></span></span><span><span><span><span><span><span><span>It provides a threading layer with configurable environment variables to balance maximum performance and resource scheduling. </span></span></span></span></span></span></span></p>
|
||
|
||
<p><a href="https://www.clearlinux.org/clear-linux-documentation/guides/clear/performance.html"><span><span><span><span><span><span><span><span>Clear Linux OS is optimized for performance</span></span></span></span></span></span></span></span></a><span><span><span><span><span><span>. It uses the latest compiler optimized for Intel® architecture with the latest features and compiler flags to optimize builds. The Clear Linux OS uses a </span></span></span></span></span></span><a href="../blogs/transparent-use-library-packages-optimized-intel-architecture.html"><span><span><span><span><span><span><span><span><span>multiple library build</span></span></span></span></span></span></span></span></span></a><span><span><span><span><span><span><span> approach</span></span></span></span></span></span></span><span><span><span><span><span><span> </span></span></span></span></span></span><span><span><span><span><span><span><span>and will link the library most optimized for the capabilities of the processor in the running system.</span></span></span></span></span></span></span><span><span><span><span><span><span> </span></span></span></span></span></span><span><span><span><span><span><span>Clear Linux OS provides optimized software components across the software stack that support Go development, such as Intel AVX-512 optimized glibc and OpenBLAS. </span></span></span></span></span></span></p>
|
||
|
||
<p> </p>
|
||
|
||
<h2><span><span><span><span><span><span><span>Go container based on Clear Linux OS</span></span></span></span></span></span></span></h2>
|
||
|
||
<p><span><span><span><span><span><span><span>A </span></span></span></span></span></span></span><a href="https://hub.docker.com/r/clearlinux/golang"><span><span><span><span><span><span><span><span><span>Go container based on Clear Linux OS</span></span></span></span></span></span></span></span></span></a><span><span><span><span><strong><span><span> </span></span></strong></span></span></span></span><span><span><span><span><span><span><span>(clearlinux/golang) was published to make the performance optimizations in Clear Linux OS easy to use with Go. The container includes OpenBLAS and OpenMP.</span></span></span></span></span></span></span></p>
|
||
|
||
<p class="text-align-center"><strong><img alt="Figure 1 clearlinux/golang container components" data-entity-type="file" data-entity-uuid="6d1d4aca-6911-4cc0-a8e0-216b6245588a" src="https://clearlinux.org/sites/default/files/inline-images/golang_container_fig1.png" width="488" height="348" loading="lazy" /></strong><br /><em>Figure 1 clearlinux/golang container components</em></p>
|
||
|
||
<p><span><span><span><span><span><span><span>The clearlinux/golang container is easy to use and customize. To use the container,</span></span></span></span></span></span></span><span><span><span><span><span><span> pull the clearlinux/golang image from DockerHub* and run it:</span></span></span></span></span></span></p>
|
||
|
||
<pre>
|
||
<code class="language-bash"> docker pull clearlinux/golang
|
||
</code></pre>
|
||
|
||
<p><span><span><span><span><span><span>To build your own Go container based on the </span></span></span></span></span></span><span><span><span><span><span><span><span>clearlinux/golang image:</span></span></span></span></span></span></span></p>
|
||
|
||
<ol><li><span><span><span><span><span><span><span>Define a Dockerfile:</span></span></span></span></span></span></span>
|
||
|
||
<pre>
|
||
<code class="language-dockerfile">FROM clearlinux/golang:latest
|
||
RUN swupd bundle-add openblas
|
||
ADD /app $GOPATH/src
|
||
WORKDIR $GOPATH/src/app
|
||
</code></pre>
|
||
</li>
|
||
<li><span><span><span><span><span><span>Build the new container from the Dockerfile: </span></span></span></span></span></span>
|
||
<pre>
|
||
<code class="language-bash">docker build -f dockerfile .</code></pre>
|
||
</li>
|
||
</ol><p> </p>
|
||
|
||
<h3><span><span><span><span><span><span>Tuning performance</span></span></span></span></span></span></h3>
|
||
|
||
<p><span><span><span><span><span><span>OpenBLAS and OpenMP expose configuration variables </span></span></span></span></span></span><span><span><span><span><span><span>that influence performance of shared-memory multiprocessing applications</span></span></span></span></span></span><span><span><span><span><span><span>. The clearlinux/golang container is based on Clear Linux OS which uses </span></span></span></span></span></span><a href="https://www.clearlinux.org/clear-linux-documentation/guides/clear/performance.html#aggressive-compiler-flags"><span><span><span><span><span><span><span><span>compiler options that are optimized for Intel architecture</span></span></span></span></span></span></span></span></a><span><span><span><span><span><span><span><span>.</span></span></span></span></span></span></span></span><span><span><span><span><span><span> Clear Linux OS </span></span></span></span></span></span><span><span><span><span><span><span>patches the default values for the OpenBLAS and OpenMP configuration variables</span></span></span></span></span></span><span><span><span><span><span><span> as follows: </span></span></span></span></span></span></p>
|
||
|
||
<ul><li><span><span><span><span><strong><span><span>USE_OPENMP:</span></span></strong></span></span></span></span><span><span><span><span><span><span><span> Specifies that OpenBLAS should use OpenMP</span></span></span></span></span></span></span><span><span><span><span><span><span>. The default value is USE_OPENMP=1.</span></span></span></span></span></span><br />
|
||
</li>
|
||
<li><span><span><span><span><strong><span><span>OMP_NUM_THREADS:</span></span></strong></span></span></span></span><span><span><span><span><span><span><span> Specifies the number of OpenMP threads to use in parallel regions. The value should be from one to the maximum number of physical CPU cores.</span></span></span></span></span></span></span><span><span><span><span><span><span> The default value is OMP_NUM_THREADS=[number of physical CPU cores of the platform].</span></span></span></span></span></span><br />
|
||
</li>
|
||
<li><span><span><span><span><strong><span><span>OMP_DYNAMIC:</span></span></strong></span></span></span></span><span><span><span><span><span><span><span> Specifies whether to enable or disable the dynamic adjustment of the number of threads within a team. The default value is OMP_DYNAMIC=disabled.</span></span></span></span></span></span></span><br />
|
||
</li>
|
||
<li><span><span><span><span><strong><span><span>OMP_THREAD_LIMIT:</span></span></strong></span></span></span></span><span><span><span><span><span><span><span> Specifies the maximum number of OpenMP threads to use in a contention group. The default value is OMP_THREAD_LIMIT=undefined.</span></span></span></span></span></span></span></li>
|
||
</ul><p><span><span><span><span><span><span>These configuration values can be overridden as needed by passing arguments to Docker or Kubernetes.</span></span></span></span></span></span></p>
|
||
|
||
<p> </p>
|
||
|
||
<h2><span><span><span><span><span><span><span>Intel AVX-512</span></span></span></span></span></span></span><span><span><span><span><span><span><span> with Go</span></span></span></span></span></span></span></h2>
|
||
|
||
<p><span><span><span><span><span><span><span>The Go programming language can use a system’s Intel AVX-512 capabilities via three methods:</span></span></span></span></span></span></span></p>
|
||
|
||
<ol><li><span><span><span><span><span><span><span>Direct access with Go assembly</span></span></span></span></span></span></span></li>
|
||
<li><span><span><span><span><span><span><span>Access with the </span></span></span></span></span></span></span><span><span><span><span><span><span>Go</span></span></span></span></span></span><span><span><span><span><span><span><span> </span></span></span></span></span></span></span><a href="https://golang.org/cmd/cgo/"><span><span><span><span><span><span><span><span><span>cgo interface</span></span></span></span></span></span></span></span></span></a><span><span><span><span><span><span><span> </span></span></span></span></span></span></span><span><span><span><span><span><span>using intrinsics for Intel AVX-512</span></span></span></span></span></span></li>
|
||
<li><span><span><span><span><span><span><span>Indirect access with 3rd party libraries such as Gonum</span></span></span></span></span></span></span></li>
|
||
</ol><p><span><span><span><span><span><span><span>The method you choose will depend on your application and language preferences.</span></span></span></span></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span><span>If you are already familiar with low-level programming using assembly or C, or your application will use data-intensive computing, the Go assembly or cgo methods will result in a greater </span></span></span></span></span></span></span><span><span><span><span><span><span>performance improvement. This is because both Go assembly and cgo </span></span></span></span></span></span><span><span><span><span><span><span><span>operate directly on the Intel AVX-512 instructions and registers.</span></span></span></span></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span><span>If you are familiar with the Go language and do not want to worry about low-level programming, your application can still take advantage of Intel AVX-512 using the Gonum method with the netlib interface.</span></span></span></span></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span>The examples below show implementations of each method with tests to evaluate performance. </span></span></span></span></span></span><span><span><span><span><span><span>Hardware and software configurations used in the examples and tests are described in Table 1 and Table 2. </span></span></span></span></span></span></p>
|
||
|
||
<p> </p>
|
||
|
||
<table><caption><em>Table 1: Hardware configurations used for tests.</em></caption>
|
||
<thead><tr><th colspan="2">
|
||
<p><span><span><span><span><span><span><strong><span><span>Hardware</span></span></strong></span></span></span></span></span></span></p>
|
||
</th>
|
||
</tr></thead><tbody><tr><td>
|
||
<p><strong><span><span><span><span><span><span><span><span><span>Platform</span></span></span></span></span></span></span></span></span></strong></p>
|
||
</td>
|
||
<td>
|
||
<p><span><span><span><span><span><span><span><span><span>Intel® Xeon® Platinum</span></span></span></span></span></span></span><span><span><span><span><span><span> 8269CY CPU @ 2.50 GHz (Intel processor code-named Cascade Lake) in Aliyun* Cloud</span></span></span></span></span></span></span></span></p>
|
||
</td>
|
||
</tr><tr><td>
|
||
<p><strong><span><span><span><span><span><span><span><span><span>CPU</span></span></span></span></span></span></span></span></span></strong></p>
|
||
</td>
|
||
<td>
|
||
<p><span><span><span><span><span><span><span><span><span>1</span></span></span></span></span></span></span></span></span></p>
|
||
</td>
|
||
</tr><tr><td>
|
||
<p><strong><span><span><span><span><span><span><span><span><span>Threads/Cores</span></span></span></span></span></span></span></span></span></strong></p>
|
||
</td>
|
||
<td>
|
||
<p><span><span><span><span><span><span><span><span><span>16/8</span></span></span></span></span></span></span></span></span></p>
|
||
</td>
|
||
</tr><tr><td>
|
||
<p><strong><span><span><span><span><span><span><span><span><span>Memory</span></span></span></span></span></span></span></span></span></strong></p>
|
||
</td>
|
||
<td>
|
||
<p><span><span><span><span><span><span><span><span><span>16G</span></span></span></span></span></span></span></span></span></p>
|
||
</td>
|
||
</tr><tr><td>
|
||
<p><strong><span><span><span><span><span><span><span><span><span>Disk</span></span></span></span></span></span></span></span></span></strong></p>
|
||
</td>
|
||
<td>
|
||
<p><span><span><span><span><span><span><span><span><span>200G SSD</span></span></span></span></span></span></span></span></span></p>
|
||
</td>
|
||
</tr></tbody></table><p> </p>
|
||
|
||
<table><caption><em>Table 2: Software configurations used for tests.</em></caption>
|
||
<thead><tr><th colspan="2">
|
||
<p><span><span><span><span><span><span><strong><span><span>Software</span></span></strong></span></span></span></span></span></span></p>
|
||
</th>
|
||
</tr></thead><tbody><tr><td>
|
||
<p><strong><span><span><span><span><span><span><span><span><span>Clear Linux OS</span></span></span></span></span></span></span></span></span></strong></p>
|
||
</td>
|
||
<td>
|
||
<p><span><span><span><span><span><span><span><span><span>Version 32310</span></span></span></span></span></span></span></span></span></p>
|
||
</td>
|
||
</tr><tr><td>
|
||
<p><strong><span><span><span><span><span><span><span><span><span>Linux Kernel</span></span></span></span></span></span></span></span></span></strong></p>
|
||
</td>
|
||
<td>
|
||
<p><span><span><span><span><span><span><span><span><span>5.3.14 x86_64</span></span></span></span></span></span></span></span></span></p>
|
||
</td>
|
||
</tr><tr><td>
|
||
<p><strong><span><span><span><span><span><span><span><span><span>GCC</span></span></span></span></span></span></span></span></span></strong></p>
|
||
</td>
|
||
<td>
|
||
<p><span><span><span><span><span><span><span><span><span>9.2.1</span></span></span></span></span></span></span></span></span></p>
|
||
</td>
|
||
</tr><tr><td>
|
||
<p><strong><span><span><span><span><span><span><span><span><span>OpenBLAS</span></span></span></span></span></span></span></span></span></strong></p>
|
||
</td>
|
||
<td>
|
||
<p><span><span><span><span><span><span><span><span><span>0.3.7</span></span></span></span></span></span></span></span></span></p>
|
||
</td>
|
||
</tr><tr><td>
|
||
<p><strong><span><span><span><span><span><span><span><span><span>Go</span></span></span></span></span></span></span></span></span></strong></p>
|
||
</td>
|
||
<td>
|
||
<p><span><span><span><span><span><span><span><span><span>1.13</span></span></span></span></span></span></span></span></span></p>
|
||
</td>
|
||
</tr><tr><td>
|
||
<p><strong><span><span><span><span><span><span><span><span><span>Gonum</span></span></span></span></span></span></span></span></span></strong></p>
|
||
</td>
|
||
<td>
|
||
<p><span><span><span><span><span><span><span><span><span>0.6.2</span></span></span></span></span></span></span></span></span></p>
|
||
</td>
|
||
</tr><tr><td>
|
||
<p><strong><span><span><span><span><span><span><span><span><span>Runc (container runtime)</span></span></span></span></span></span></span></span></span></strong></p>
|
||
</td>
|
||
<td>
|
||
<p><span><span><span><span><span><span><span><span>1.0.0-rc9</span></span></span></span></span></span></span></span></p>
|
||
</td>
|
||
</tr></tbody></table><h2> </h2>
|
||
|
||
<h2><span><span><span><span><span><span>Intel AVX-512 with Go assembly</span></span></span></span></span></span></h2>
|
||
|
||
<p><a href="https://golang.org/doc/asm"><span><span><span><span><span><span><span><span><span>Go has invented its own assembly language</span></span></span></span></span></span></span></span></span></a><span><span><span><span><span><span><span> based on the </span></span></span></span></span></span></span><a href="https://9p.io/sys/doc/asm.html"><span><span><span><span><span><span><span><span><span>Plan 9 assembler</span></span></span></span></span></span></span></span></span></a><span><span><span><span><span><span><span>. The Go assembler exposes AVX registers directly to developers. If you aren’t familiar with <span><span>Go assembly</span></span>, look at </span></span></span></span></span></span></span><a href="https://github.com/teh-cmc/go-internals/blob/master/chapter1_assembly_primer/README.md"><span><span><span><span><span><span><span><span><span>A Primer on Go Assembly</span></span></span></span></span></span></span></span></span></a><span><span><span><span><span><span><span>.</span></span></span></span></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span><span>This </span></span></span></span></span></span></span><span><span><span><span><span><span>example</span></span></span></span></span></span><span><span><span><span><span><span> implements vector dot multiplication in Go assembly and takes advantage of </span></span></span></span></span></span><a href="https://github.com/golang/go/wiki/AVX512"><span><span><span><span><span><span><span><span><span>Go Intel AVX-512 support</span></span></span></span></span></span></span></span></span></a><span><span><span><span><span><span><span>. The example is tested in the clearlinux/golang container. The workflow and stack for the example are shown in Figure 2. </span></span></span></span></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span><span>For this example:</span></span></span></span></span></span></span></p>
|
||
|
||
<ol><li><span><span><span><span><span><span><span>Instructions such as VINSERTI128 and VPSHUFD are SIMD instructions. </span></span></span></span></span></span></span></li>
|
||
<li><span><span><span><span><span><span><span>MMX registers are M0...M7. </span></span></span></span></span></span></span></li>
|
||
<li><span><span><span><span><span><span><span>SSE registers are X0...X15.</span></span></span></span></span></span></span></li>
|
||
<li><span><span><span><span><span><span><span>AVX registers are Y0...Y15. </span></span></span></span></span></span></span></li>
|
||
<li><span><span><span><span><span><span><span>AVX-512 registers are Z0...Z31 (introduced in Go release 1.11).</span></span></span></span></span></span></span></li>
|
||
</ol><p class="text-align-center"><strong><img alt="Figure 2 Stack view and workflow of sample matrix dot vector program" data-entity-type="file" data-entity-uuid="0fb68c34-09a6-4b4b-8835-a3044cffe667" src="https://clearlinux.org/sites/default/files/inline-images/matrix_dot_fig2_0.png" width="500" height="377" loading="lazy" /></strong></p>
|
||
|
||
<p class="text-align-center"><em>Figure 2 Stack view and workflow of sample matrix dot vector program</em></p>
|
||
|
||
<ol><li><span><span><span><span><span><span>Declare two empty Go functions, </span></span></span></span></span></span><span><span><span><strong><span><span>VDotProdAVX512</span></span></strong></span></span></span><span><span><span><span><span><span> and </span></span></span></span></span></span><span><span><span><strong><span><span>VDotProdAVX2</span></span></strong></span></span></span><span><span><span><span><span><span>, each taking two int32 vectors, a[] and b[], as parameters. Each function returns an int32 result, which is the vector dot product of a[] and b[].</span></span></span></span></span></span>
|
||
|
||
<pre>
|
||
<code class="language-go">func VDotProdAVX512(a[] int32, b[] int32) int32
|
||
func VDotProdAVX2(a[] int32, b[] int32) int32</code></pre>
|
||
</li>
|
||
<li><span><span><span><span><span><span>Write the implementation of the </span></span></span></span></span></span><span><span><span><strong><span><span>VDotProdAVX512 </span></span></strong><span><span><span>function in Go assembly. </span></span></span></span></span></span>
|
||
<pre>
|
||
<code class="language-go">// definition of func VDotProdAVX512(a[] int32, b[] int32) int32
|
||
// $96 denotes the size in bytes of the stack-frame.
|
||
// $56 specifies the size of the arguments passed in by the caller.
|
||
TEXT ·VDotProdAVX512(SB), $96-56
|
||
// Move the address of a, address of b, and array length to registers
|
||
// SI, DI, and CX respectively. For simplicity, we assume the length of
|
||
// array a and b are equal and addresses have a 64-byte alignment.
|
||
MOVQ a+0(FP), SI // address of a
|
||
MOVQ b+24(FP), DI // address of b
|
||
MOVQ len+8(FP), CX // array length
|
||
|
||
// Z4 is an accumulator that sums all vector multiplication results.
|
||
// Compute Z3 = Z1 * Z2 and Z4 = Z4 + Z3 using the VMOVDQU32, VPMULLD
|
||
// and VPADDD instructions. If the array length is greater than 16,
|
||
// loop execution until we reach the end of array. Store Z4 to the stack
|
||
// frame address, vr, which is 64 bytes (512 bits) long
|
||
VPXORD Z4, Z4, Z4
|
||
start:
|
||
VMOVDQU32 (SI), Z1
|
||
VMOVDQU32 (DI), Z2
|
||
VPMULLD Z1, Z2, Z3
|
||
VPADDD Z3, Z4, Z4
|
||
ADDQ $64, SI
|
||
ADDQ $64, DI
|
||
SUBQ $16, CX
|
||
JNZ start
|
||
VMOVDQU32 Z4, d0-64(SP)// vector result to stack
|
||
|
||
// Convert the vector result to a scalar result by summing
|
||
// the INT32 elements and return the result.
|
||
LEAQ d0-64(SP), BX
|
||
MOVQ BX, 0(SP)
|
||
MOVQ $16, AX // array length
|
||
MOVQ AX, 8(SP)
|
||
CALL ·Sum32(SB) // invoke Sum32 to get scalar value
|
||
MOVL 24(SP), AX
|
||
MOVL AX, ret+48(FP) // final result
|
||
RET
|
||
|
||
TEXT ·Sum32(SB), $0-32
|
||
MOVQ $0, SI
|
||
MOVQ av+0(FP), BX // address of vector
|
||
MOVQ lv+8(FP), CX // len of vector
|
||
start:
|
||
ADDL (BX), SI
|
||
ADDQ $4, BX
|
||
DECQ CX
|
||
JNZ start
|
||
MOVL SI, ret+24(FP)
|
||
RET
|
||
</code></pre>
|
||
</li>
|
||
<li><span><span><span><span><span><span>Write the implementation of the </span></span></span></span></span></span><span><span><span><strong><span><span>VDotProdAVX2 </span></span></strong><span><span><span>function in Go assembly for comparison.</span></span></span></span></span></span>
|
||
<pre>
|
||
<code class="language-go">// definition of func VDotProdAVX2(a[] int32, b[] int32) int32
|
||
TEXT ·VDotProdAVX2(SB), $64-56
|
||
MOVQ a+0(FP), SI // address of a
|
||
MOVQ b+24(FP), DI // address of b
|
||
MOVQ len+8(FP), CX // array length
|
||
VPXORD Y4, Y4, Y4
|
||
|
||
start:
|
||
VMOVDQU32 (SI), Y1
|
||
VMOVDQU32 (DI), Y2
|
||
VPMULLD Y1, Y2, Y3
|
||
VPADDD Y3, Y4, Y4
|
||
ADDQ $32, SI
|
||
ADDQ $32, DI
|
||
SUBQ $8, CX
|
||
JNZ start
|
||
VMOVDQU32 Y4, d0-32(SP)
|
||
|
||
LEAQ d0-32(SP), BX
|
||
MOVQ BX, 0(SP)
|
||
MOVQ $8, AX //array length
|
||
MOVQ AX, 8(SP)
|
||
CALL ·Sum32(SB)
|
||
MOVL 24(SP), AX
|
||
MOVL AX, ret+48(FP)
|
||
RET
|
||
</code></pre>
|
||
</li>
|
||
<li><span><span><span><span><span><span>Create a benchmark using </span></span></span></span></span></span><a href="https://golang.org/pkg/testing/"><span><span><span><span><span><span><span><span>Go's Package testing</span></span></span></span></span></span></span></span></a><span><span><span><span><span><span> functionality. The test performs vector dot multiplication using Go assembly with Intel AVX-512. For comparison, the same test is implemented using Go assembly with Intel AVX2 and Go without AVX. Each test measures execution time (ns/op) and data throughput (MB/s) of the function.</span></span></span></span></span></span>
|
||
<pre>
|
||
<code class="language-go">import "testing"
|
||
|
||
func BenchmarkVDotProdAVX512(b *testing.B) {
|
||
var d1[1024] int32
|
||
var d2[1024] int32
|
||
for i := 0; i < 1024; i++ {
|
||
d1[i] = int32(i + 1);
|
||
d2[i] = int32(2 * i);
|
||
}
|
||
|
||
var sum2 int32 = 0
|
||
b.SetBytes(1024)
|
||
b.ResetTimer()
|
||
for i := 0; i < b.N; i++ {
|
||
sum2 += VDotProdAVX512(d1[:], d2[:]) % 1024
|
||
}
|
||
}
|
||
|
||
func BenchmarkVDotProdAVX2(b *testing.B) {
|
||
var d1[1024] int32
|
||
var d2[1024] int32
|
||
for i := 0; i < 1024; i++ {
|
||
d1[i] = int32(i + 1);
|
||
d2[i] = int32(2 * i);
|
||
}
|
||
|
||
var sum2 int32 = 0
|
||
b.SetBytes(1024)
|
||
b.ResetTimer()
|
||
for i := 0; i < b.N; i++ {
|
||
sum2 += VDotProdAVX2(d1[:], d2[:]) % 1024
|
||
}
|
||
}
|
||
|
||
func VDotProd(a[] int32, b[] int32) int32 {
|
||
var sum1 int32
|
||
sum1 = 0
|
||
for i := 0; i < len(a); i++ {
|
||
sum1 += a[i] * b[i]
|
||
}
|
||
return sum1
|
||
}
|
||
|
||
func BenchmarkVDotProd(b *testing.B) {
|
||
var d1[1024] int32
|
||
var d2[1024] int32
|
||
for i := 0; i < 1024; i++ {
|
||
d1[i] = int32(i + 1);
|
||
d2[i] = int32(2 * i);
|
||
}
|
||
|
||
var sum2 int32 = 0
|
||
b.SetBytes(1024)
|
||
b.ResetTimer()
|
||
for i := 0; i < b.N; i++ {
|
||
sum2 += VDotProd(d1[:], d2[:]) % 1024
|
||
}
|
||
}</code></pre>
|
||
</li>
|
||
<li><span><span><span><span><span><span>Run the Go benchmarks.</span></span></span></span></span></span>
|
||
<pre>
|
||
<code class="language-bash">$ go test -bench .
|
||
goos: linux
|
||
goarch: amd64
|
||
pkg: golang_assembly
|
||
BenchmarkVDotProd-8 1817728 659 ns/op 1553.48 MB/s
|
||
BenchmarkVDotProdAVX2-8 11320402 106 ns/op 9632.04 MB/s
|
||
BenchmarkVDotProdAVX512-8 15705526 76.4 ns/op 13404.84 MB/s
|
||
</code></pre>
|
||
<em><span><span><span><span><span><span>See Table 1 and Table 2 for configuration details. For more complete information about performance and benchmark results, visit </span></span></span></span></span></span><a href="http://www.intel.com/benchmarks"><span><span><span><span><span><span><span><span>www.intel.com/benchmarks</span></span></span></span></span></span></span></span></a><span><span><span><span><span><span>. </span></span></span></span></span></span></em></li>
|
||
</ol><p><span><span><span><span><span><span>The test results show that the implementation using Go assembly with Intel AVX-512 (BenchmarkVDotProdAVX512-8) had a shorter execution time and higher data throughput than the implementations using Go assembly with Intel AVX2 (BenchmarkVDotProdAVX2-8) and Go without AVX (BenchmarkVDotProd-8).</span></span></span></span></span></span></p>
|
||
|
||
<p> </p>
|
||
|
||
<h2><span><span><span><span><span><span>Intel AVX-512 with Go cgo</span></span></span></span></span></span></h2>
|
||
|
||
<p><span><span><span><span><span><span><span>Cgo enables Go packages to call C code. Cgo outputs Go and C files that can be combined into a single Go package. </span></span></span></span></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span><span>The example below implements </span></span></span></span></span></span></span><span><span><span><span><span><span>vector dot multiplication with </span></span></span></span></span></span><span><span><span><span><span><span><span>VNNI, part of </span></span></span></span></span></span></span><span><span><span><span><span><span>Intel AVX-512</span></span></span></span></span></span><span><span><span><span><span><span><span>, using cgo. It is tested in the clearlinux/golang container.</span></span></span></span></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span><span>Figure 3 provides an overview of how Intel AVX-512 VNNI works.</span></span></span></span></span></span></span></p>
|
||
|
||
<p class="text-align-center"><img alt="Figure 3 Intel AVX-512 VNNI overview" data-entity-type="file" data-entity-uuid="f92b418e-27b2-4873-8c6f-f190eee43a4a" src="https://clearlinux.org/sites/default/files/inline-images/avx512_vnni_fig3.png" width="500" height="255" loading="lazy" /></p>
|
||
|
||
<p class="text-align-center"><em>Figure 3 Intel AVX-512 VNNI overview</em></p>
|
||
|
||
<ol><li><span><span><span><span><span><span>In the preamble section of the Go program, define CFLAGS, LDFLAGS, and include header files.</span></span></span></span></span></span>
|
||
|
||
<pre>
|
||
<code class="language-go">package avx512
|
||
/*
|
||
#cgo CFLAGS: -mavx512f -mavx512vl -mavx512bw -mavx512vnni
|
||
#cgo LDFLAGS: -lm
|
||
#include <stdio.h>
|
||
#include <math.h>
|
||
#include <stdlib.h>
|
||
#include <x86intrin.h>
|
||
</code></pre>
|
||
</li>
|
||
<li><span><span><span><span><span><span>Create a VNNI dot multiplication function using Intel AVX-512 in C code. </span></span></span></span></span></span><br /><br /><span><span><span><span><span><span>For code simplicity we assume the int8 input vectors (x, y) have a 64-byte alignment. </span></span></span><strong><span><span>_mm512_dpbusds_epi32</span></span></strong><span><span><span> is the key fused multiply-add (FMA) instruction, which uses one instruction to implement vector dot multiplication from an int8 input to an int32 output. Without using the FMA instruction, three separate instructions are needed to execute the multiplication, as shown in Figure 3.</span></span></span></span></span></span>
|
||
<pre>
|
||
<code class="language-go">int32_t avx512_dot_vnni(const size_t n, int8_t *x, int8_t *y)
|
||
{
|
||
static const size_t single_size = 64;
|
||
const size_t end = n / single_size;
|
||
__m512i *vx = (__m512i *)x;
|
||
__m512i *vy = (__m512i *)y;
|
||
__m512i vsum = {0};
|
||
__m512i *psum = &vsum;
|
||
for(size_t i = 0; i < end; ++i) {
|
||
*psum = _mm512_dpbusds_epi32(vsum, vx[i], vy[i]);
|
||
}
|
||
int32_t *t = (int32_t *)psum;
|
||
int32_t sum = 0;
|
||
for (int i = 0; i < 16; i++) {
|
||
sum += t[i];
|
||
}
|
||
return sum;
|
||
}</code></pre>
|
||
</li>
|
||
<li><span><span><span><span><span><span>Implement a dot multiplication function using Intel AVX2 for comparison.</span></span></span></span></span></span>
|
||
<pre>
|
||
<code class="language-go">int32_t avx2_dot_int8(const size_t n, int8_t *x, int8_t *y)
|
||
{
|
||
static const size_t single_size = 32;
|
||
const size_t end = n / single_size;
|
||
const int16_t op4[16] = {[0 ... 15] = 1};
|
||
__m256i *vx = (__m256i *)x;
|
||
__m256i *vy = (__m256i *)y;
|
||
__m256i vsum = {0};
|
||
int32_t *t = (int32_t *)&vsum;
|
||
for(size_t i = 0; i < end; ++i) {
|
||
__m256i vresult1 = _mm256_maddubs_epi16(vx[i], vy[i]);
|
||
__m256i vresult2 = _mm256_madd_epi16(vresult1, *(__m256i *)&op4);
|
||
// trick here is to stop compiler over-optimize
|
||
*(__m256i *)t = _mm256_add_epi32(vsum, vresult2);
|
||
}
|
||
int32_t sum = 0;
|
||
for (int i = 0; i < 8; i++) {
|
||
sum += t[i];
|
||
}
|
||
return sum;
|
||
}
|
||
*/
|
||
</code></pre>
|
||
</li>
|
||
<li><span><span><span><span><span><span>Define simple Go functions that call the C functions from the previous steps.</span></span></span></span></span></span>
|
||
<pre>
|
||
<code class="language-go">import "C"
|
||
import (
|
||
"math"
|
||
"reflect"
|
||
"unsafe"
|
||
)
|
||
func Dot_avx512_vnni(size int, x, y []int8) int32 {
|
||
size = align(size)
|
||
dot := C.avx512_dot_vnni((C.size_t)(size), (*C.int8_t)(&x[0]), (*C.int8_t)(&y[0]))
|
||
return int32(dot)
|
||
}
|
||
|
||
func Dot_avx2_int8(size int, x, y []int8) int32 {
|
||
size = align(size)
|
||
dot := C.avx2_dot_int8((C.size_t)(size), (*C.int8_t)(&x[0]), (*C.int8_t)(&y[0]))
|
||
return int32(dot)
|
||
}
|
||
</code></pre>
|
||
</li>
|
||
<li><span><span><span><span><span><span>Create a benchmark using </span></span></span></span></span></span><a href="https://golang.org/pkg/testing/"><span><span><span><span><span><span><span><span>Go’s Package testing</span></span></span></span></span></span></span></span></a><span><span><span><span><span><span> functionality. The test performs vector dot multiplication using Go cgo with Intel AVX-512 VNNI. For comparison, the same test is implemented using Go cgo with Intel AVX2. Each test measures execution time (ns/op) and data throughput (MB/s) of the function.</span></span></span></span></span></span>
|
||
<pre>
|
||
<code class="language-go">import "testing"
|
||
|
||
func BenchmarkAVX512DotVnni(b *testing.B) {
|
||
size := benchsize
|
||
vx := Make_int8(size)
|
||
vy := Make_int8(size)
|
||
for i := 0; i < size; i++ {
|
||
vx[i] = int8(rand.Intn(127))
|
||
vy[i] = int8(rand.Intn(127))
|
||
}
|
||
b.SetBytes(int64(size))
|
||
b.ResetTimer()
|
||
var result int32 = 0
|
||
for i := 0; i < b.N; i++ {
|
||
result += Dot_avx512_vnni(size, vx, vy)
|
||
vx[i % size] = int8(result)
|
||
vy[i % size] = int8(result)
|
||
}
|
||
}
|
||
|
||
func BenchmarkAvx2DotInt8(b *testing.B) {
|
||
size := benchsize
|
||
vx := Make_int8(size)
|
||
vy := Make_int8(size)
|
||
for i := 0; i < size; i++ {
|
||
vx[i] = int8(rand.Intn(127))
|
||
vy[i] = int8(rand.Intn(127))
|
||
}
|
||
b.SetBytes(int64(size))
|
||
b.ResetTimer()
|
||
var result int32 = 0
|
||
for i := 0; i < b.N; i++ {
|
||
result += Dot_avx2_int8(size, vx, vy)
|
||
vx[i % size] = int8(result)
|
||
vy[i % size] = int8(result)
|
||
}
|
||
}
|
||
</code></pre>
|
||
</li>
|
||
<li><span><span><span><span><span><span>Run the Go benchmark.</span></span></span></span></span></span>
|
||
<pre>
|
||
<code class="language-bash">$ go test -bench . -benchtime 10s
|
||
goos: linux
|
||
goarch: amd64
|
||
pkg: golang-avx
|
||
BenchmarkAvx2DotInt8-8 99550977 122 ns/op 8491.32 MB/s
|
||
BenchmarkAVX512DotVnni-8 100000000 106 ns/op 9760.42 MB/s
|
||
</code></pre>
|
||
<em><span><span><span><span><span><span>See Table 1 and Table 2 for configuration details. For more complete information about performance and benchmark results, visit </span></span></span></span></span></span><a href="http://www.intel.com/benchmarks"><span><span><span><span><span><span><span><span>www.intel.com/benchmarks</span></span></span></span></span></span></span></span></a><span><span><span><span><span><span>. </span></span></span></span></span></span></em></li>
|
||
</ol><p><span><span><span><span><span><span>The test results show that the implementation using Go cgo with Intel AVX-512 VNNI (BenchmarkAVX512DotVnni-8) had the shortest execution time and the highest data throughput compared with the implementation using Go cgo with Intel AVX2 (BenchmarkAvx2DotInt8-8) and the implementation using Go cgo with Intel AVX-512.</span></span></span></span></span></span></p>
|
||
|
||
<p> </p>
|
||
|
||
<h2><span><span><span><span><span><span>Intel AVX-512 with Gonum</span></span></span></span></span></span></h2>
|
||
|
||
<p><a href="https://github.com/gonum/gonum"><span><span><span><span><span><span><span><span><span>Gonum</span></span></span></span></span></span></span></span></span></a><span><span><span><span><span><span><span> netlib is a set of wrapper packages that provide an interface to Netlib CBLAS implementations such as OpenBLAS. This example </span></span></span></span></span></span></span><span><span><span><span><span><span>implements matrix multiplication </span></span></span></span></span></span><span><span><span><span><span><span><span>using Gonum netlib with an OpenBLAS backend. It is tested in the clearlinux/gonum container.</span></span></span></span></span></span></span></p>
|
||
|
||
<p class="text-align-center"><strong><img alt="Figure 4 Diagram of Gonum system architecture" data-entity-type="file" data-entity-uuid="de969832-5078-4335-9e86-436191c09e3a" src="https://clearlinux.org/sites/default/files/inline-images/gonum_fig4.png" width="395" height="322" loading="lazy" /></strong></p>
|
||
|
||
<p class="text-align-center"><em>Figure 4 Diagram of Gonum system architecture</em></p>
|
||
|
||
<ol><li><span><span><span><span><span><span>Install <span>the Gonum core packages and netlib packages which provide the interface to the CBLAS implementations.</span></span></span></span></span></span></span>
|
||
|
||
<pre>
|
||
<code class="language-bash">go get -u gonum.org/v1/gonum/...
|
||
go get -d gonum.org/v1/netlib/...
|
||
</code></pre>
|
||
|
||
<p><span><span><span><span><span><span><span>Or pull the available clearlinux/gonum docker container from DockerHub which contains all needed packages.</span></span></span></span></span></span></span></p>
|
||
|
||
<pre>
|
||
<code class="language-bash">docker pull clearlinux/gonum</code></pre>
|
||
</li>
|
||
<li><span><span><span><span><span><span>Import the required packages into your Go program. To use netlib CBLAS, you must explicitly import gonum.org/v1/netlib/blas/netlib at the beginning of the file. </span></span></span></span></span></span>
|
||
<pre>
|
||
<code class="language-go">package main
|
||
|
||
import (
|
||
"testing"
|
||
"math/rand"
|
||
"gonum.org/v1/gonum/mat"
|
||
"gonum.org/v1/gonum/blas/gonum"
|
||
"gonum.org/v1/netlib/blas/netlib"
|
||
"gonum.org/v1/gonum/blas"
|
||
"gonum.org/v1/gonum/blas/blas64"
|
||
)
|
||
</code></pre>
|
||
</li>
|
||
<li><span><span><span><span><span><span>Create a benchmark using </span></span></span></span></span></span><a href="https://golang.org/pkg/testing/"><span><span><span><span><span><span><span><span>Go's Package testing</span></span></span></span></span></span></span></span></a><span><span><span><span><span><span> functionality. The test performs matrix multiplication using the General Matrix Multiply (GEMM) implementation of Gonum netlib CBLAS, which calls the OpenBLAS implementation underneath. </span></span></span></span></span></span><br /><br /><span><span><span><span><span><span>In the sample code below,</span></span></span></span></span></span><span><span><span><strong><span><span> blas64.Use(netlib.Implementation{})</span></span></strong></span></span></span><span><span><span><span><span><span> causes netlib CBLAS to be used instead of the Go BLAS.</span></span></span></span></span></span><br /><br /><span><span><span><span><span><span>The test measures execution time (ns/op) and data throughput (MB/s) of the function.</span></span></span></span></span></span>
|
||
<pre>
|
||
<code class="language-go">func BenchmarkCBlas64(b *testing.B) {
|
||
blas64.Use(netlib.Implementation{})
|
||
in1 := make([]float64, M * K)
|
||
in2 := make([]float64, K * N)
|
||
for i := range in1 {
|
||
in1[i] = rand.NormFloat64()
|
||
in2[i] = rand.NormFloat64()
|
||
}
|
||
out := make([]float64, M * N)
|
||
b.SetBytes(M*K*N)
|
||
b.ResetTimer()
|
||
for i := 0; i < b.N; i++ {
|
||
blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, blas64.General{
|
||
Rows: M,
|
||
Cols: K,
|
||
Stride: K,
|
||
Data: in1,
|
||
}, blas64.General{
|
||
Rows: K,
|
||
Cols: N,
|
||
Stride: N,
|
||
Data: in2,
|
||
}, 1, blas64.General{
|
||
Rows: M,
|
||
Cols: N,
|
||
Stride: N,
|
||
Data: out,
|
||
})
|
||
}
|
||
}
|
||
</code></pre>
|
||
|
||
<p> </p>
|
||
</li>
|
||
<li>
|
||
<p><span><span><span><span><span><span>For comparison, create a second benchmark that performs the same matrix multiplication using the default Go BLAS implementation with the same test code. </span></span></span></span></span></span></p>
|
||
|
||
<pre>
|
||
<code class="language-go">func BenchmarkGoBlas64(b *testing.B) {
|
||
blas64.Use(gonum.Implementation{})
|
||
// copy the rest of the code from the BenchmarkCBlas64 function above
|
||
// …
|
||
}
|
||
</code></pre>
|
||
</li>
|
||
<li>
|
||
<p><span><span><span><span><span><span>Run the Go benchmarks.</span></span></span></span></span></span></p>
|
||
|
||
<pre>
|
||
<code class="language-bash">$ go test -bench . -benchtime 5s
|
||
goos: linux
|
||
goarch: amd64
|
||
BenchmarkGoBlas64-8 2209 2708501 ns/op 9968.61 MB/s
|
||
BenchmarkCBlas64-8 12014 496631 ns/op 54366.31 MB/s
|
||
</code></pre>
|
||
|
||
<p><em><span><span><span><span><span><span>See Table 1 and Table 2 for configuration details. For more complete information about performance and benchmark results, visit </span></span></span></span></span></span><a href="http://www.intel.com/benchmarks"><span><span><span><span><span><span><span><span>www.intel.com/benchmarks</span></span></span></span></span></span></span></span></a><span><span><span><span><span><span>. </span></span></span></span></span></span></em></p>
|
||
</li>
|
||
</ol><p> </p>
|
||
|
||
<p><span><span><span><span><span><span>The test results show that the Gonum netlib CBLAS (BenchmarkCBlas64-8) implementation had a shorter execution time and higher data throughput than the default Go BLAS (BenchmarkGoBlas64-8) implementation. </span></span></span></span></span></span></p>
|
||
|
||
<p> </p>
|
||
|
||
<h2><span><span><span><span><span><span>Conclusion</span></span></span></span></span></span></h2>
|
||
|
||
<p><span><span><span><span><span><span><span>The examples show how to use Intel AVX-512 with Go to improve application performance using three different methods: direct access with Go assembly</span></span></span></span></span></span></span><span><span><span><span><span><span>, using the </span></span></span></span></span></span><span><span><span><span><span><span><span>Go cgo interface</span></span></span></span></span></span></span><span><span><span><span><span><span><span> </span></span></span></span></span></span></span><span><span><span><span><span><span>with intrinsics for Intel AVX-512, and via </span></span></span></span></span></span><span><span><span><span><span><span><span>indirect access with third-party libraries such as Gonum. </span></span></span></span></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span><span>Each method showed that using Intel AVX-512 improved Go application performance, with shorter execution times and improved data throughput overall. Using Go assembly with direct access to the CPU instruction set was faster than indirect access using cgo or Gonum. </span></span></span></span></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span><span>Clear Linux OS makes it easy to </span></span></span></span></span></span></span><span><span><span><span><span><span>use Intel AVX-512 in Go because </span></span></span></span></span></span><span><span><span><span><span><span><span>Clear Linux OS provides </span></span></span></span></span></span></span><span><span><span><span><span><span>a </span></span></span></span></span></span><span><span><span><span><span><span><span>deeply optimized software stack, including Intel AVX-512 enabled software</span></span></span></span></span></span></span><span><span><span><span><span><span>.</span></span></span></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span>By providing ready-to-use configurable Clear Linux OS based containers for Go applications, this performance potential can be easily deployed to a Kubernetes cluster. Both </span></span></span></span></span></span><span><span><span><span><span><span><span>containers are available on DockerHub:</span></span></span></span></span></span></span></p>
|
||
|
||
<ul><li><a href="https://hub.docker.com/r/clearlinux/golang"><span><span><span><span><span><span><span><span>clearlinux/golang</span></span></span></span></span></span></span></span></a><span><span><span><span><span><span> </span></span></span></span></span></span></li>
|
||
<li><a href="https://hub.docker.com/r/clearlinux/gonum"><span><span><span><span><span><span><span><span>clearlinux/gonum</span></span></span></span></span></span></span></span></a><span><span><span><span><span><span> </span></span></span></span></span></span></li>
|
||
</ul><p><span><span><span><span><span><span><span>Check out these container images and let us know if you see a difference in your Go development using Intel AVX-512 with Clear Linux OS! </span></span></span></span></span></span></span></p>
|
||
|
||
<p> </p>
|
||
|
||
<blockquote>
|
||
<p><span><span><span><strong><span><span>Notices & Disclaimers</span></span></strong></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span>Software and workloads used in performance tests may have been optimized for performance only on Intel microprocessors. </span></span></span></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span>Performance tests, such as SYSmark and MobileMark, are measured using specific computer systems, components, software, operations and functions. Any change to any of those factors may cause the results to vary. You should consult other information and performance tests to assist you in fully evaluating your contemplated purchases, including the performance of that product when combined with other products. For more complete information visit www.intel.com/benchmarks.</span></span></span></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span>Performance results are based on testing as of </span></span></span></span></span></span><span><span><span><strong><span><span>2020/01/10</span></span></strong></span></span></span><span><span><span><span><span><span> and may not reflect all publicly available updates. See Table 1 and Table 2 above for configuration details. No product or component can be absolutely secure. Your costs and results may vary. </span></span></span></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span>Intel technologies may require enabled hardware, software or service activation.</span></span></span></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span>© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others. </span></span></span></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span><span>See backup for configuration details. For more complete information about performance and benchmark results, visit </span></span></span></span></span></span></span><a href="http://www.intel.com/benchmarks"><span><span><span><span><span><span><span><span><span>www.intel.com/benchmarks</span></span></span></span></span></span></span></span></span></a><span><span><span><span><span><span><span>. </span></span></span></span></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span><span>Intel does not control or audit third-party data. You should consult other sources to evaluate accuracy.</span></span></span></span></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span><span>Intel's compilers may or may not optimize to the same degree for non-Intel microprocessors for optimizations that are not unique to Intel microprocessors. These optimizations include SSE2, SSE3, and SSSE3 instruction sets and other optimizations. Intel does not guarantee the availability, functionality, or effectiveness of any optimization on microprocessors not manufactured by Intel. Microprocessor-dependent optimizations in this product are intended for use with Intel microprocessors. Certain optimizations not specific to Intel microarchitecture are reserved for Intel microprocessors. Please refer to the applicable product User and Reference Guides for more information regarding the specific instruction sets covered by this notice. </span></span></span></span></span></span></span></p>
|
||
|
||
<p><span><span><span><span><span><span><span>Refer to </span></span></span></span></span></span></span><a href="http://software.intel.com/en-us/articles/optimization-notice"><span><span><span><span><span><span><span><span><span>http://software.intel.com/en-us/articles/optimization-notice</span></span></span></span></span></span></span></span></span></a><span><span><span><span><span><span><span> for more information regarding performance and optimization choices in Intel software products.</span></span></span></span></span></span></span></p>
|
||
</blockquote>
|
||
</div>
|
||
|
||
<!-- END OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/field/field--node--body.html.twig' -->
|
||
|
||
|
||
|
||
</div>
|
||
|
||
|
||
|
||
<a class="back_to_top" href="31814.html#">
|
||
<i class="fa fa-angle-up"> </i>
|
||
</a>
|
||
|
||
<!-- END OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/content/node--blog--full.html.twig' -->
|
||
|
||
|
||
|
||
<!-- END OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/block/block--clearlinux-theme-content.html.twig' -->
|
||
|
||
|
||
|
||
<!-- END OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/layout/region--content.html.twig' -->
|
||
|
||
|
||
</main>
|
||
|
||
<!-- /main -->
|
||
|
||
|
||
<footer class="footer">
|
||
<div class="container padding-md--top-bottom padding-md--left-right">
|
||
<div class="footer__logo">
|
||
<div class="footer__logo__wrapper">
|
||
<img class="footer__site_img_object" src="https://clearlinux.org/modules/custom/clearlinux.org/themes/clearlinux_theme/clear_linux_logo.svg" alt="Logo Clear Linux* Project"/>
|
||
<img class="footer__site_txt_object" src="https://clearlinux.org/modules/custom/clearlinux.org/themes/clearlinux_theme/sass/components/layout/footer/assets/clear-linux-text-white.svg" alt="" />
|
||
</div>
|
||
</div>
|
||
<div class="footer__details">
|
||
<div class="footer__top">
|
||
<div class="footer__social_media">
|
||
<ul class="footer__social_media__list">
|
||
<li class="footer__social_media__list_item">
|
||
<a target="_blank" tabindex='1' href="https://github.com/clearlinux" title="Github"><i class="fa "></i></a>
|
||
</li>
|
||
<li class="footer__social_media__list_item">
|
||
<a target="_blank" tabindex='1' href="https://www.youtube.com/channel/UChpmukwyvvdSmTA9gxKL_Fg" title="YouTube"><i class="fa "></i></a>
|
||
</li>
|
||
<li class="footer__social_media__list_item">
|
||
<a target="_blank" tabindex='1' href="http://twitter.com/clearlinux" title="Twitter"><i class="fa "></i></a>
|
||
</li>
|
||
<li class="footer__social_media__list_item">
|
||
<a target="_blank" tabindex='1' href="https://community.clearlinux.org/" title="Discourse"><i class="fa "></i></a>
|
||
</li>
|
||
</ul>
|
||
</div>
|
||
<hr>
|
||
<div class="footer__menu">
|
||
<ul class="footer__menu__list">
|
||
<li class="footer__menu__list_item">
|
||
<a href="https://www.intel.com/content/www/us/en/legal/trademarks.html">*Trademarks</a>
|
||
</li>
|
||
<li class="footer__menu__list_item">
|
||
<a href="https://www.intel.com/content/www/us/en/privacy/intel-cookie-notice.html">Cookies</a>
|
||
</li>
|
||
<li class="footer__menu__list_item">
|
||
<a href="https://www.intel.com/content/www/us/en/privacy/intel-privacy-notice.html">Privacy terms</a>
|
||
</li>
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
<div class="footer__bottom">
|
||
<p class="footer__copyright">© 2022 Intel Corporation. All Rights Reserved.<br>*Other names and brands may be claimed as the property of others.</p>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<div class="footer_bottom">
|
||
<div class="container padding-md--left-right">
|
||
<div class="footer_bottom__copyright">
|
||
<i class="fa fa-copyright"></i> This project belongs to 01.org, Intel's open source platform. </div>
|
||
</div>
|
||
</div>
|
||
</footer>
|
||
|
||
<!-- END OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/layout/page.html.twig' -->
|
||
|
||
|
||
</div>
|
||
|
||
<!-- END OUTPUT from 'core/themes/stable/templates/content/off-canvas-page-wrapper.html.twig' -->
|
||
|
||
|
||
|
||
<script src="https://clearlinux.org/core/assets/vendor/jquery/jquery.min.js?v=3.6.0"></script>
|
||
<script src="https://clearlinux.org/core/misc/polyfills/element.matches.js?v=9.4.8"></script>
|
||
<script src="https://clearlinux.org/core/assets/vendor/once/once.min.js?v=1.0.1"></script>
|
||
<script src="https://clearlinux.org/modules/contrib/extlink/extlink.js?v=9.4.8"></script>
|
||
|
||
<script src="https://cdnjs.cloudflare.com/ajax/libs/OwlCarousel2/2.2.1/owl.carousel.min.js" integrity="sha256-s5TTOyp+xlSmsDfr/aZhg0Gz+JejYr5iTJI8JxG1SkM=" crossorigin="anonymous"></script>
|
||
<script src="https://clearlinux.org/modules/custom/clearlinux.org/themes/clearlinux_theme/js/src/jquery.colorbox.min.js?v=9.4.8"></script>
|
||
<script src="https://clearlinux.org/modules/custom/clearlinux.org/themes/clearlinux_theme/js/src/clearlinux_theme.js?v=9.4.8"></script>
|
||
<script src="https://clearlinux.org/modules/custom/clearlinux.org/themes/clearlinux_theme/bower_components/clipboard/dist/clipboard.min.js?v=9.4.8"></script>
|
||
<script src="https://clearlinux.org/core/assets/vendor/js-cookie/js.cookie.min.js?v=3.0.1"></script>
|
||
<script src="https://clearlinux.org/modules/contrib/eu_cookie_compliance/js/eu_cookie_compliance.min.js?v=9.4.8" defer></script>
|
||
<script src="https://clearlinux.org/modules/custom/clearlinux.org/themes/clearlinux_theme/js/dist/layout/header/header.js"></script>
|
||
<script src="https://clearlinux.org/libraries/codesnippet/lib/highlight/highlight.pack.js?v=9.4.8"></script>
|
||
<script src="https://clearlinux.org/modules/contrib/codesnippet/js/codesnippet.js?v=9.4.8"></script>
|
||
|
||
</body>
|
||
</html>
|
||
|
||
<!-- END OUTPUT from 'modules/custom/clearlinux.org/themes/clearlinux_theme/templates/layout/html.html.twig' -->
|
||
|