-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindex.html
More file actions
182 lines (168 loc) · 20.9 KB
/
index.html
File metadata and controls
182 lines (168 loc) · 20.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
<!DOCTYPE html><html lang="zh-CN" data-theme="light"><head><meta charset="UTF-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0,viewport-fit=cover"><title>風月無辺 技术博客 - It's not the tools you use, but how you use them.</title><meta name="author" content="BoundlessWindMoon"><meta name="copyright" content="BoundlessWindMoon"><meta name="format-detection" content="telephone=no"><meta name="theme-color" content="#ffffff"><meta name="description" content="First, solve the problem. Then, write the code.">
<!-- Open Graph and Twitter Card metadata describing this page. NOTE(review): og:url and og:image point at http://example.com, which looks like a placeholder site URL that was never configured; confirm and set the real domain in the site generator config. -->
<meta property="og:type" content="website">
<meta property="og:title" content="風月無辺 技术博客">
<meta property="og:url" content="http://example.com/index.html">
<meta property="og:site_name" content="風月無辺 技术博客">
<meta property="og:description" content="First, solve the problem. Then, write the code.">
<meta property="og:locale" content="zh_CN">
<meta property="og:image" content="http://example.com/img/name.jpg">
<meta property="article:author" content="BoundlessWindMoon">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="http://example.com/img/name.jpg">
<link rel="shortcut icon" href="/img/favicon.png">
<link rel="canonical" href="http://example.com/index.html">
<!-- Every jsDelivr asset on this page is requested over https, so the preconnect hint names the https origin explicitly; a protocol-relative hint would warm the wrong origin when the page itself is served over http. -->
<link rel="preconnect" href="https://cdn.jsdelivr.net"/>
<!-- The busuanzi counter script is loaded protocol-relative, so its hint stays protocol-relative to match. -->
<link rel="preconnect" href="//busuanzi.ibruce.info"/>
<link rel="stylesheet" href="/css/index.css">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free/css/all.min.css">
<!-- Loaded with media="print" and switched to "all" by its onload handler. -->
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/node-snackbar/dist/snackbar.min.css" media="print" onload="this.media='all'">
<script>
(() => {
/**
 * Small localStorage wrapper that stores values together with an expiry time.
 *
 * Storage is treated as a best-effort cache: any failure (storage blocked or
 * unavailable, quota exceeded, corrupt entry) is handled as "nothing stored"
 * instead of throwing, so a storage problem cannot abort the rest of this
 * bootstrap script.
 */
const saveToLocal = {
  /**
   * Store `value` under `key`, expiring `ttl` days from now.
   * Does nothing when `ttl` is falsy (0, undefined, ...).
   */
  set: (key, value, ttl) => {
    if (!ttl) return
    // 86400000 ms = one day
    const expiry = Date.now() + ttl * 86400000
    try {
      localStorage.setItem(key, JSON.stringify({ value, expiry }))
    } catch (err) {
      // Storage unavailable or full: skip persisting, the caller keeps working.
    }
  },
  /**
   * Return the value stored under `key`, or undefined when the entry is
   * missing, expired (the expired entry is removed), unreadable, or when
   * storage cannot be accessed.
   */
  get: key => {
    try {
      const itemStr = localStorage.getItem(key)
      if (!itemStr) return undefined
      const { value, expiry } = JSON.parse(itemStr)
      if (Date.now() > expiry) {
        localStorage.removeItem(key)
        return undefined
      }
      return value
    } catch (err) {
      // Covers storage access errors, invalid JSON, and a stored literal `null`.
      return undefined
    }
  }
}
// Helper namespace published on window for the scripts that run later on this page.
window.btf = {
  saveToLocal,
  // Append an async script element for `url` to the document head, copying each
  // entry of `attr` onto it as an attribute. Resolves on load, rejects on error.
  getScript: (url, attr = {}) => new Promise((resolve, reject) => {
    const el = document.createElement('script')
    el.src = url
    el.async = true
    for (const [attrName, attrValue] of Object.entries(attr)) {
      el.setAttribute(attrName, attrValue)
    }
    // When the element exposes readyState, only 'loaded' / 'complete' count as done.
    const onReady = () => {
      const state = el.readyState
      if (!state || /loaded|complete/.test(state)) resolve()
    }
    el.onreadystatechange = onReady
    el.onload = onReady
    el.onerror = reject
    document.head.appendChild(el)
  }),
  // Append a stylesheet link for `url` to the document head (with `id` when given).
  // Resolves on load, rejects on error.
  getCSS: (url, id) => new Promise((resolve, reject) => {
    const el = document.createElement('link')
    el.rel = 'stylesheet'
    el.href = url
    if (id) el.id = id
    const onReady = () => {
      const state = el.readyState
      if (!state || /loaded|complete/.test(state)) resolve()
    }
    el.onreadystatechange = onReady
    el.onload = onReady
    el.onerror = reject
    document.head.appendChild(el)
  }),
  // Register `fn` in parent.globalFn[key], under `name` when given, otherwise under
  // the next numeric slot. Keys starting with 'pjax' are never registered.
  addGlobalFn: (key, fn, name = false, parent = window) => {
    if (key.startsWith('pjax')) return
    const registry = parent.globalFn || {}
    const bucket = registry[key] || {}
    const slot = name || Object.keys(bucket).length
    bucket[slot] = fn
    registry[key] = bucket
    parent.globalFn = registry
  }
}
// Apply the stored 'aside-status' value, if there is one, as the 'hide-aside'
// class on the root element ('hide' turns the class on, anything else turns it off).
const storedAside = saveToLocal.get('aside-status')
if (storedAside !== undefined) {
  const shouldHide = storedAside === 'hide'
  document.documentElement.classList.toggle('hide-aside', shouldHide)
}
// Tag the root element with 'apple' when the user agent string names an Apple device.
const appleUserAgent = /iPad|iPhone|iPod|Macintosh/
if (appleUserAgent.test(navigator.userAgent)) {
  document.documentElement.classList.add('apple')
}
})()
</script><link rel="stylesheet" href="/YSHST" media="print" onload="this.media='all'"><script>const GLOBAL_CONFIG = {
// Site-wide settings exposed as a global constant.
// Presumably read by /js/utils.js and /js/main.js, which load later on this page (verify).
root: '/',
// Search and translation integrations are all undefined in this build.
algolia: undefined,
localSearch: undefined,
translate: undefined,
// Code highlighting options: highlight.js plugin, copy button, language label,
// a height limit of 500 (unit not shown here), no fullpage mode, Mac-style header.
highlight: {"plugin":"highlight.js","highlightCopy":true,"highlightLang":true,"highlightHeightLimit":500,"highlightFullpage":false,"highlightMacStyle":true},
// Messages for the copy action: success / failure / browser not supported.
copy: {
success: '复制成功',
error: '复制失败',
noSupport: '浏览器不支持'
},
// Relative dates are switched off for both the homepage and posts.
relativeDate: {
homepage: false,
post: false
},
// Unit label meaning "day(s)"; presumably appended to the site running-time counter (confirm).
runtime: '天',
// Suffixes for relative dates: just now / minutes ago / hours ago / days ago / months ago.
dateSuffix: {
just: '刚刚',
min: '分钟前',
hour: '小时前',
day: '天前',
month: '个月前'
},
copyright: undefined,
// NOTE(review): this is the string 'null', not the null literal; confirm the consumer compares against strings.
lightbox: 'null',
// Notification texts for switching Traditional/Simplified Chinese and dark/light mode,
// plus the light/dark background colours and the on-screen position.
Snackbar: {"chs_to_cht":"已切换为繁体中文","cht_to_chs":"已切换为简体中文","day_to_night":"已切换为深色模式","night_to_day":"已切换为浅色模式","bgLight":"#49b1f5","bgDark":"#1f1f1f","position":"bottom-left"},
// CDN URL of the infinitegrid library and its button label ("load more").
infinitegrid: {
js: 'https://cdn.jsdelivr.net/npm/@egjs/infinitegrid/dist/infinitegrid.min.js',
buttonText: '加载更多'
},
isPhotoFigcaption: false,
islazyload: false,
isAnchor: false,
// Percentage display flags: enabled for the table of contents, disabled for the right-side button.
percent: {
toc: true,
rightside: false,
},
autoDarkmode: false
}</script><script id="config-diff">var GLOBAL_CONFIG_SITE = {
// Per-page flags for this document: it is the home page (isHome: true) and not a post;
// highlight shrink, table of contents and "shuoshuo" are all off here.
title: '風月無辺 技术博客',
isPost: false,
isHome: true,
isHighlightShrink: false,
isToc: false,
isShuoshuo: false
}</script><link rel="stylesheet" href="/css/common.css"><link rel="stylesheet" href="/css/header.css"><link rel="stylesheet" href="/css/foot.css"><link rel="stylesheet" href="/css/font.css"><link rel="stylesheet" href="/css/nav.css"><meta name="generator" content="Hexo 7.3.0"></head><body><div id="loading-box"><div class="loading-left-bg"></div><div class="loading-right-bg"></div><div class="spinner-box"><div class="configure-border-1"><div class="configure-core"></div></div><div class="configure-border-2"><div class="configure-core"></div></div><div class="loading-word">加载中...</div></div></div><script>(()=>{
// Page preloader: shows the #loading-box overlay while the page loads and
// releases it on the window 'load' event.
const $loadingBox = document.getElementById('loading-box')
const $body = document.body
const preloader = {
  // Restore body scrolling and mark the overlay as 'loaded'
  // (presumably the stylesheet hides it in that state; confirm in the CSS).
  endLoading: () => {
    $body.style.overflow = ''
    $loadingBox.classList.add('loaded')
  },
  // Lock body scrolling and clear the 'loaded' mark so the overlay is active.
  initLoading: () => {
    $body.style.overflow = 'hidden'
    $loadingBox.classList.remove('loaded')
  }
}
preloader.initLoading()
window.addEventListener('load', preloader.endLoading)
// The former `if (false) { ... }` block that registered pjaxSend / pjaxComplete
// hooks could never run and has been removed.
})()</script><div id="web_bg" style="background-image: url(/img/sun.png);"></div><div id="sidebar"><div id="menu-mask"></div><div id="sidebar-menus"><div class="avatar-img text-center"><img src="/img/name.jpg" onerror="onerror=null;src='/img/friend_404.gif'" alt="avatar"/></div><div class="site-data text-center"><a href="/archives/"><div class="headline">文章</div><div class="length-num">3</div></a><a href="/tags/"><div class="headline">标签</div><div class="length-num">0</div></a><a href="/categories/"><div class="headline">分类</div><div class="length-num">3</div></a></div><div class="menus_items"><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> 首页</span></a></div></div></div></div><div class="page" id="body-wrap"><header class="not-top-img fixed" id="page-header"><nav id="nav"><span id="blog-info"><a class="nav-site-title" href="/"><img class="site-icon" src="/img/sun.png" alt="Logo"></a></span><div id="menus"><div class="menus_items"><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> 首页</span></a></div></div><div id="toggle-menu"><span class="site-page"><i class="fas fa-bars fa-fw"></i></span></div></div></nav><h1 class="title-seo">風月無辺 技术博客</h1></header><main class="layout" id="content-inner"><div class="recent-posts nc" id="recent-posts"><div class="recent-post-items"><div class="recent-post-item"><div class="post_cover"><a href="/2024/12/10/paper-TPU/" title="文章导读:Motivation for and Evaluation of the First Tensor Processing Unit"><img class="post-bg" src="/img/TPU.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="文章导读:Motivation for and Evaluation of the First Tensor Processing Unit"></a></div><div class="recent-post-info"><a class="article-title" href="/2024/12/10/paper-TPU/" title="文章导读:Motivation for and Evaluation of the First Tensor Processing Unit">文章导读:Motivation for and Evaluation of the First Tensor Processing Unit</a><div class="article-meta-wrap"><span 
class="post-meta-date"><i class="far fa-calendar-alt"></i><span class="article-meta-label">发表于</span><time datetime="2024-12-10T14:05:40.711Z" title="发表于 2024-12-10 22:05:40">2024-12-10</time></span><span class="article-meta"><span class="article-meta-separator">|</span><i class="fas fa-inbox"></i><a class="article-meta__categories" href="/categories/%E8%AE%BA%E6%96%87%E5%AF%BC%E8%AF%BB/">论文导读</a></span></div><div class="content">相关资料 https://ieeexplore.ieee.org/document/8358031 TPU介绍 Tensor Processing Unit (TPU) 是Google提出的一种专门设计的硬件加速器,这种DSA(特定领域架构)芯片展现出惊人的运算性能与能效比,自2015年以来在Google的数据中心广泛使用,服务于全球数十亿用户。 TPU硬件如下图所示,主要包含如下模块 256x256 的 INT8 矩阵乘法单元 (MACs) 片上FIFO队列:读取 DRAM 的权重值 统一缓存: 存储计算中间结果,可作为矩阵单元的输入 可编程 DMA 控制器:将数据从 CPU 端送入统一缓存 TPU使用的技术 为了缓解计算开销与访存开销的严重失衡,软件层面我们会在高速缓存中充分复用数据,减少低速内存访问次数;硬件层面 Google 使用了一种名为 systolic execution(脉动阵列)的技术,同样可以减少访存次数。 传统的计算方式(左图)是数据每计算一次就要存储一次,而下一次要调取计算结果的时候也要从存储器里面重新获得这个数据,往复循环。那么在脉动结构中,单一 PE 被替换成了一串 PE。数据在经手所有 PE 计算之后才会被存储,由于矩阵加乘计算需要大量的数据复用,这种数据计算流程大量地减少了数据被访问的次数,从而实现了更高的效率。 使用脉动阵列计算矩阵C=AxB时,需要将A、B的数据分多次输入阵列,如图所示。 TPU的优势 优势: 控制单元少 可以放更多缓存 峰值性能高 功耗低 设计简单(微结构特征简单) </div></div></div><div class="recent-post-item"><div class="post_cover"><a href="/2024/12/13/read-paper-tools/" title="看论文相关工具"><img class="post-bg" src="/img/readpaper_tools.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="看论文相关工具"></a></div><div class="recent-post-info"><a class="article-title" href="/2024/12/13/read-paper-tools/" title="看论文相关工具">看论文相关工具</a><div class="article-meta-wrap"><span class="post-meta-date"><i class="far fa-calendar-alt"></i><span class="article-meta-label">发表于</span><time datetime="2024-12-13T08:37:37.947Z" title="发表于 2024-12-13 16:37:37">2024-12-13</time></span><span class="article-meta"><span class="article-meta-separator">|</span><i class="fas fa-inbox"></i><a class="article-meta__categories" href="/categories/%E5%B7%A5%E5%85%B7/">工具</a></span></div><div class="content">基于英文看论文 啃英文论文虽然花时间,但也有好处: 熟悉英文表达方式 慢下来思考文章逻辑 熟悉专有名词 
12工具:ReadPaper位置:https://readpaper.com/new 可以选择 软件 / 网页版,软件效果如下图所示: 基于中文看论文 好处:减少语言壁垒 专注于内容 坏处:减少提高英语能力机会 12工具:yiyibook位置:https://yiyibooks.cn/ 该工具效果如下图所示: 根据论文看论文 (快速了解领域信息) 123工具:connected paper位置:https://www.connectedpapers.com/Edge浏览器可利用无痕模式反复使用 根据作者看论文(了解作者长期研究脉络) 12工具:dblp位置:https://dblp.org/ 这是我老板的相关paper 根据论文看代码 12工具:paper with code位置:https://paperswithcode.com/ </div></div></div><div class="recent-post-item"><div class="post_cover"><a href="/2024/12/05/review/" title="先导杯比赛回顾"><img class="post-bg" src="/img/priority_application_trans.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="先导杯比赛回顾"></a></div><div class="recent-post-info"><a class="article-title" href="/2024/12/05/review/" title="先导杯比赛回顾">先导杯比赛回顾</a><div class="article-meta-wrap"><span class="post-meta-date"><i class="far fa-calendar-alt"></i><span class="article-meta-label">发表于</span><time datetime="2024-12-05T08:43:22.052Z" title="发表于 2024-12-05 16:43:22">2024-12-05</time></span><span class="article-meta"><span class="article-meta-separator">|</span><i class="fas fa-inbox"></i><a class="article-meta__categories" href="/categories/%E9%A1%B9%E7%9B%AE/">项目</a></span></div><div class="content">竞赛结果 本次先导杯竞赛我带领DeepOptimized团队获得国赛三等奖, 复赛rank为 5/16。 赛程期间DeepOptimized团队共完成 20+ 次代码迭代,在中科曙光DCU K100-AI平台上高度优化了卷积运算,实现了较直接卷积30-40倍的性能提升 任务介绍 本届先导杯分为 “多模态大模型基础卷积算子优化” 和 “大语言模型llama3 8B推理性能的软硬件协同优化” 赛道。 “多模态大模型基础卷积算子优化”赛道中共有6组测试样例, 6组测试样例特征各不相同,需要针对测试样例编写算子实现高性能运算。 测试样例1: K = 27 无法被2整除 测试样例4: C*R*S >> M N 测试样例6: K = 4 维度过小 DeepOptimized团队针对大赛提出的6个测试样例的访存特征,总结赛题难点如下: 线程束调度空间受限 NCHW布局空间连续性不佳 矩阵维度高度不均衡 线程束调度空间受限K100-AI的访存时延隐藏能力受限于活跃线程束的数量。常规方法根据(M, N)维度划分线程束,但在特定参数配置下,该划分方法无法满足K100-AI的调度需求。 NCHW布局空间连续性不佳在NCHW格式中,同一通道的所有像素数据在内存中是连续存储的,而在卷积操作中,通常需要访问多个通道的数据。GPU在加载数据到缓存时可能需要频繁地进行内存访问,导致缓存利用率降低,进而影响整体计算性能。 矩阵维度高度不均衡L2缓存命中率高度依赖于wave(同时运行的线程块)对输入矩阵的重复读取。当输出矩阵的维度高度不均衡时,基于轮转的线程块调度策略会导致wave覆盖的区域呈现出极度扁平的形态,从而减少了对矩阵B的重复读取机会。导致L2缓存命中率降低。 卷积转化 直接卷积运算小规模内积的深层嵌套循环无法充分利用...</div></div></div></div><nav 
id="pagination"><div class="pagination"><span class="page-number current">1</span></div></nav></div><div class="aside-content" id="aside-content"><div class="card-widget card-info text-center"><div class="avatar-img"><img src="/img/name.jpg" onerror="this.onerror=null;this.src='/img/friend_404.gif'" alt="avatar"/></div><div class="author-info-name">BoundlessWindMoon</div><div class="author-info-description">First, solve the problem. Then, write the code.</div><div class="site-data"><a href="/archives/"><div class="headline">文章</div><div class="length-num">3</div></a><a href="/tags/"><div class="headline">标签</div><div class="length-num">0</div></a><a href="/categories/"><div class="headline">分类</div><div class="length-num">3</div></a></div><a id="card-info-btn" target="_blank" rel="noopener" href="https://github.com/BoundlessWindMoon?tab=repositories"><i class="fab fa-github"></i><span>Follow Me</span></a></div><div class="sticky_layout"><div class="card-widget card-recent-post"><div class="item-headline"><i class="fas fa-history"></i><span>最新文章</span></div><div class="aside-list"><div class="aside-list-item"><a class="thumbnail" href="/2024/12/13/read-paper-tools/" title="看论文相关工具"><img src="/img/readpaper_tools.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="看论文相关工具"/></a><div class="content"><a class="title" href="/2024/12/13/read-paper-tools/" title="看论文相关工具">看论文相关工具</a><time datetime="2024-12-13T08:37:37.947Z" title="发表于 2024-12-13 16:37:37">2024-12-13</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/2024/12/10/paper-TPU/" title="文章导读:Motivation for and Evaluation of the First Tensor Processing Unit"><img src="/img/TPU.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="文章导读:Motivation for and Evaluation of the First Tensor Processing Unit"/></a><div class="content"><a class="title" href="/2024/12/10/paper-TPU/" title="文章导读:Motivation for and Evaluation of the First Tensor Processing Unit">文章导读:Motivation for and 
Evaluation of the First Tensor Processing Unit</a><time datetime="2024-12-10T14:05:40.711Z" title="发表于 2024-12-10 22:05:40">2024-12-10</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/2024/12/05/review/" title="先导杯比赛回顾"><img src="/img/priority_application_trans.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="先导杯比赛回顾"/></a><div class="content"><a class="title" href="/2024/12/05/review/" title="先导杯比赛回顾">先导杯比赛回顾</a><time datetime="2024-12-05T08:43:22.052Z" title="发表于 2024-12-05 16:43:22">2024-12-05</time></div></div></div></div><div class="card-widget card-archives">
<!-- Heading of the archive widget. The icon is decorative (the text label follows it), so it is hidden from assistive technology. -->
<div class="item-headline">
  <i class="fas fa-archive" aria-hidden="true"></i>
  <span>归档</span>
</div>
<ul class="card-archive-list">
<!-- Single archive entry: links to the December 2024 archive page and shows its count (3). -->
<li class="card-archive-list-item">
<a class="card-archive-list-link" href="/archives/2024/12/">
<span class="card-archive-list-date">十二月 2024</span>
<span class="card-archive-list-count">3</span>
</a>
</li>
</ul></div><div class="card-widget card-webinfo"><div class="item-headline"><i class="fas fa-chart-line"></i><span>网站信息</span></div><div class="webinfo"><div class="webinfo-item"><div class="item-name">文章数目 :</div><div class="item-count">3</div></div><div class="webinfo-item"><div class="item-name">运行时间 :</div><div class="item-count" id="runtimeshow" data-publishDate="2024-11-30T16:00:00.000Z"><i class="fa-solid fa-spinner fa-spin"></i></div></div><div class="webinfo-item"><div class="item-name">本站访客数 :</div><div class="item-count" id="busuanzi_value_site_uv"><i class="fa-solid fa-spinner fa-spin"></i></div></div><div class="webinfo-item"><div class="item-name">本站总浏览量 :</div><div class="item-count" id="busuanzi_value_site_pv"><i class="fa-solid fa-spinner fa-spin"></i></div></div><div class="webinfo-item"><div class="item-name">最后更新时间 :</div><div class="item-count" id="last-push-date" data-lastPushDate="2024-12-14T05:20:54.070Z"><i class="fa-solid fa-spinner fa-spin"></i></div></div></div></div></div></div></main><footer id="footer" style="background-color: rgb(31,19,6);"><div id="footer-wrap"><div class="copyright">©2024 By BoundlessWindMoon</div><div class="framework-info"><span>框架 </span><a target="_blank" rel="noopener" href="https://hexo.io">Hexo</a><span class="footer-separator">|</span><span>主题 </span><a target="_blank" rel="noopener" href="https://github.com/jerryc127/hexo-theme-butterfly">Butterfly</a></div></div></footer></div><div id="rightside"><div id="rightside-config-hide"><button id="hide-aside-btn" type="button" title="单栏和双栏切换"><i class="fas fa-arrows-alt-h"></i></button></div><div id="rightside-config-show"><button id="go-up" type="button" title="回到顶部"><span class="scroll-percent"></span><i class="fas fa-arrow-up"></i></button></div></div><div><script src="/js/utils.js"></script><script src="/js/main.js"></script><script src="https://cdn.jsdelivr.net/npm/instant.page/instantpage.min.js" type="module"></script><script 
src="https://cdn.jsdelivr.net/npm/node-snackbar/dist/snackbar.min.js"></script><script>(() => {
// Run pangu's automatic spacing on the page. If the `pangu` global is not
// available yet, load the library from the CDN first and run it once loaded.
const panguFn = () => {
  if (typeof pangu === 'object') {
    pangu.autoSpacingPage()
    return
  }
  btf.getScript('https://cdn.jsdelivr.net/npm/pangu/dist/browser/pangu.min.js')
    .then(() => {
      pangu.autoSpacingPage()
    })
    .catch(err => {
      // Report a failed load or run instead of leaving an unhandled promise rejection.
      console.error('pangu.js failed to load or run:', err)
    })
}
// Entry point for pangu spacing: always runs panguFn. The former
// `if (false) { GLOBAL_CONFIG_SITE.isPost && panguFn() }` branch could never
// run and has been removed.
const panguInit = () => {
  panguFn()
}
// NOTE(review): the addGlobalFn defined in this page's head script returns early for
// keys starting with 'pjax', so this registration currently has no effect; it is kept
// so the hook is already in place if that helper ever accepts pjax keys.
btf.addGlobalFn('pjaxComplete', panguInit, 'pangu')
document.addEventListener('DOMContentLoaded', panguInit)
})()</script><div class="js-pjax"></div><script async data-pjax src="//busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script></div></body></html>