<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/"><channel><title>快速参考 on heyaohua's Blog</title><link>https://blog.heyaohua.com/tags/%E5%BF%AB%E9%80%9F%E5%8F%82%E8%80%83/</link><description>Recent content in 快速参考 on heyaohua's Blog</description><image><title>heyaohua's Blog</title><url>https://blog.heyaohua.com/og-image.png</url><link>https://blog.heyaohua.com/og-image.png</link></image><generator>Hugo</generator><language>zh-cn</language><lastBuildDate>Wed, 01 May 2024 11:00:00 +0800</lastBuildDate><atom:link href="https://blog.heyaohua.com/tags/%E5%BF%AB%E9%80%9F%E5%8F%82%E8%80%83/index.xml" rel="self" type="application/rss+xml"/><item><title>HDFS均衡操作快速参考</title><link>https://blog.heyaohua.com/posts/2024/05/hdfs-balancer-fast/</link><pubDate>Wed, 01 May 2024 11:00:00 +0800</pubDate><guid>https://blog.heyaohua.com/posts/2024/05/hdfs-balancer-fast/</guid><description>HDFS均衡操作快速参考：快速判断是否需要均衡、常用均衡命令、参数说明、监控命令、性能优化、故障排除与自动化脚本。</description><content:encoded><![CDATA[<h2 id="快速判断是否需要均衡">快速判断是否需要均衡</h2>
<div class="highlight"><pre tabindex="0" style="color:#f8f8f2;background-color:#282a36;-moz-tab-size:4;-o-tab-size:4;tab-size:4;-webkit-text-size-adjust:none;"><code class="language-python" data-lang="python"><span style="display:flex;"><span><span style="color:#6272a4"># 计算当前均衡度（标准差）</span>
</span></span><span style="display:flex;"><span>hdfs dfsadmin <span style="color:#ff79c6">-</span>report <span style="color:#ff79c6">|</span> python3 <span style="color:#ff79c6">-</span>c <span style="color:#f1fa8c">&#34;</span>
</span></span><span style="display:flex;"><span><span style="color:#ff79c6">import</span> sys<span style="color:#ff79c6">,</span> re
</span></span><span style="display:flex;"><span>used_percents <span style="color:#ff79c6">=</span> []
</span></span><span style="display:flex;"><span><span style="color:#ff79c6">for</span> line <span style="color:#ff79c6">in</span> sys<span style="color:#ff79c6">.</span>stdin:
</span></span><span style="display:flex;"><span>    <span style="color:#ff79c6">if</span> <span style="color:#f1fa8c">&#39;DFS Used%:&#39;</span> <span style="color:#ff79c6">in</span> line:
</span></span><span style="display:flex;"><span>        percent <span style="color:#ff79c6">=</span> <span style="color:#8be9fd;font-style:italic">float</span>(re<span style="color:#ff79c6">.</span>search(<span style="color:#f1fa8c">r</span><span style="color:#f1fa8c">&#39;(\d+\.?\d*)%&#39;</span>, line)<span style="color:#ff79c6">.</span>group(<span style="color:#bd93f9">1</span>))
</span></span><span style="display:flex;"><span>        used_percents<span style="color:#ff79c6">.</span>append(percent)
</span></span><span style="display:flex;"><span><span style="color:#ff79c6">if</span> used_percents:
</span></span><span style="display:flex;"><span>    avg <span style="color:#ff79c6">=</span> <span style="color:#8be9fd;font-style:italic">sum</span>(used_percents) <span style="color:#ff79c6">/</span> <span style="color:#8be9fd;font-style:italic">len</span>(used_percents)
</span></span><span style="display:flex;"><span>    variance <span style="color:#ff79c6">=</span> <span style="color:#8be9fd;font-style:italic">sum</span>((x <span style="color:#ff79c6">-</span> avg) <span style="color:#ff79c6">**</span> <span style="color:#bd93f9">2</span> <span style="color:#ff79c6">for</span> x <span style="color:#ff79c6">in</span> used_percents) <span style="color:#ff79c6">/</span> <span style="color:#8be9fd;font-style:italic">len</span>(used_percents)
</span></span><span style="display:flex;"><span>    std_dev <span style="color:#ff79c6">=</span> variance <span style="color:#ff79c6">**</span> <span style="color:#bd93f9">0.5</span>
</span></span><span style="display:flex;"><span>    <span style="color:#8be9fd;font-style:italic">print</span>(<span style="color:#f1fa8c">f</span><span style="color:#f1fa8c">&#39;标准差: </span><span style="color:#f1fa8c">{</span>std_dev<span style="color:#f1fa8c">:</span><span style="color:#f1fa8c">.2f</span><span style="color:#f1fa8c">}</span><span style="color:#f1fa8c">%&#39;</span>)
</span></span><span style="display:flex;"><span>    <span style="color:#ff79c6">if</span> std_dev <span style="color:#ff79c6">&gt;</span> <span style="color:#bd93f9">15</span>:
</span></span><span style="display:flex;"><span>        <span style="color:#8be9fd;font-style:italic">print</span>(<span style="color:#f1fa8c">&#39;⚠️  需要立即均衡&#39;</span>)
</span></span><span style="display:flex;"><span>    <span style="color:#ff79c6">elif</span> std_dev <span style="color:#ff79c6">&gt;</span> <span style="color:#bd93f9">10</span>:
</span></span><span style="display:flex;"><span>        <span style="color:#8be9fd;font-style:italic">print</span>(<span style="color:#f1fa8c">&#39;⚠️  建议进行均衡&#39;</span>)
</span></span><span style="display:flex;"><span>    <span style="color:#ff79c6">else</span>:
</span></span><span style="display:flex;"><span>        <span style="color:#8be9fd;font-style:italic">print</span>(<span style="color:#f1fa8c">&#39;✅ 集群已均衡&#39;</span>)
</span></span><span style="display:flex;"><span><span style="color:#f1fa8c">&#34;</span>
</span></span></code></pre></div><h2 id="常用均衡命令">常用均衡命令</h2>
<h3 id="基本均衡">基本均衡</h3>
<div class="highlight"><pre tabindex="0" style="color:#f8f8f2;background-color:#282a36;-moz-tab-size:4;-o-tab-size:4;tab-size:4;-webkit-text-size-adjust:none;"><code class="language-text" data-lang="text"><span style="display:flex;"><span># 标准均衡（推荐）
</span></span><span style="display:flex;"><span>nohup hdfs balancer -threshold 10 -policy datanode &gt; /tmp/balancer.log 2&gt;&amp;1 &amp;
</span></span><span style="display:flex;"><span>
</span></span><span style="display:flex;"><span># 严格均衡
</span></span><span style="display:flex;"><span>nohup hdfs balancer -threshold 5 -policy datanode &gt; /tmp/balancer.log 2&gt;&amp;1 &amp;
</span></span><span style="display:flex;"><span>
</span></span><span style="display:flex;"><span># 宽松均衡
</span></span><span style="display:flex;"><span>nohup hdfs balancer -threshold 15 -policy datanode &gt; /tmp/balancer.log 2&gt;&amp;1 &amp;
</span></span></code></pre></div><h3 id="高级均衡">高级均衡</h3>
<div class="highlight"><pre tabindex="0" style="color:#f8f8f2;background-color:#282a36;-moz-tab-size:4;-o-tab-size:4;tab-size:4;-webkit-text-size-adjust:none;"><code class="language-text" data-lang="text"><span style="display:flex;"><span># 排除特定节点
</span></span><span style="display:flex;"><span>nohup hdfs balancer -threshold 10 -exclude 192.168.1.100,192.168.1.101 &gt; /tmp/balancer.log 2&gt;&amp;1 &amp;
</span></span><span style="display:flex;"><span>
</span></span><span style="display:flex;"><span># 只均衡特定节点
</span></span><span style="display:flex;"><span>nohup hdfs balancer -threshold 10 -include 192.168.1.102,192.168.1.103 &gt; /tmp/balancer.log 2&gt;&amp;1 &amp;
</span></span><span style="display:flex;"><span>
</span></span><span style="display:flex;"><span># 指定源节点
</span></span><span style="display:flex;"><span>nohup hdfs balancer -threshold 10 -source 192.168.1.100,192.168.1.101 &gt; /tmp/balancer.log 2&gt;&amp;1 &amp;
</span></span></code></pre></div><h2 id="参数说明">参数说明</h2>
<table>
  <thead>
      <tr>
          <th>参数</th>
          <th>用途</th>
          <th>默认值</th>
          <th>推荐值</th>
      </tr>
  </thead>
  <tbody>
      <tr>
          <td><code>-threshold</code></td>
          <td>均衡阈值(%)</td>
          <td>10</td>
          <td>5-15</td>
      </tr>
      <tr>
          <td><code>-policy</code></td>
          <td>均衡策略</td>
          <td>datanode</td>
          <td>datanode</td>
      </tr>
      <tr>
          <td><code>-exclude</code></td>
          <td>排除节点</td>
          <td>-</td>
          <td>维护节点</td>
      </tr>
      <tr>
          <td><code>-include</code></td>
          <td>包含节点</td>
          <td>-</td>
          <td>特定节点</td>
      </tr>
      <tr>
          <td><code>-source</code></td>
          <td>源节点</td>
          <td>-</td>
          <td>高负载节点</td>
      </tr>
      <tr>
          <td><code>-idleiterations</code></td>
          <td>空闲迭代次数</td>
          <td>5</td>
          <td>3-5</td>
      </tr>
  </tbody>
</table>
<h2 id="监控命令">监控命令</h2>
<h3 id="检查均衡状态">检查均衡状态</h3>
<div class="highlight"><pre tabindex="0" style="color:#f8f8f2;background-color:#282a36;-moz-tab-size:4;-o-tab-size:4;tab-size:4;-webkit-text-size-adjust:none;"><code class="language-bash" data-lang="bash"><span style="display:flex;"><span><span style="color:#6272a4"># 检查均衡进程</span>
</span></span><span style="display:flex;"><span>ps aux | grep balancer
</span></span><span style="display:flex;"><span>
</span></span><span style="display:flex;"><span><span style="color:#6272a4"># 查看均衡日志</span>
</span></span><span style="display:flex;"><span>tail -f /tmp/balancer.log
</span></span><span style="display:flex;"><span>
</span></span><span style="display:flex;"><span><span style="color:#6272a4"># 实时监控均衡进度</span>
</span></span><span style="display:flex;"><span>python3 /tmp/monitor_hdfs_balancer.py
</span></span></code></pre></div><h3 id="停止均衡">停止均衡</h3>
<div class="highlight"><pre tabindex="0" style="color:#f8f8f2;background-color:#282a36;-moz-tab-size:4;-o-tab-size:4;tab-size:4;-webkit-text-size-adjust:none;"><code class="language-text" data-lang="text"><span style="display:flex;"><span># 查找并停止均衡进程
</span></span><span style="display:flex;"><span>pkill -f &#34;hdfs.*balancer&#34;
</span></span><span style="display:flex;"><span>
</span></span><span style="display:flex;"><span># 或者通过PID停止（需在启动均衡后先执行 echo $! &gt; /tmp/balancer.pid 保存PID）
</span></span><span style="display:flex;"><span>kill $(cat /tmp/balancer.pid)
</span></span></code></pre></div><h2 id="性能优化">性能优化</h2>
<h3 id="调整均衡带宽">调整均衡带宽</h3>
<div class="highlight"><pre tabindex="0" style="color:#f8f8f2;background-color:#282a36;-moz-tab-size:4;-o-tab-size:4;tab-size:4;-webkit-text-size-adjust:none;"><code class="language-xml" data-lang="xml"><span style="display:flex;"><span><span style="color:#6272a4">&lt;!-- 在hdfs-site.xml中添加 --&gt;</span>
</span></span><span style="display:flex;"><span><span style="color:#ff79c6">&lt;property&gt;</span>
</span></span><span style="display:flex;"><span>  <span style="color:#ff79c6">&lt;name&gt;</span>dfs.datanode.balance.bandwidthPerSec<span style="color:#ff79c6">&lt;/name&gt;</span>
</span></span><span style="display:flex;"><span>  <span style="color:#ff79c6">&lt;value&gt;</span>52428800<span style="color:#ff79c6">&lt;/value&gt;</span>  <span style="color:#6272a4">&lt;!-- 50MB/s --&gt;</span>
</span></span><span style="display:flex;"><span><span style="color:#ff79c6">&lt;/property&gt;</span>
</span></span></code></pre></div><h3 id="系统优化">系统优化</h3>
<div class="highlight"><pre tabindex="0" style="color:#f8f8f2;background-color:#282a36;-moz-tab-size:4;-o-tab-size:4;tab-size:4;-webkit-text-size-adjust:none;"><code class="language-text" data-lang="text"><span style="display:flex;"><span># 网络优化
</span></span><span style="display:flex;"><span>echo &#39;net.core.rmem_max = 134217728&#39; &gt;&gt; /etc/sysctl.conf
</span></span><span style="display:flex;"><span>echo &#39;net.core.wmem_max = 134217728&#39; &gt;&gt; /etc/sysctl.conf
</span></span><span style="display:flex;"><span>sysctl -p
</span></span><span style="display:flex;"><span>
</span></span><span style="display:flex;"><span># 磁盘优化（使用 blk-mq 的较新内核中该调度器名为 none，请先用 cat /sys/block/sda/queue/scheduler 查看可用值）
</span></span><span style="display:flex;"><span>echo noop &gt; /sys/block/sda/queue/scheduler
</span></span></code></pre></div><h2 id="故障排除">故障排除</h2>
<h3 id="常见问题">常见问题</h3>
<ol>
<li><strong>均衡进程无法启动</strong>
<ul>
<li>检查HDFS服务状态：<code>hdfs dfsadmin -report</code></li>
<li>检查权限：<code>whoami</code></li>
<li>查看日志：<code>tail -f $HADOOP_LOG_DIR/hadoop-*-balancer-*.log</code></li>
</ul>
</li>
<li><strong>均衡速度过慢</strong>
<ul>
<li>检查网络：<code>iperf3 -c &lt;target_node&gt;</code></li>
<li>检查磁盘I/O：<code>iostat -x 1 5</code></li>
<li>调整均衡带宽</li>
</ul>
</li>
<li><strong>均衡进程异常退出</strong>
<ul>
<li>检查系统资源：<code>free -h</code>, <code>df -h</code></li>
<li>查看系统日志：<code>dmesg | tail -50</code></li>
<li>重新启动均衡</li>
</ul>
</li>
</ol>
<h2 id="最佳实践">最佳实践</h2>
<ol>
<li><strong>时间选择</strong>：在业务低峰期进行均衡</li>
<li><strong>参数设置</strong>：生产环境使用5-10%阈值</li>
<li><strong>监控告警</strong>：设置自动化监控和告警</li>
<li><strong>分批进行</strong>：大型集群可以分批均衡</li>
<li><strong>数据验证</strong>：均衡后检查数据完整性</li>
</ol>
<h2 id="自动化脚本">自动化脚本</h2>
<h3 id="一键均衡脚本">一键均衡脚本</h3>
<div class="highlight"><pre tabindex="0" style="color:#f8f8f2;background-color:#282a36;-moz-tab-size:4;-o-tab-size:4;tab-size:4;-webkit-text-size-adjust:none;"><code class="language-bash" data-lang="bash"><span style="display:flex;"><span><span style="color:#6272a4">#!/bin/bash</span>
</span></span><span style="display:flex;"><span><span style="color:#6272a4"># 检查均衡度并自动启动均衡</span>
</span></span><span style="display:flex;"><span>
</span></span><span style="display:flex;"><span>STD_DEV<span style="color:#ff79c6">=</span>$(hdfs dfsadmin <span style="color:#ff79c6">-</span>report <span style="color:#ff79c6">|</span> python3 <span style="color:#ff79c6">-</span>c <span style="color:#f1fa8c">&#34;</span>
</span></span><span style="display:flex;"><span><span style="color:#ff79c6">import</span> sys<span style="color:#ff79c6">,</span> re
</span></span><span style="display:flex;"><span>used_percents <span style="color:#ff79c6">=</span> []
</span></span><span style="display:flex;"><span><span style="color:#ff79c6">for</span> line <span style="color:#ff79c6">in</span> sys<span style="color:#ff79c6">.</span>stdin:
</span></span><span style="display:flex;"><span>    <span style="color:#ff79c6">if</span> <span style="color:#f1fa8c">&#39;DFS Used%:&#39;</span> <span style="color:#ff79c6">in</span> line:
</span></span><span style="display:flex;"><span>        percent <span style="color:#ff79c6">=</span> <span style="color:#8be9fd;font-style:italic">float</span>(re<span style="color:#ff79c6">.</span>search(<span style="color:#f1fa8c">r</span><span style="color:#f1fa8c">&#39;(\d+\.?\d*)%&#39;</span>, line)<span style="color:#ff79c6">.</span>group(<span style="color:#bd93f9">1</span>))
</span></span><span style="display:flex;"><span>        used_percents<span style="color:#ff79c6">.</span>append(percent)
</span></span><span style="display:flex;"><span><span style="color:#ff79c6">if</span> used_percents:
</span></span><span style="display:flex;"><span>    avg <span style="color:#ff79c6">=</span> <span style="color:#8be9fd;font-style:italic">sum</span>(used_percents) <span style="color:#ff79c6">/</span> <span style="color:#8be9fd;font-style:italic">len</span>(used_percents)
</span></span><span style="display:flex;"><span>    variance <span style="color:#ff79c6">=</span> <span style="color:#8be9fd;font-style:italic">sum</span>((x <span style="color:#ff79c6">-</span> avg) <span style="color:#ff79c6">**</span> <span style="color:#bd93f9">2</span> <span style="color:#ff79c6">for</span> x <span style="color:#ff79c6">in</span> used_percents) <span style="color:#ff79c6">/</span> <span style="color:#8be9fd;font-style:italic">len</span>(used_percents)
</span></span><span style="display:flex;"><span>    std_dev <span style="color:#ff79c6">=</span> variance <span style="color:#ff79c6">**</span> <span style="color:#bd93f9">0.5</span>
</span></span><span style="display:flex;"><span>    <span style="color:#8be9fd;font-style:italic">print</span>(<span style="color:#f1fa8c">f</span><span style="color:#f1fa8c">&#39;</span><span style="color:#f1fa8c">{</span>std_dev<span style="color:#f1fa8c">:</span><span style="color:#f1fa8c">.2f</span><span style="color:#f1fa8c">}</span><span style="color:#f1fa8c">&#39;</span>)
</span></span><span style="display:flex;"><span><span style="color:#ff79c6">else</span>:
</span></span><span style="display:flex;"><span>    <span style="color:#8be9fd;font-style:italic">print</span>(<span style="color:#f1fa8c">&#39;0&#39;</span>)
</span></span><span style="display:flex;"><span><span style="color:#f1fa8c">&#34;)</span>
</span></span><span style="display:flex;"><span>
</span></span><span style="display:flex;"><span>echo <span style="color:#f1fa8c">&#34;当前均衡度: $</span><span style="color:#f1fa8c">{STD_DEV}</span><span style="color:#f1fa8c">%&#34;</span>
</span></span><span style="display:flex;"><span>
</span></span><span style="display:flex;"><span><span style="color:#ff79c6">if</span> (( $(echo <span style="color:#f1fa8c">&#34;$STD_DEV &gt; 10&#34;</span> <span style="color:#ff79c6">|</span> bc <span style="color:#ff79c6">-</span>l) )); then
</span></span><span style="display:flex;"><span>    echo <span style="color:#f1fa8c">&#34;启动均衡...&#34;</span>
</span></span><span style="display:flex;"><span>    nohup hdfs balancer <span style="color:#ff79c6">-</span>threshold <span style="color:#bd93f9">10</span> <span style="color:#ff79c6">&gt;</span> <span style="color:#ff79c6">/</span>tmp<span style="color:#ff79c6">/</span>balancer<span style="color:#ff79c6">.</span>log <span style="color:#bd93f9">2</span><span style="color:#ff79c6">&gt;&amp;</span><span style="color:#bd93f9">1</span> <span style="color:#ff79c6">&amp;</span>
</span></span><span style="display:flex;"><span>    echo <span style="color:#f1fa8c">&#34;均衡进程已启动，PID: $!&#34;</span>
</span></span><span style="display:flex;"><span><span style="color:#ff79c6">else</span>
</span></span><span style="display:flex;"><span>    echo <span style="color:#f1fa8c">&#34;集群已均衡，无需操作&#34;</span>
</span></span><span style="display:flex;"><span>fi
</span></span></code></pre></div><h2 id="监控脚本">监控脚本</h2>
<h3 id="简化监控脚本">简化监控脚本</h3>
<div class="highlight"><pre tabindex="0" style="color:#f8f8f2;background-color:#282a36;-moz-tab-size:4;-o-tab-size:4;tab-size:4;-webkit-text-size-adjust:none;"><code class="language-bash" data-lang="bash"><span style="display:flex;"><span><span style="color:#ff79c6">#!/bin/bash
</span></span></span><span style="display:flex;"><span><span style="color:#6272a4"># 简化版均衡监控</span>
</span></span><span style="display:flex;"><span>
</span></span><span style="display:flex;"><span><span style="color:#ff79c6">while</span> true; <span style="color:#ff79c6">do</span>
</span></span><span style="display:flex;"><span>    <span style="color:#8be9fd;font-style:italic">echo</span> <span style="color:#f1fa8c">&#34;=== </span><span style="color:#ff79c6">$(</span>date<span style="color:#ff79c6">)</span><span style="color:#f1fa8c"> ===&#34;</span>
</span></span><span style="display:flex;"><span>
</span></span><span style="display:flex;"><span>    <span style="color:#6272a4"># 检查均衡进程</span>
</span></span><span style="display:flex;"><span>    <span style="color:#ff79c6">if</span> pgrep -f <span style="color:#f1fa8c">&#34;hdfs.*balancer&#34;</span> &gt; /dev/null; <span style="color:#ff79c6">then</span>
</span></span><span style="display:flex;"><span>        <span style="color:#8be9fd;font-style:italic">echo</span> <span style="color:#f1fa8c">&#34;✅ 均衡进程正在运行&#34;</span>
</span></span><span style="display:flex;"><span>    <span style="color:#ff79c6">else</span>
</span></span><span style="display:flex;"><span>        <span style="color:#8be9fd;font-style:italic">echo</span> <span style="color:#f1fa8c">&#34;❌ 均衡进程未运行&#34;</span>
</span></span><span style="display:flex;"><span>    <span style="color:#ff79c6">fi</span>
</span></span><span style="display:flex;"><span>
</span></span><span style="display:flex;"><span>    <span style="color:#6272a4"># 显示各节点使用率</span>
</span></span><span style="display:flex;"><span>    <span style="color:#8be9fd;font-style:italic">echo</span> <span style="color:#f1fa8c">&#34;各节点使用率:&#34;</span>
</span></span><span style="display:flex;"><span>    hdfs dfsadmin -report | grep -E <span style="color:#f1fa8c">&#34;(Name:|DFS Used%:)&#34;</span> | <span style="color:#f1fa8c">\
</span></span></span><span style="display:flex;"><span>        awk <span style="color:#f1fa8c">&#39;NR%2==1{name=$0} NR%2==0{print name &#34; &#34; $0}&#39;</span>
</span></span><span style="display:flex;"><span>
</span></span><span style="display:flex;"><span>    <span style="color:#8be9fd;font-style:italic">echo</span> <span style="color:#f1fa8c">&#34;----------------------------------------&#34;</span>
</span></span><span style="display:flex;"><span>    sleep <span style="color:#bd93f9">60</span>
</span></span><span style="display:flex;"><span><span style="color:#ff79c6">done</span>
</span></span></code></pre></div><hr>
<p><strong>注意</strong>：本快速参考适用于日常运维，详细操作请参考完整版文档。</p>
]]></content:encoded></item></channel></rss>