c语言编程笔录

首页 >   > 笔记大全

笔记大全

PHP多线程编程指南:使用pthreads扩展创建并发爬虫

更新时间:2023-11-13

什么是pthreads扩展

pthreads是PHP的一个第三方扩展(需要启用线程安全ZTS的PHP,并在CLI模式下运行),它提供了一个多线程环境,可以在单个进程中创建多个工作线程以并行执行任务,从而实现高并发操作,提高PHP应用程序的性能和可伸缩性。pthreads的核心类有Threaded、Thread、Worker和Pool:Threaded是可以在线程之间共享的对象基类,Thread和Worker表示线程,Pool用于管理一组Worker。

// Minimal thread class built on the pthreads Thread base class.
class MyThread extends Thread
{
    /**
     * Body of the thread: runs after start() is called on the object.
     * Prints this thread's id followed by a "running" notice.
     */
    public function run()
    {
        $threadId = $this->getThreadId();

        // echo with several operands emits the same bytes as concatenating them.
        echo "线程ID: ", $threadId, " 正在运行...\n";
    }
}

// Instantiate the thread class defined above.
$demoThread = new MyThread();

// Launch the thread; its run() method is what gets executed.
$demoThread->start();

// Block the main script until the thread has finished.
$demoThread->join();

使用pthreads扩展创建爬虫

使用PHP单线程爬取数据的效率很低,因为所有的请求和解析都是串行执行的。这时我们可以使用pthreads扩展并发爬取网页信息,并利用线程池复用线程,提高资源利用率。下面是一个爬取网页标题的示例。

/**
 * Pool task that downloads a batch of pages and prints each page's <title>.
 *
 * Extends Threaded so that instances can be submitted to a pthreads Pool.
 */
class CrawlerTask extends Threaded
{
    /** @var array URLs this task fetches, one after another. */
    private $urls;

    /**
     * @param array $urls URLs to crawl inside run().
     */
    public function __construct(array $urls)
    {
        $this->urls = $urls;
    }

    /**
     * Fetch every URL and echo one "URL: <url>, Title: <title>" line per page.
     *
     * A failed request is reported on its own line and the loop moves on, so
     * one bad URL no longer discards the rest of the batch.
     *
     * NOTE(review): assumes the Guzzle classes can be loaded inside the worker
     * thread - confirm that the autoloader is registered there.
     */
    public function run()
    {
        $client = new \GuzzleHttp\Client();

        foreach ($this->urls as $url)
        {
            /* Request the page; transfer errors are handled per URL. */
            try
            {
                $response = $client->get($url);
            }
            catch (\GuzzleHttp\Exception\TransferException $e)
            {
                echo "URL: " . $url . ", Error: " . $e->getMessage() . "\n";
                continue;
            }

            $html = $response->getBody()->getContents();

            /*
             * Capture the text between <title ...> and </title>. The pattern
             * must include the opening tag: without it the leftmost match
             * starts inside the tag and the capture becomes "title>Text".
             */
            preg_match('/<title[^>]*>([^<]*)<\/title>/i', $html, $matches);

            /* Trim so that padded or multi-line titles stay on one output line. */
            $title = isset($matches[1]) ? trim($matches[1]) : '';

            echo "URL: " . $url . ", Title: " . $title . "\n";
        }
    }
}

/**
 * Splits a URL list into batches and runs each batch as a CrawlerTask on a
 * pthreads Pool.
 */
class Crawler
{
    /** @var int Pool size; also used as the number of URLs per batch. */
    private $concurrency;

    /** @var array URLs to crawl. */
    private $urls;

    /** @var Pool Pool that executes the submitted tasks. */
    private $pool;

    /**
     * @param array $urls        URLs to crawl.
     * @param int   $concurrency Pool size and batch size; must be at least 1.
     *
     * @throws \InvalidArgumentException When $concurrency is smaller than 1.
     */
    public function __construct(array $urls, $concurrency)
    {
        $concurrency = (int) $concurrency;

        // A value below 1 would make the batching in run() divide by zero or
        // never terminate, so reject it before the pool is created.
        if ($concurrency < 1)
        {
            throw new \InvalidArgumentException('Concurrency must be at least 1.');
        }

        $this->urls = $urls;
        $this->concurrency = $concurrency;
        $this->pool = new Pool($this->concurrency);
    }

    /**
     * Submit one CrawlerTask per batch of URLs, then shut the pool down.
     */
    public function run()
    {
        /* array_chunk with preserve_keys=true yields the same batches as
           slicing the list at multiples of the batch size. */
        foreach (array_chunk($this->urls, $this->concurrency, true) as $chunk)
        {
            $this->pool->submit(new CrawlerTask($chunk));
        }

        /* Shut the pool down once every batch has been submitted. */
        $this->pool->shutdown();
    }
}

// Sample sites to crawl.
$urls = [
    'https://www.baidu.com',
    'https://www.douban.com',
    'https://www.github.com',
];

// Second argument is the concurrency the Crawler hands to its Pool.
$crawler = new Crawler($urls, 2);
$crawler->run();
</pre>

<h4>如何避免线程安全的问题</h4>
<p>由于我们使用多线程执行并发任务,需要注意线程安全的问题。当多个线程同时读写同一份数据时,必须用互斥机制保护临界区:互斥锁(Mutex)是一种锁,而不是计数器,它保证同一时刻只有一个线程进入临界区,从而避免数据竞争。需要注意的是,pthreads v3(PHP 7)已经移除了Mutex类,应改用Threaded::synchronized()来实现同步。</p>

<pre class="brush:php;toolbar:false">
/**
 * Thread that adds one to a total shared by all Counter threads.
 *
 * The total is kept on a Threaded object passed to the constructor, not in a
 * static property: each pthreads thread gets its own copy of static members,
 * so increments made there are never seen by the main thread. The same
 * Threaded object is used as the lock through synchronized(), because the
 * Mutex class has no instance lock()/unlock() methods and was removed in
 * pthreads v3.
 */
class Counter extends Thread
{
    /** @var Threaded Shared object: holds the total in ->total and acts as the lock. */
    private $mutex;

    /**
     * @param Threaded $mutex Shared object whose "total" property has been
     *                        initialised by the caller.
     */
    public function __construct(Threaded $mutex)
    {
        $this->mutex = $mutex;
    }

    /**
     * Increment the shared total by one while holding the object's lock.
     */
    public function run()
    {
        // The read-modify-write must happen inside synchronized() so that two
        // threads cannot interleave between reading and writing the total.
        $this->mutex->synchronized(function (Threaded $shared) {
            $shared->total = $shared->total + 1;
        }, $this->mutex);
    }

    /**
     * @return int Current value of the shared total.
     */
    public function getCount()
    {
        return $this->mutex->total;
    }
}

// One shared object for all threads; it carries the total and is the lock.
$mutex = new Threaded();
$mutex->total = 0;
$threads = [];

for ($i = 0; $i < 10; $i++)
{
    $thread = new Counter($mutex);
    $thread->start();
    $threads[] = $thread;
}

// Wait for every thread before reading the result.
foreach ($threads as $thread)
{
    $thread->join();
}

// Read through a known element instead of the variable left over from the loop.
echo "Counter: " . $threads[0]->getCount() . "\n";
</pre>

<h4>总结</h4>
<p>使用pthreads扩展可以让我们在PHP中实现多线程编程,从而提高应用程序的性能和可伸缩性,利用线程池可以重复利用线程,有效减少了线程的创建和销毁,避免浪费资源的问题。在使用pthreads扩展的过程中,需要考虑线程安全问题,使用Mutex类可以避免并发问题。在编写多线程程序时,需要注意线程之间的通信、同步和互斥,保证线程之间的正确性和稳定性。</p>        </div>
        <div class="share" id="down">          <div class="share-text">
            <p>本文如果侵犯了你的权益请联系站长整改删除</p>
            <p>转载请注明出处</p>
            <p>本文地址:<a href="https://www.radbuilder.com/marketing/Python/19356.html" target="_blank">https://www.radbuilder.com/marketing/Python/19356.html</a></p>
          </div>
        </div>
      </div>
    </div>
    <div class="clear blank"></div>
    <div class="down-links whitebg">
    <div class="news-title">
        <h2></h2>
      </div>
      <ul>

     </ul>
    </div>
     <div class="clear blank"></div>
    <div class="down-otherlink whitebg">
      <div class="news-title">
        <h2>图文推荐</h2>
      </div>
      <ul>
        <li><a href="https://www.radbuilder.com/marketing/Python/2052.html" target="_blank"><i><img src="/d/file/p/2023/07-01/small6647a07de5915ca25dce66845345e784.jpg"></i>
          <p>如何在php中对json对象的值进行输出</p>
          <span class="down-info"></span></a></li>
        <li><a href="https://www.radbuilder.com/marketing/Python/2007.html" target="_blank"><i><img src="/d/file/p/2023/07-01/small1c14fee124197f8a8d8d542f6468c434.jpg"></i>
          <p>jquery min js指的是什么</p>
          <span class="down-info"></span></a></li>
        <li><a href="https://www.radbuilder.com/marketing/Python/2049.html" target="_blank"><i><img src="/d/file/p/2023/07-01/small3b228fd92e9008dce070ca2578c632ff.jpg"></i>
          <p>正在执行的SQL语句怎么在postgresql中结束</p>
          <span class="down-info"></span></a></li>
        <li><a href="https://www.radbuilder.com/marketing/Python/1995.html" target="_blank"><i><img src="/d/file/p/2023/07-01/small1b9fcb9991e07208298c92968b167cbe.jpg"></i>
          <p>PHP中is+numeric与ctype+digit有什么不同</p>
          <span class="down-info"></span></a></li>
        <li><a href="https://www.radbuilder.com/marketing/Python/2109.html" target="_blank"><i><img src="/d/file/p/2023/07-02/small27f3e50affd2acd5029588ed2c1111ab.jpg"></i>
          <p>利用php如何对非法字符进行过滤</p>
          <span class="down-info"></span></a></li>
        <li><a href="https://www.radbuilder.com/marketing/Python/2102.html" target="_blank"><i><img src="/d/file/p/2023/07-02/smalle75ad5f22e9a604e3a9a83f0dd057d19.jpg"></i>
          <p>怎么在Java中使用LinkedList</p>
          <span class="down-info"></span></a></li>
      </ul>
    </div>

    <div class="pinglun-box whitebg">
      <div class="news-title">
        <h2></h2>
      </div>

    </div>
  </div>
  <aside class="side-section right-box">
     <div class="whitebg down-tuijian">

    </div>
    <div class="blank clear" ></div>
    <div class="whitebg down-paihang">
      <h2 class="side-title">热门排行</h2>
      <ul>
        <li><i></i><a href="https://www.radbuilder.com/marketing/Python/2052.html" title="如何在php中对json对象的值进行输出" target="_blank">如何在php中对json对象的值进行输出</a></li>
        <li><i></i><a href="https://www.radbuilder.com/marketing/Python/1174.html" title="uniapp实现定位权限" target="_blank">uniapp实现定位权限</a></li>
        <li><i></i><a href="https://www.radbuilder.com/marketing/Python/1244.html" title="Python实现热加载配置文件的方法" target="_blank">Python实现热加载配置文件的方法</a></li>
        <li><i></i><a href="https://www.radbuilder.com/marketing/Python/3489.html" title="ps如何把皮肤通透白嫩" target="_blank">ps如何把皮肤通透白嫩</a></li>
        <li><i></i><a href="https://www.radbuilder.com/marketing/Python/11784.html" title="js中怎么用文件流下载csv文件" target="_blank">js中怎么用文件流下载csv文件</a></li>
        <li><i></i><a href="https://www.radbuilder.com/marketing/Python/989.html" title="怎么使用PHP进行人工智能开发" target="_blank">怎么使用PHP进行人工智能开发</a></li>
        <li><i></i><a href="https://www.radbuilder.com/marketing/Python/1008.html" title="XML文档不能使用css样式表如何办" target="_blank">XML文档不能使用css样式表如何办</a></li>
        <li><i></i><a href="https://www.radbuilder.com/marketing/Python/1257.html" title="JavaScript中的错误处理技巧" target="_blank">JavaScript中的错误处理技巧</a></li>
      </ul>
    </div>
    <div class="blank clear" ></div>
    <div class="whitebg cloud">
      <h2 class="side-title">标签云</h2>
      <ul>
     <a href="https://www.radbuilder.com/e/tags/?tagid=73&tempid=8" target="_blank">PHP(1)</a> <a href="https://www.radbuilder.com/e/tags/?tagid=102&tempid=8" target="_blank">c语言(750)</a> <a href="https://www.radbuilder.com/e/tags/?tagid=64&tempid=8" target="_blank">C(6)</a> <a href="https://www.radbuilder.com/e/tags/?tagid=63&tempid=8" target="_blank">C++(7)</a>      </ul>
    </div>
    <div class="ad ad-small"></div>
    <div class="whitebg down-suiji">
      <h2 class="side-title">猜你喜欢</h2>
      <ul>
                  
        <li><a target="_blank"  href="https://www.radbuilder.com/marketing/Python/15248.html"><i><img src=""></i>
          <p>怎么在CakePHP中使用Swoole</p>
          <span class="down-info"></span></a></li>
           
        <li><a target="_blank"  href="https://www.radbuilder.com/marketing/Python/5387.html"><i><img src=""></i>
          <p>在vscode中怎么配置latex</p>
          <span class="down-info"></span></a></li>
           
        <li><a target="_blank"  href="https://www.radbuilder.com/marketing/Python/10261.html"><i><img src=""></i>
          <p>css中怎么给元素设置统一的样式</p>
          <span class="down-info"></span></a></li>
           
        <li><a target="_blank"  href="https://www.radbuilder.com/marketing/Python/10934.html"><i><img src=""></i>
          <p>html怎么创建电子邮件链接</p>
          <span class="down-info"></span></a></li>
           
        <li><a target="_blank"  href="https://www.radbuilder.com/marketing/Python/8744.html"><i><img src=""></i>
          <p>怎么用ASP .NETMVC3实现一个访问统计系统</p>
          <span class="down-info"></span></a></li>
           
        <li><a target="_blank"  href="https://www.radbuilder.com/marketing/Python/11246.html"><i><img src=""></i>
          <p>ps虚化边缘的方法</p>
          <span class="down-info"></span></a></li>
           
        <li><a target="_blank"  href="https://www.radbuilder.com/marketing/Python/7424.html"><i><img src=""></i>
          <p>c语言中如何自定义结构体+位段+枚举</p>
          <span class="down-info"></span></a></li>
           
        <li><a target="_blank"  href="https://www.radbuilder.com/marketing/Python/7583.html"><i><img src=""></i>
          <p>uniapp页面全屏多少px</p>
          <span class="down-info"></span></a></li>
           
        <li><a target="_blank"  href="https://www.radbuilder.com/marketing/Python/16789.html"><i><img src=""></i>
          <p>ps渲染视频没有mp4格式如何办</p>
          <span class="down-info"></span></a></li>
           
        <li><a target="_blank"  href="https://www.radbuilder.com/marketing/Python/15962.html"><i><img src=""></i>
          <p>gist的使用方法是什么</p>
          <span class="down-info"></span></a></li>
           </ul>
    </div>
  </aside>
</article>
<div class="clear blank"></div>
<footer>
  <div class="footer box">
    <div class="wxbox">
      <ul>
        <li><span> </span></li>
        <li><span> </span></li>
      </ul>
    </div>
    <div class="endnav">
      <p>备案号:<a href="https://beian.miit.gov.cn/" target="_blank" rel="nofollow">粤ICP备2023061792号-2</a> <a href="https://www.radbuilder.com/sitemap.xml" target="_blank">网站地图</a></p>
    </div>
  </div>
</footer>
<div class="toolbar-open"></div>
<div class="toolbar">
  <div class="toolbar-close"><span id="closed"></span></div>
  <div class="toolbar-nav">
    <ul id="toolbar-menu">
      <li><i class="side-icon-user"></i>
        <section>
          <div class="userinfo">
              <script src="https://www.radbuilder.com/e/member/login/loginjs.php"></script>
          </div>
        </section>
      </li>
      <li><i class="side-icon-qq"></i>
        <section class="qq-section">
          <div class="qqinfo"><a href="http://wpa.qq.com/msgrd?v=3&uin=19801987&site=qq&menu=yes">站长QQ</a></div>
        </section>
      </li>
      <li><i class="side-icon-weixin"></i>
        <section class="weixin-section">
          <div class="weixin-info">
            <p>个人微信</p>
            <p class="text12">工作时间</p>
            <p class="text12">周一至周日 9:00-21:00</p>
          </div>
        </section>
      </li>
      <li><i class="side-icon-dashang"></i>
        <section class="dashang-section">
          <p></p>
          <ul>
            <li></li>
            <li></li>
          </ul>
        </section>
      </li>
    
    </ul>
  </div>
</div>
<div class="endmenu">
<ul>
<li><a href="https://www.radbuilder.com/"><i class="iconfont icon-shouye"></i>首页</a></li>
<li><a href="https://www.radbuilder.com/phone-fenlei.html"><i class="iconfont icon-fenlei"></i>分类</a></li>
<li><a href="https://www.radbuilder.com/phone-list.html"><i class="iconfont icon-navicon-wzgl"></i>所有</a></li>
<li><a href="https://www.radbuilder.com/e/member/my/"><i class="iconfont icon-My"></i>我的</a></li>
</ul>
</div>
<a href="#" title="返回顶部" class="icon-top"></a>
</body>
</html><script src="https://www.radbuilder.com/e/public/onclick/?enews=donews&classid=20&id=19356"></script>