thinkphp5.1 利用cli命令行+Guzzle类库实现多线程爬虫,希望对需要的朋友有所帮助!

尚未亲自测试,仅供参考

下面thinkphp框架教程栏目将给大家讲解thinkphp5.1 利用cli命令行+Guzzle类库实现多线程爬虫,希望对需要的朋友有所帮助!

 

创建一个cli命令

php think make:command Thread thread

测试能否成功执行

php think thread

安装Guzzle类库

在项目根目录下通过 Composer 安装:

composer require guzzlehttp/guzzle

文档地址:guzzle文档地址(https://guzzle-cn.readthedocs.io/zh_CN/latest/quickstart.html)

实现代码

<?php

/**

 * Created by.

 * User: Jim

 * Date: 2020/9/29

 * Time: 14:31

 */

namespace app\command;

use GuzzleHttp\Client;

use GuzzleHttp\Pool;

use think\console\Command;

use think\console\Input;

use think\console\Output;

/**
 * Console command that sends a fixed number of concurrent POST requests
 * through a Guzzle request pool and prints each response body.
 *
 * The number of in-flight requests is capped by the pool's `concurrency`
 * option, fed from $threads.
 * NOTE(review): the property is called "threads", but the work is delegated
 * to GuzzleHttp\Pool; presumably these are asynchronous requests rather than
 * OS threads — confirm against the Guzzle documentation.
 *
 * Class Thread
 * @package app\command
 * Documentation: https://guzzle-cn.readthedocs.io/zh_CN/latest/quickstart.html
 */
class Thread extends Command
{
    /**
     * Total number of requests to send.
     *
     * @var int
     */
    protected $totalPageCount = 50;

    /**
     * 1-based ordinal of the request that is expected to settle next.
     * Reset at the start of every execute() call.
     *
     * @var int
     */
    protected static $counter = 1;

    /**
     * Maximum number of requests the pool keeps in flight at once.
     *
     * @var int
     */
    protected $threads = 20;

    /**
     * Registers the command under the name `thread` (run as `php think thread`).
     */
    protected function configure()
    {
        $this->setName('thread');
    }

    /**
     * Builds the request pool, runs it to completion and reports progress.
     *
     * @param Input  $input  Console input (unused).
     * @param Output $output Console output used for response bodies and progress lines.
     */
    protected function execute(Input $input, Output $output)
    {
        // The counter is static: start from a clean state so that a second
        // run inside the same process does not inherit a stale value.
        self::$counter = 1;

        $client = new Client();

        // Lazily yields one request factory per request. A counted loop is
        // used instead of range(1, $total) because range(1, 0) would yield
        // two items when $total is 0.
        $requests = function ($total) use ($client) {
            $uri = 'https://apinew.juejin.im/content_api/v1/short_msg/detail';

            for ($i = 0; $i < $total; $i++) {
                yield function () use ($client, $uri) {
                    return $client->postAsync($uri, [
                        // NOTE(review): TLS certificate verification is
                        // disabled here; enable it (or point `verify` at a
                        // CA bundle) before using this against real targets.
                        'verify' => false,
                        'json' => [
                            'msg_id' => '6845185452727599118',
                        ],
                    ]);
                };
            }
        };

        $pool = new Pool($client, $requests($this->totalPageCount), [
            'concurrency' => $this->threads,

            // Request succeeded.
            'fulfilled' => function ($response, $index) use ($output) {
                $output->writeln($response->getBody()->getContents());

                // $index is the zero-based key of the request in the pool;
                // show a human-friendly 1-based position.
                $position = $index + 1;
                $output->writeln("正在执行第{$position}个·····");

                $this->announceIfFinished($output);
            },

            // Request failed. A failure still counts as a settled request,
            // otherwise a single error would keep the end-of-run message
            // from ever being printed.
            'rejected' => function ($reason, $index) use ($output) {
                $output->writeln("执行失败,{$reason}");

                $this->announceIfFinished($output);
            },
        ]);

        // Block until every request in the pool has been settled.
        $pool->promise()->wait();
    }

    /**
     * Records one settled request and prints the end-of-run banner once the
     * last one has been accounted for.
     *
     * @param Output $output Console output the banner is written to.
     */
    private function announceIfFinished(Output $output)
    {
        if ($this->checkThreadIsEnd()) {
            $output->writeln("------------请求结束---------");
        }
    }

    /**
     * Checks whether the request being recorded is the last one.
     *
     * While the counter is below $totalPageCount it is advanced by one and
     * false is returned; once it has reached the total, true is returned.
     *
     * @return bool True when the final request has settled.
     */
    private function checkThreadIsEnd()
    {
        if (self::$counter < $this->totalPageCount) {
            self::$counter++;

            return false;
        }

        return true;
    }
}

执行命令

php think thread

效果

THE END