log = $loggerFactory->get('log', 'command-baidu-record');
    }

    /**
     * Register the command description and the optional --id / -i option.
     */
    public function configure()
    {
        parent::configure();
        $this->setDescription('查百度收录');
        $this->addOption('id', 'i', InputOption::VALUE_OPTIONAL, '指定的新闻id.', false);
    }

    /**
     * Console entry point: normalise the --id option and run the check.
     *
     * @return int 0 on success, 1 when --id is given but is not an integer.
     */
    public function execute(InputInterface $input, OutputInterface $output): int
    {
        // getOption() yields false (option absent, the declared default), null
        // (--id passed without a value) or the raw string typed by the user.
        // _start() only accepts int|bool, so normalise here instead of relying
        // on implicit coercion (which fails under strict_types and turns
        // non-numeric strings into true).
        $rawId = $input->getOption('id');
        $id = false;
        if ($rawId !== false && $rawId !== null) {
            $id = filter_var($rawId, FILTER_VALIDATE_INT);
            if ($id === false) {
                $output->writeln('<error>--id 必须是整数</error>');

                return 1;
            }
        }

        $this->_start($id);

        return 0;
    }

    /**
     * Check whether pending news articles show up in Baidu search and flag those that do.
     *
     * Selects up to 10 random AppNews rows with is_delete = 0 and is_record = 0
     * (additionally filtered by id when $id is truthy), searches Baidu for each
     * article URL and sets is_record = 1 when a ".abs" result node contains the
     * site domain. Processing stops at the second captcha / non-200 response.
     * Per-article failures are logged and do not abort the batch.
     *
     * @param int|bool $id News id to restrict the check to; a falsy value means "random batch".
     */
    private function _start(int|bool $id = false)
    {
        $captchaHits = 0;
        $websiteCache = [];
        // The cookie returned by Baidu is currently not fed back into later requests,
        // so this stays empty for the whole run.
        $requestCookie = '';

        $query = AppNews::query()
            ->where('is_delete', 0)
            ->where('is_record', 0)
            ->orderBy(Db::raw('RAND()'))
            ->limit(10);
        if ($id) {
            $query = $query->where('id', $id);
        }

        foreach ($query->get() as $item) {
            // Pause between requests (presumably to avoid tripping Baidu's rate limiting).
            sleep(3);

            try {
                if (!$item->platform) {
                    throw new \Exception('没找到平台!!');
                }

                // Load each platform's website config once per run.
                if (!isset($websiteCache[$item->platform])) {
                    $website = AppWebsiteConfig::find($item->platform);
                    if ($website === null) {
                        throw new \Exception("没找到平台配置: {$item->platform}");
                    }
                    $websiteCache[$item->platform] = $website->toArray();
                }

                // Primary domain, without surrounding slashes.
                $domain = trim((string) ($websiteCache[$item->platform]['app_domain'] ?? ''), '/');
                if ($domain === '') {
                    // An empty needle matches every string, which would mark
                    // every article as indexed.
                    throw new \Exception("平台域名为空: {$item->platform}");
                }

                $url = 'https://m.baidu.com/pu=sz%401321_480/from=0/ssid=0/s?word='
                    . urlencode("https://{$domain}/news/{$item->id}");
                $this->log->info('正在处理' . $url);

                [$res, $code] = $this->request($url, headers: [
                    'Cookie' => $requestCookie,
                ]);
                $res = (string) $res;

                $isCaptcha = str_contains($res, '验证')
                    || stripos($res, 'wappass') !== false
                    || (int) $code !== 200;
                if ($isCaptcha) {
                    $this->log->info('有验证码!!');
                    $captchaHits++;
                    // Give up on the whole run at the second captcha.
                    if ($captchaHits > 1) {
                        return;
                    }
                }

                $hasResultNodes = false;
                $isRecorded = false;
                (new Crawler($res))->filter('.abs')->each(
                    function ($node) use ($domain, &$hasResultNodes, &$isRecorded) {
                        $hasResultNodes = true;
                        if (stripos($node->html(), $domain) !== false) {
                            $isRecorded = true;
                        }
                    }
                );

                if ($isRecorded) {
                    // Persist once, however many result nodes matched.
                    $this->log->info('已收录');
                    $item->is_record = 1;
                    $item->save();
                }

                if (!$hasResultNodes && !$isCaptcha && !str_contains($res, '抱歉,没有找到')) {
                    // No results, no captcha and no "not found" notice: the page may
                    // be broken, so dump it for diagnosis.
                    $this->log->info($res);
                }

                $this->log->info('处理结束..');
            } catch (\Throwable $exception) {
                // Best effort: record the failure and carry on with the next article.
                $this->log->info($exception->getMessage());
                $this->log->info($exception->getTraceAsString());
            }
        }
    }

    /**
     * Perform a GET request with cURL, following redirects.
     *
     * @param string $url        Address to fetch.
     * @param array  $headers    Request headers, either as a list of raw "Name: value"
     *                           lines or as a name => value map; map entries with an
     *                           empty value are skipped.
     * @param string $cookieFile Not used by this implementation; kept so the signature
     *                           stays compatible with existing callers.
     *
     * @return array{0: string, 1: int, 2: string} Raw response including response headers
     *                           ('' on transport failure), HTTP status code (0 when no
     *                           response was received), and the first Set-Cookie
     *                           "name=value" pair found ('' if none).
     */
    protected function request(string $url, array $headers = [], string $cookieFile = 'cookie.txt'): array
    {
        // CURLOPT_HTTPHEADER expects a list of "Name: value" strings; array keys
        // are ignored by cURL, so a name => value map must be flattened first.
        $headerLines = [];
        foreach ($headers as $name => $value) {
            if (is_int($name)) {
                $headerLines[] = (string) $value;
            } elseif ((string) $value !== '') {
                $headerLines[] = "{$name}: {$value}";
            }
        }

        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            // Empty string: advertise every encoding this cURL build supports.
            CURLOPT_ENCODING => '',
            CURLOPT_MAXREDIRS => 10,
            // Response headers are kept in the returned string; the Set-Cookie
            // extraction below relies on that.
            CURLOPT_HEADER => true,
            CURLOPT_TIMEOUT => 15,
            // NOTE(review): peer verification is disabled; confirm this is intentional.
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
            CURLOPT_CUSTOMREQUEST => 'GET',
            CURLOPT_HTTPHEADER => $headerLines,
        ]);

        $result = curl_exec($ch);
        // Read status and error details before releasing the handle.
        $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        if ($result === false) {
            // Transport failure: report it and hand back an empty body so that
            // callers can keep treating the response as a string.
            $this->log->info('请求失败: ' . $error);
            $result = '';
        }

        preg_match_all('/Set-Cookie: (.*?);/i', $result, $matches);

        return [$result, $httpCode, $matches[1][0] ?? ''];
    }
}